diff --git a/pr-checker/rules.json b/pr-checker/adoc_rules.json
similarity index 100%
rename from pr-checker/rules.json
rename to pr-checker/adoc_rules.json
diff --git a/pr-checker/checker.py b/pr-checker/checker.py
index ae249006..846b6e3f 100644
--- a/pr-checker/checker.py
+++ b/pr-checker/checker.py
@@ -1,6 +1,7 @@
 import argparse
 import sys
 from datetime import date, datetime
+from lxml import etree
 import re
 import json
 import os
@@ -28,10 +29,10 @@ def adoc_checker(file, valid_tags, rules):
     release_date_re = re.compile(
         ":page-releasedate:[ ]*([0-9]{4}[-][0-9]{2}[-][0-9]{2})")
-    tags_re = re.compile(":page-tags: *\[(.*)\]")
+    tags_re = re.compile(r":page-tags: *\[(.*)\]")
     list_re = re.compile("^- ")
     file_tags_re = re.compile("^.*(hide_tags=).*(tags=).*$")
-    hotspot_re = re.compile("\[(hotspot(=[^ =\n]+)? ?)+( file(=[0-9]+)?)?\]`[^`\n]*`")
+    hotspot_re = re.compile(r"\[(hotspot(=[^ =\n]+)? ?)+( file(=[0-9]+)?)?\]`[^`\n]*`")
     skip_list = os.environ.get('SKIP_LIST')
     print(f"SKIP_LIST={skip_list}\n");
@@ -133,6 +134,128 @@ def check_vocabulary(file, deny_list, warning_list):
     return output
+
+
+def pom_xml_checker(file, guide_id, rules):
+    """
+    Checks if the artifactId in pom.xml starts with the guide's ID
+    """
+    guide_id = "guide-" + guide_id[len("guide-"):]
+    output = ''
+
+    path_parts = file.split(os.sep)  # Split the path by directory separator
+
+    # Check if it's a direct pom.xml in the root of finish/start
+    if path_parts[-2] in rules['main_directory']:
+        # Base module (finish/pom.xml or start/pom.xml)
+        expected_artifact_id = guide_id
+    elif path_parts[-3] in rules['main_directory']:
+        # module (finish/xxx/pom.xml or start/xxx/pom.xml)
+        module_name = path_parts[-2]
+        expected_artifact_id = f"{guide_id}-{module_name}"
+    else:
+        output += f"[ERROR] Unrecognized path structure for {file}.\n"
+        return output
+
+    try:
+        tree = etree.parse(file)
+        root = tree.getroot()
+        namespace = root.nsmap.get(None)
+        namespaces = {'m': namespace} if namespace else {}
+        artifact_id_element = root.find('m:artifactId', namespaces=namespaces)
+        if artifact_id_element is None or artifact_id_element.text is None:
+            output += f"[ERROR] Project's artifactId not found.\n"
+            return output
+
+        artifact_id = artifact_id_element.text.strip()
+        line_number = artifact_id_element.sourceline
+        if artifact_id != expected_artifact_id:
+            output += f"[ERROR] [LINE {line_number}] The project's artifactId '{artifact_id}' does not match the expected '{expected_artifact_id}'.\n"
+
+    except:
+        e = sys.exc_info()[0]
+        print(f"something went wrong with {file} parsing", e)
+
+    return output
+
+
+def find_corresponding_pom_xml(server_file):
+    """
+    Given a server.xml path, finds the corresponding pom.xml path.
+    """
+    path_parts = server_file.split(os.sep)
+
+    try:
+        src_index = path_parts.index('src')
+    except ValueError:
+        # 'src' not found in path; cannot determine pom.xml location
+        return None
+
+    module_root_dir = os.sep.join(path_parts[:src_index])
+    pom_xml_path = os.path.join(module_root_dir, 'pom.xml')
+    if os.path.exists(pom_xml_path):
+        return pom_xml_path
+    else:
+        return None
+
+
+def extract_artifact_id(pom_file):
+    """
+    Extracts the artifactId from the given pom.xml file.
+ """ + try: + tree = etree.parse(pom_file) + root = tree.getroot() + namespace = root.nsmap.get(None) + namespaces = {'m': namespace} if namespace else {} + + artifact_id_element = root.find('m:artifactId', namespaces=namespaces) + if artifact_id_element is not None and artifact_id_element.text: + return artifact_id_element.text.strip() + except: + e = sys.exc_info()[0] + print(f"something went wrong with {pom_file} parsing", e) + + return None + + +def server_xml_checker(file): + """ + Checks if the location attribute in server.xml's webApplication element + matches the artifactId from pom.xml. + """ + output = '' + + try: + tree = etree.parse(file) + root = tree.getroot() + + # Find the webApplication element + web_app_element = root.find(".//webApplication") + if web_app_element is None: + output += f"[ERROR] webApplication element not found.\n" + return output + + # Get the location attribute + location = web_app_element.get("location") + if location is None: + line_number = web_app_element.sourceline + output += f"[ERROR] [LINE {line_number}] location attribute not found in the webApplication element.\n" + return output + + pom_file = find_corresponding_pom_xml(file) + artifact_id = extract_artifact_id(pom_file) + # Expected location is the artifactId with .war extension + expected_location = f"{artifact_id}.war" + + if location != expected_location: + line_number = web_app_element.sourceline + output += f"[ERROR] [LINE {line_number}] webApplication location '{location}' does not match the expected '{expected_location}'.\n" + except: + e = sys.exc_info()[0] + print(f"something went wrong with {file} parsing", e) + + return output + + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--deny', nargs=1, @@ -141,7 +264,9 @@ def check_vocabulary(file, deny_list, warning_list): type=argparse.FileType('r')) parser.add_argument('--tags', nargs=1, type=argparse.FileType('r')) - parser.add_argument('--rules', nargs=1, + parser.add_argument('--adoc-rules', nargs=1, + type=argparse.FileType('r')) + parser.add_argument('--xml-rules', nargs=1, type=argparse.FileType('r')) parser.add_argument('--repo', nargs=1, type=str) parser.add_argument('infile', nargs='*', @@ -171,16 +296,15 @@ def check_vocabulary(file, deny_list, warning_list): e = sys.exc_info()[0] print("something went wrong with tags parsing", e) tags = [] - if args.rules is not None and args.repo is not None: + if args.adoc_rules is not None and args.repo is not None: try: repo = args.repo[0].split('/')[-1] - rules = dict(map(lambda rule: (rule[0], {'check': repo not in rule[1]['exception'], 'log-level': rule[1]['log-level']}), - json.loads(args.rules[0].read()).items())) + adoc_rules = dict(map(lambda adoc_rules: (adoc_rules[0], {'check': repo not in adoc_rules[1]['exception'], 'log-level': adoc_rules[1]['log-level']}), + json.loads(args.adoc_rules[0].read()).items())) except: e = sys.exc_info()[0] - print("something went wrong with repo and rule parsing", e) - repo = '' - rules = { + print("something went wrong with repo and adoc-rules parsing", e) + adoc_rules = { "license": {'check': True, 'log-level': 'ERROR'}, "release_date": {'check': True, 'log-level': 'ERROR'}, "page_tags": {'check': True, 'log-level': 'ERROR'}, @@ -188,6 +312,14 @@ def check_vocabulary(file, deny_list, warning_list): "line-length": {'check': True, 'log-level': 'WARNING'}, "file_tags": {'check': True, 'log-level': 'ERROR'}, } + if args.xml_rules is not None and args.repo is not None: + try: + repo = args.repo[0].split('/')[-1] + 
xml_rules = {"check": repo not in (xml_data := json.loads(args.xml_rules[0].read()))['skip'], 'main_directory': xml_data['main_directory']} + except: + e = sys.exc_info()[0] + print("something went wrong with repo and xml-rules parsing", e) + xml_rules = { "check" : False } file_extensions = map(lambda f: f.name.split( '/')[-1].split('.')[-1], args.infile) @@ -195,8 +327,29 @@ def check_vocabulary(file, deny_list, warning_list): for i, f in enumerate(file_extensions): if f == 'adoc': - output += adoc_checker(args.infile[i], tags, rules) - output += check_vocabulary(args.infile[i], deny_list, warning_list) + result = adoc_checker(args.infile[i], tags, adoc_rules) + check_vocabulary(args.infile[i], deny_list, warning_list) + if result != '': + output += f"[INFO] File:[{args.infile[i].name}]\n{result}" + + for i, file in enumerate(args.infile): + if os.path.basename(file.name) == 'pom.xml' and xml_rules["check"]: + result = pom_xml_checker(file.name, repo, xml_rules) + if result: + output += f"[INFO] File:[{file.name}]\n{result}" + + if os.path.basename(file.name) == 'server.xml' and xml_rules["check"]: + path_parts = file.name.split(os.sep) + if path_parts[-2] == 'staging' or path_parts[-3] == 'staging': + continue + pom_xml_path = find_corresponding_pom_xml(file.name) + pom_xml_result = pom_xml_checker(pom_xml_path, repo, xml_rules) + if pom_xml_result: + output += f"[INFO] File:[{pom_xml_path}]\n{pom_xml_result}" + continue + server_xml_result = server_xml_checker(file.name) + if server_xml_result: + output += f"[INFO] File:[{file.name}]\n{server_xml_result}" + if output != '': print(output.rstrip()) if 'ERROR' in output: diff --git a/pr-checker/checker.sh b/pr-checker/checker.sh index d67b621f..0b680d4d 100644 --- a/pr-checker/checker.sh +++ b/pr-checker/checker.sh @@ -1,6 +1,8 @@ #!/bin/sh set -o pipefail +python3 -m pip install lxml + repo=$1 pr_number=$2 @@ -33,4 +35,4 @@ else echo "::set-output name=canSkip::false" fi -python3 "$SCRIPTPATH"/checker.py --deny "$SCRIPTPATH"/deny_list.json --warn "$SCRIPTPATH"/warning_list.json --tags "$SCRIPTPATH"/../guide_tags.json --repo "$repo" --rules "$SCRIPTPATH"/rules.json $(echo $UPDATED_FILES | jq '.[]' | tr -d '"') +python3 "$SCRIPTPATH"/checker.py --deny "$SCRIPTPATH"/deny_list.json --warn "$SCRIPTPATH"/warning_list.json --tags "$SCRIPTPATH"/../guide_tags.json --repo "$repo" --adoc-rules "$SCRIPTPATH"/adoc_rules.json --xml-rules "$SCRIPTPATH"/xml_rules.json $(echo $UPDATED_FILES | jq '.[]' | tr -d '"') diff --git a/pr-checker/xml_rules.json b/pr-checker/xml_rules.json new file mode 100644 index 00000000..296f62da --- /dev/null +++ b/pr-checker/xml_rules.json @@ -0,0 +1,12 @@ +{ + "main_directory": [ + "start", + "finish", + "ibmmq" + ], + "skip": [ + "guide-liberty-deep-dive", + "guide-liberty-deep-dive-gradle" + ] + } + \ No newline at end of file