diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py
index 68871efe40db..a2a9dfa8d018 100755
--- a/tests/scripts/check_files.py
+++ b/tests/scripts/check_files.py
@@ -10,10 +10,11 @@
 Note: requires python 3, must be run from Mbed TLS root.
 """
 
-import os
 import argparse
-import logging
 import codecs
+import inspect
+import logging
+import os
 import re
 import subprocess
 import sys
@@ -345,6 +346,128 @@ def issue_with_line(self, line, _filepath, _line_number):
         return False
 
 
+def this_location():
+    """Return this file's base name and a line number inside this function.
+
+    The line number is the one being executed when the frame is inspected,
+    so every definition that follows this function has a larger line number.
+    """
+    frame = inspect.currentframe()
+    if frame is None:
+        # inspect.currentframe() returns None on Python implementations
+        # without stack frame support. Fail explicitly rather than with an
+        # assert, which would be stripped by "python -O".
+        raise RuntimeError('Python stack frame support is required')
+    try:
+        info = inspect.getframeinfo(frame)
+    finally:
+        # Break the reference cycle created by holding on to our own frame.
+        del frame
+    return os.path.basename(info.filename), info.lineno
+
+
+THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location()
+
+
+class LicenseIssueTracker(LineIssueTracker):
+    """Check copyright statements and license indications.
+
+    This class only checks that statements are correct if present. It does
+    not enforce the presence of statements in each file.
+    """
+
+    heading = "License issue:"
+
+    LICENSE_EXEMPTION_RE_LIST = [
+        # Third-party code, other than whitelisted third-party modules,
+        # may be under a different license.
+        r'3rdparty/(?!(p256-m)/.*)',
+        # Documentation explaining the license may have accidental
+        # false positives.
+        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
+        # Files imported from TF-M, and not used except in test builds,
+        # may be under a different license.
+        r'configs/crypto_config_profile_medium\.h\Z',
+        r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
+        # Third-party file.
+        r'dco\.txt\Z',
+    ]
+    path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST +
+                                          LICENSE_EXEMPTION_RE_LIST))
+
+    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
+    # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc.
+    # The holder is matched with one character class, equivalent to
+    # (?:\w|\s|[()]|[^ -~]). As an alternation, \s and [^ -~] both match
+    # tabs, which makes a failed match backtrack exponentially.
+    COPYRIGHT_RE = re.compile(
+        rb'.*\bcopyright\s+([\w\s()\x00-\x1f\x7f-\xff]*\w)', re.I)
+
+    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
+    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
+    SPDX_RE = re.compile(br'.*?(' +
+                         re.escape(SPDX_HEADER_KEY) +
+                         br')(:\s*(.*?)\W*\Z|.*)', re.I)
+
+    LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([
+        rb'Apache License',
+        rb'General Public License',
+    ]) + rb')', re.I)
+
+    def __init__(self):
+        super().__init__()
+        # Record what problem was caused. We can't easily report it due to
+        # the structure of the script. To be fixed after
+        # https://github.com/Mbed-TLS/mbedtls/pull/2506
+        self.problem = None
+
+    def issue_with_line(self, line, filepath, line_number):
+        """Tell whether a line contains a bad copyright or license statement.
+
+        line is the content of the line as bytes, filepath is the path of
+        the file that it comes from and line_number is its line number.
+
+        Return True and store a description in self.problem if there is an
+        issue. Return False otherwise, leaving self.problem unchanged.
+        """
+        #pylint: disable=too-many-return-statements
+
+        # Use endswith() rather than the more correct os.path.basename()
+        # because experimentally, it makes a significant difference to
+        # the running time.
+        if filepath.endswith(THIS_FILE_BASE_NAME) and \
+           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
+            # Avoid false positives from the code in this class.
+            # Also skip the rest of this file, which is highly unlikely to
+            # contain any problematic statements since we put those near the
+            # top of files.
+            return False
+
+        m = self.COPYRIGHT_RE.match(line)
+        if m and m.group(1) != self.COPYRIGHT_HOLDER:
+            self.problem = 'Invalid copyright line'
+            return True
+
+        m = self.SPDX_RE.match(line)
+        if m:
+            if m.group(1) != self.SPDX_HEADER_KEY:
+                self.problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
+                return True
+            if not m.group(3):
+                self.problem = 'Improperly formatted SPDX license identifier'
+                return True
+            if m.group(3) != self.LICENSE_IDENTIFIER:
+                self.problem = 'Wrong SPDX license identifier'
+                return True
+
+        m = self.LICENSE_MENTION_RE.match(line)
+        if m:
+            self.problem = 'Suspicious license mention'
+            return True
+
+        return False
+
+
 class IntegrityChecker:
     """Sanity-check files under the current directory."""
 
@@ -365,6 +488,7 @@ def __init__(self, log_file):
             TrailingWhitespaceIssueTracker(),
             TabIssueTracker(),
             MergeArtifactIssueTracker(),
+            LicenseIssueTracker(),
         ]
 
     def setup_logger(self, log_file, level=logging.INFO):