From 990030bce0b2e91088c0dd74aed59b82533b3b0f Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 3 Nov 2023 13:55:00 +0100 Subject: [PATCH 1/4] Sort imports Signed-off-by: Gilles Peskine --- tests/scripts/check_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py index 68871efe40db..31edf8b61c51 100755 --- a/tests/scripts/check_files.py +++ b/tests/scripts/check_files.py @@ -10,10 +10,10 @@ Note: requires python 3, must be run from Mbed TLS root. """ -import os import argparse -import logging import codecs +import logging +import os import re import subprocess import sys From f2fb9f667c42244415911609d623df1270b38813 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 3 Nov 2023 14:13:55 +0100 Subject: [PATCH 2/4] Check copyright statements and SPDX license identifier Enforce a specific copyright statement and a specific SPDX license identifier where they are present. Binary files, third-party modules and a few other exceptions are not checked. There is currently no check that copyright statements and license identifiers are present. Signed-off-by: Gilles Peskine --- tests/scripts/check_files.py | 80 ++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py index 31edf8b61c51..554ed479761c 100755 --- a/tests/scripts/check_files.py +++ b/tests/scripts/check_files.py @@ -12,6 +12,7 @@ import argparse import codecs +import inspect import logging import os import re @@ -345,6 +346,84 @@ def issue_with_line(self, line, _filepath, _line_number): return False +THIS_FILE_BASE_NAME = \ + os.path.basename(inspect.getframeinfo(inspect.currentframe()).filename) +LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = \ + inspect.getframeinfo(inspect.currentframe()).lineno +class LicenseIssueTracker(LineIssueTracker): + """Check copyright statements and license indications. + + This class only checks that statements are correct if present. It does + not enforce the presence of statements in each file. + """ + + heading = "License issue:" + + LICENSE_EXEMPTION_RE_LIST = [ + # Third-party code, other than whitelisted third-party modules, + # may be under a different license. + r'3rdparty/(?!(p256-m)/.*)', + # Documentation explaining the license may have accidental + # false positives. + r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z', + # Files imported from TF-M, and not used except in test builds, + # may be under a different license. + r'configs/crypto_config_profile_medium\.h\Z', + r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z', + # Third-party file. + r'dco\.txt\Z', + ] + path_exemptions = re.compile('|'.join(BINARY_FILE_PATH_RE_LIST + + LICENSE_EXEMPTION_RE_LIST)) + + COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors' + # Catch "Copyright foo", "Copyright (C) foo", "Copyright © foo", etc. + COPYRIGHT_RE = re.compile(rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)', re.I) + + SPDX_HEADER_KEY = b'SPDX-License-Identifier' + LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later' + SPDX_RE = re.compile(br'.*?(' + + re.escape(SPDX_HEADER_KEY) + + br')(:\s*(.*?)\W*\Z|.*)', re.I) + + def __init__(self): + super().__init__() + # Record what problem was caused. We can't easily report it due to + # the structure of the script. To be fixed after + # https://github.com/Mbed-TLS/mbedtls/pull/2506 + self.problem = None + + def issue_with_line(self, line, filepath, line_number): + # Use endswith() rather than the more correct os.path.basename() + # because experimentally, it makes a significant difference to + # the running time. + if filepath.endswith(THIS_FILE_BASE_NAME) and \ + line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER: + # Avoid false positives from the code in this class. + # Also skip the rest of this file, which is highly unlikely to + # contain any problematic statements since we put those near the + # top of files. + return False + + m = self.COPYRIGHT_RE.match(line) + if m and m.group(1) != self.COPYRIGHT_HOLDER: + self.problem = 'Invalid copyright line' + return True + + m = self.SPDX_RE.match(line) + if m: + if m.group(1) != self.SPDX_HEADER_KEY: + self.problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode() + return True + if not m.group(3): + self.problem = 'Improperly formatted SPDX license identifier' + return True + if m.group(3) != self.LICENSE_IDENTIFIER: + self.problem = 'Wrong SPDX license identifier' + return True + return False + + class IntegrityChecker: """Sanity-check files under the current directory.""" @@ -365,6 +444,7 @@ def __init__(self, log_file): TrailingWhitespaceIssueTracker(), TabIssueTracker(), MergeArtifactIssueTracker(), + LicenseIssueTracker(), ] def setup_logger(self, log_file, level=logging.INFO): From 3b9facd8ac44f1c03b698a2e5ebdcc5d71ff6aa4 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 3 Nov 2023 14:35:28 +0100 Subject: [PATCH 3/4] Also complain if licenses are mentioned Signed-off-by: Gilles Peskine --- tests/scripts/check_files.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py index 554ed479761c..3eb60d72421c 100755 --- a/tests/scripts/check_files.py +++ b/tests/scripts/check_files.py @@ -386,6 +386,11 @@ class LicenseIssueTracker(LineIssueTracker): re.escape(SPDX_HEADER_KEY) + br')(:\s*(.*?)\W*\Z|.*)', re.I) + LICENSE_MENTION_RE = re.compile(rb'.*(?:' + rb'|'.join([ + rb'Apache License', + rb'General Public License', + ]) + rb')', re.I) + def __init__(self): super().__init__() # Record what problem was caused. We can't easily report it due to @@ -394,6 +399,8 @@ def __init__(self): self.problem = None def issue_with_line(self, line, filepath, line_number): + #pylint: disable=too-many-return-statements + # Use endswith() rather than the more correct os.path.basename() # because experimentally, it makes a significant difference to # the running time. @@ -421,6 +428,12 @@ def issue_with_line(self, line, filepath, line_number): if m.group(3) != self.LICENSE_IDENTIFIER: self.problem = 'Wrong SPDX license identifier' return True + + m = self.LICENSE_MENTION_RE.match(line) + if m: + self.problem = 'Suspicious license mention' + return True + return False From ce78200fb5d1698ae09e540783007a1397b6fcbd Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 3 Nov 2023 14:49:12 +0100 Subject: [PATCH 4/4] Pacify mypy Signed-off-by: Gilles Peskine --- tests/scripts/check_files.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/scripts/check_files.py b/tests/scripts/check_files.py index 3eb60d72421c..a2a9dfa8d018 100755 --- a/tests/scripts/check_files.py +++ b/tests/scripts/check_files.py @@ -346,10 +346,13 @@ def issue_with_line(self, line, _filepath, _line_number): return False -THIS_FILE_BASE_NAME = \ - os.path.basename(inspect.getframeinfo(inspect.currentframe()).filename) -LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = \ - inspect.getframeinfo(inspect.currentframe()).lineno +def this_location(): + frame = inspect.currentframe() + assert frame is not None + info = inspect.getframeinfo(frame) + return os.path.basename(info.filename), info.lineno +THIS_FILE_BASE_NAME, LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = this_location() + class LicenseIssueTracker(LineIssueTracker): """Check copyright statements and license indications.