Skip to content

Commit

Permalink
Merge pull request #712 from pypa/license-expression
Browse files Browse the repository at this point in the history
Support simple SPDX license expressions (PEP 639)
  • Loading branch information
takluyver authored Feb 8, 2025
2 parents c13d414 + 0d0d06b commit 2818f41
Show file tree
Hide file tree
Showing 8 changed files with 831 additions and 24 deletions.
3 changes: 2 additions & 1 deletion doc/pyproject_toml.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ requires-python
A version specifier for the versions of Python this requires, e.g. ``~=3.3`` or
``>=3.3,<4``, which are equivalents.
license
A table with either a ``file`` key (a relative path to a license file) or a
A valid SPDX `license expression <https://peps.python.org/pep-0639/#term-license-expression>`_
or a table with either a ``file`` key (a relative path to a license file) or a
``text`` key (the license text).
license-files
A list of glob patterns for license files to include.
Expand Down
651 changes: 651 additions & 0 deletions flit_core/flit_core/_spdx_data.py

Large diffs are not rendered by default.

13 changes: 12 additions & 1 deletion flit_core/flit_core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ class Metadata(object):
maintainer = None
maintainer_email = None
license = None
license_expression = None
description = None
keywords = None
download_url = None
Expand Down Expand Up @@ -399,7 +400,6 @@ def write_metadata_file(self, fp):
optional_fields = [
'Summary',
'Home-page',
'License',
'Keywords',
'Author',
'Author-email',
Expand All @@ -423,6 +423,17 @@ def write_metadata_file(self, fp):
value = '\n '.join(value.splitlines())
fp.write(u"{}: {}\n".format(field, value))


license_expr = getattr(self, self._normalise_field_name("License-Expression"))
license = getattr(self, self._normalise_field_name("License"))
if license_expr:
# TODO: License-Expression requires Metadata-Version '2.4'
# Backfill it to the 'License' field for now
# fp.write(u'License-Expression: {}\n'.format(license_expr))
fp.write(u'License: {}\n'.format(license_expr))
elif license:
fp.write(u'License: {}\n'.format(license))

for clsfr in self.classifiers:
fp.write(u'Classifier: {}\n'.format(clsfr))

Expand Down
101 changes: 80 additions & 21 deletions flit_core/flit_core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,14 @@ def _check_type(d, field_name, cls):
"{} field should be {}, not {}".format(field_name, cls, type(d[field_name]))
)

def _check_types(d, field_name, cls_list) -> None:
if not isinstance(d[field_name], cls_list):
raise ConfigError(
"{} field should be {}, not {}".format(
field_name, ' or '.join(map(str, cls_list)), type(d[field_name])
)
)

def _check_list_of_str(d, field_name):
if not isinstance(d[field_name], list) or not all(
isinstance(e, str) for e in d[field_name]
Expand Down Expand Up @@ -577,30 +585,38 @@ def read_pep621_metadata(proj, path) -> LoadedConfig:

license_files = set()
if 'license' in proj:
_check_type(proj, 'license', dict)
license_tbl = proj['license']
unrec_keys = set(license_tbl.keys()) - {'text', 'file'}
if unrec_keys:
raise ConfigError(
"Unrecognised keys in [project.license]: {}".format(unrec_keys)
)
_check_types(proj, 'license', (str, dict))
if isinstance(proj['license'], str):
md_dict['license_expression'] = normalize_license_expr(proj['license'])
else:
license_tbl = proj['license']
unrec_keys = set(license_tbl.keys()) - {'text', 'file'}
if unrec_keys:
raise ConfigError(
"Unrecognised keys in [project.license]: {}".format(unrec_keys)
)

# TODO: Do something with license info.
# The 'License' field in packaging metadata is a brief description of
# a license, not the full text or a file path. PEP 639 will improve on
# how licenses are recorded.
if 'file' in license_tbl:
if 'text' in license_tbl:
# The 'License' field in packaging metadata is a brief description of
# a license, not the full text or a file path.
if 'file' in license_tbl:
if 'text' in license_tbl:
raise ConfigError(
"[project.license] should specify file or text, not both"
)
license_f = license_tbl['file']
if isabs_ish(license_f):
raise ConfigError(
f"License file path ({license_f}) cannot be an absolute path"
)
if not (path.parent / license_f).is_file():
raise ConfigError(f"License file {license_f} does not exist")
license_files.add(license_tbl['file'])
elif 'text' in license_tbl:
pass
else:
raise ConfigError(
"[project.license] should specify file or text, not both"
"file or text field required in [project.license] table"
)
license_files.add(license_tbl['file'])
elif 'text' in license_tbl:
pass
else:
raise ConfigError(
"file or text field required in [project.license] table"
)

if 'license-files' in proj:
_check_type(proj, 'license-files', list)
Expand Down Expand Up @@ -635,6 +651,16 @@ def read_pep621_metadata(proj, path) -> LoadedConfig:

if 'classifiers' in proj:
_check_list_of_str(proj, 'classifiers')
classifiers = proj['classifiers']
license_expr = md_dict.get('license_expression', None)
if license_expr:
for cl in classifiers:
if not cl.startswith('License :: '):
continue
raise ConfigError(
"License classifier are deprecated in favor of the license expression. "
"Remove the '{}' classifier".format(cl)
)
md_dict['classifiers'] = proj['classifiers']

if 'urls' in proj:
Expand Down Expand Up @@ -788,3 +814,36 @@ def isabs_ish(path):
absolute paths, we also want to reject these odd halfway paths.
"""
return os.path.isabs(path) or path.startswith(('/', '\\'))


def normalize_license_expr(s: str):
    """Validate & normalise an SPDX license expression.

    For now this only handles simple expressions (referring to 1 license):
    either a custom ``LicenseRef-...`` identifier, or a single SPDX license
    ID optionally followed by ``+`` ("or later").

    :param s: the license expression as written by the user.
    :returns: the expression with normalised capitalisation. For a
        ``LicenseRef-`` identifier only the prefix is normalised. If the ID is
        unknown and the ``FLIT_ALLOW_INVALID`` environment variable is set,
        ``s`` is returned unchanged after logging a warning.
    :raises ConfigError: if a ``LicenseRef-`` identifier contains characters
        other than ASCII letters, digits, ``-`` and ``.``, or if the ID is not
        a recognised SPDX license ID (and ``FLIT_ALLOW_INVALID`` is unset).
    """
    ls = s.lower()
    if ls.startswith('licenseref-'):
        ref = s.partition('-')[2]
        # Use fullmatch rather than match(...$): '$' also matches just before
        # a trailing newline, which would let a newline slip through into the
        # generated metadata header.
        if re.fullmatch(r'[a-zA-Z0-9\-.]+', ref):
            # Normalise case of LicenseRef, leave the rest alone
            return "LicenseRef-" + ref
        raise ConfigError(
            "LicenseRef- license expression can only contain ASCII letters "
            "& digits, - and ."
        )

    # Imported lazily, and only on the path that actually looks up an ID;
    # LicenseRef- identifiers are validated without the license table.
    from ._spdx_data import licenses

    or_later = s.endswith('+')
    if or_later:
        # Look up the bare ID; the '+' is re-attached to the result below.
        ls = ls[:-1]

    try:
        info = licenses[ls]
    except KeyError:
        if os.environ.get('FLIT_ALLOW_INVALID'):
            log.warning("Invalid license ID {!r} allowed by FLIT_ALLOW_INVALID"
                        .format(s))
            return s
        raise ConfigError(f"{s!r} is not a recognised SPDX license ID")

    return info['id'] + ('+' if or_later else '')
2 changes: 1 addition & 1 deletion flit_core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ description = "Distribution-building parts of Flit. See flit package for more in
dependencies = []
requires-python = '>=3.6'
readme = "README.rst"
license = "BSD-3-Clause"
license-files = ["LICENSE*", "flit_core/vendor/**/LICENSE*"]
classifiers = [
"License :: OSI Approved :: BSD License",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dynamic = ["version"]
Expand Down
24 changes: 24 additions & 0 deletions flit_core/tests_core/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,27 @@ def test_metadata_2_3_provides_extra(provides_extra, expected_result):
msg = email.parser.Parser(policy=email.policy.compat32).parse(sio)
assert msg['Provides-Extra'] == expected_result
assert not msg.defects

@pytest.mark.parametrize(
    ('value', 'expected_license', 'expected_license_expression'),
    [
        ({'license': 'MIT'}, 'MIT', None),
        ({'license_expression': 'MIT'}, 'MIT', None),  # TODO Metadata 2.4
        ({'license_expression': 'Apache-2.0'}, 'Apache-2.0', None)  # TODO Metadata 2.4
    ],
)
def test_metadata_license(value, expected_license, expected_license_expression):
    """Check the License / License-Expression headers in written metadata.

    Each case supplies extra metadata fields and the header values expected
    when the resulting metadata file is parsed back.
    """
    fields = {'name': 'foo', 'version': '1.0'}
    fields.update(value)

    # Serialise to an in-memory buffer, then rewind so it can be parsed.
    buf = StringIO()
    Metadata(fields).write_metadata_file(buf)
    buf.seek(0)

    parser = email.parser.Parser(policy=email.policy.compat32)
    parsed = parser.parse(buf)
    assert parsed.get('License') == expected_license
    assert parsed.get('License-Expression') == expected_license_expression
    assert not parsed.defects
37 changes: 37 additions & 0 deletions flit_core/tests_core/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ def test_bad_include_paths(path, err_match):
({'license': {'fromage': 2}}, '[Uu]nrecognised'),
({'license': {'file': 'LICENSE', 'text': 'xyz'}}, 'both'),
({'license': {}}, 'required'),
({'license': 1}, "license field should be <class 'str'> or <class 'dict'>, not <class 'int'>"),
# ({'license': "MIT License"}, "Invalid license expression: 'MIT License'"), # TODO
(
{'license': 'MIT', 'classifiers': ['License :: OSI Approved :: MIT License']},
"License classifier are deprecated in favor of the license expression",
),
({'license-files': 1}, r"\blist\b"),
({'license-files': ["/LICENSE"]}, r"'/LICENSE'.+must not start with '/'"),
({'license-files': ["../LICENSE"]}, r"'../LICENSE'.+must not contain '..'"),
Expand Down Expand Up @@ -202,6 +208,37 @@ def test_bad_pep621_readme(readme, err_match):
config.read_pep621_metadata(proj, samples_dir / 'pep621' / 'pyproject.toml')


@pytest.mark.parametrize(('value', 'license_expression'), [
    # Accept and normalize valid SPDX expressions for 'license = ...'
    ("mit", "MIT"),
    ("apache-2.0", "Apache-2.0"),
    ("APACHE-2.0+", "Apache-2.0+"),
    # TODO: compound expressions
    #("mit and (apache-2.0 or bsd-2-clause)", "MIT AND (Apache-2.0 OR BSD-2-Clause)"),
    # LicenseRef expressions: only the LicenseRef is normalised
    ("LiceNseref-Public-DoMain", "LicenseRef-Public-DoMain"),
])
def test_license_expr(value, license_expression):
    """A string ``license`` is stored, normalised, as ``license_expression``."""
    pyproject = samples_dir / 'pep621' / 'pyproject.toml'
    proj = dict(name='module1', version='1.0', description='x', license=value)

    metadata = config.read_pep621_metadata(proj, pyproject).metadata

    # The expression must not also be stored under the plain 'license' key.
    assert 'license' not in metadata
    assert metadata['license_expression'] == license_expression

def test_license_expr_error():
    """Invalid license expressions are rejected with a ConfigError."""
    pyproject = samples_dir / 'pep621' / 'pyproject.toml'
    proj = {'name': 'module1', 'version': '1.0', 'description': 'x'}

    bad_cases = (
        ('LicenseRef-foo_bar', "can only contain"),  # Underscore not allowed
        ("BSD-33-Clause", "recognised"),  # Not a real license
    )
    for bad_license, err_match in bad_cases:
        # Reuse the same project table, swapping in each invalid value.
        proj['license'] = bad_license
        with pytest.raises(config.ConfigError, match=err_match):
            config.read_pep621_metadata(proj, pyproject)


def test_license_file_defaults_with_old_metadata():
metadata = {'module': 'mymod', 'author': ''}
info = config._prep_metadata(metadata, samples_dir / 'pep621_license_files' / 'pyproject.toml')
Expand Down
24 changes: 24 additions & 0 deletions prepare_license_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Regenerates flit_core/flit_core/_spdx_data.py from the SPDX license list.
#
# Call with path to SPDX license-list-data repo, cloned from:
# https://github.com/spdx/license-list-data
# Run it from the directory containing flit_core/, since the output path
# below is relative.

import json
import pprint
import sys
from pathlib import Path

list_data_repo = Path(sys.argv[1])
with (list_data_repo / 'json' / 'licenses.json').open('rb') as f:
    licenses_json = json.load(f)

# Keep only the IDs of licenses that are not marked as deprecated.
active = (
    entry['licenseId']
    for entry in licenses_json['licenses']
    if not entry['isDeprecatedLicenseId']
)

# Key by lower-cased ID; the value keeps the ID exactly as SPDX spells it.
condensed = {spdx_id.lower(): {'id': spdx_id} for spdx_id in active}

with Path('flit_core', 'flit_core', '_spdx_data.py').open('w') as f:
    f.write("# This file is generated from SPDX license data; don't edit it manually.\n\n")

    f.write("licenses = \\\n")
    pprint.pprint(condensed, f)

0 comments on commit 2818f41

Please sign in to comment.