Skip to content

Commit

Permalink
Add route, view and helpers for exporting d/copyright to spdx
Browse files Browse the repository at this point in the history
  • Loading branch information
oorestisime committed Dec 22, 2015
1 parent e634464 commit 56f332e
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 3 deletions.
13 changes: 11 additions & 2 deletions debsources/app/copyright/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
from __future__ import absolute_import


from flask import jsonify
from flask import jsonify, make_response

from ..helper import bind_render
from . import bp_copyright
from ..views import (IndexView, PrefixView, ListPackagesView, ErrorHandler,
Ping, PackageVersionsView, DocView, AboutView, SearchView)
from .views import LicenseView, ChecksumLicenseView, SearchFileView, StatsView
from .views import (LicenseView, ChecksumLicenseView, SearchFileView,
StatsView, SPDXView)


# context vars
Expand Down Expand Up @@ -254,3 +255,11 @@ def skeleton_variables():
render_func=jsonify,
err_func=ErrorHandler(mode='json'),
get_objects='stats_suite'))

# SPDX view: export of a package's debian/copyright file as an SPDX document.
# make_response is given as render_func because dispatch_request special-cases
# it to build a raw response with a Content-Disposition header instead of
# rendering a template or JSON. Errors are rendered by ErrorHandler in JSON
# mode.
bp_copyright.add_url_rule(
'/spdx/<path:path_to>/',
view_func=SPDXView.as_view(
'spdx',
render_func=make_response,
err_func=ErrorHandler(mode='json')))
1 change: 1 addition & 0 deletions debsources/app/copyright/templates/copyright/license.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ <h2>{{ self.title() }} / {{ version }}</h2>
{% if dump == 'True' %}
{% include "source_file_code.inc.html" %}
{% else %}
<div class="warning"><a href="{{url_for('.spdx', path_to=package + '/' + version) }}">Export to SPDX</a></div>
{% include "copyright/license_render.inc.html" %}
{% endif %}
{% endblock %}
50 changes: 50 additions & 0 deletions debsources/app/copyright/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,53 @@ def get_stats(self):
dual_results=dual_res,
dual_licenses=sorted(dual_licenses),
suites=all_suites)


class SPDXView(GeneralView):
    """ Export the debian/copyright file of a package version as an SPDX
    document, returned as a downloadable attachment.
    """

    def _generate_file(self, spdx_values):
        """ Join the SPDX entries in a single text, one entry per line.

        Byte strings are decoded as UTF-8. Values that are already text are
        used as they are: calling decode() on them would first encode them
        as ASCII and fail on any non ASCII character.
        """
        lines = []
        for value in spdx_values:
            if isinstance(value, bytes):
                value = value.decode('utf-8')
            lines.append(value + '\n')
        return ''.join(lines)

    def get_objects(self, path_to):
        """ Build the SPDX export for `path_to` (<package>/<version>[/...]).

        Returns a dict with the document text (`spdx`) and the value of the
        Content-Disposition header (`header`), or a redirection when the
        version is "latest" or is resolved by `handle_versions`.

        Raises Http404ErrorSuggestions when the version is missing from the
        URL, when the package/version or its debian/copyright file cannot
        be found, or when the file is not machine readable.
        """
        path_dict = path_to.split('/')

        package = path_dict[0]
        if len(path_dict) < 2:
            # /spdx/<package>/ without a version: nothing can be exported
            raise Http404ErrorSuggestions(package, '', '')
        version = path_dict[1]
        path = '/'.join(path_dict[2:])

        if version == "latest":  # we search the latest available version
            return self._handle_latest_version(request.endpoint,
                                               package, path)

        versions = self.handle_versions(version, package, path)
        if versions:
            redirect_url_parts = [package, versions[-1]]
            if path:
                redirect_url_parts.append(path)
            redirect_url = '/'.join(redirect_url_parts)
            return self._redirect_to_url(request.endpoint,
                                         redirect_url, redirect_code=302)

        try:
            sources_path = helper.get_sources_path(session, package, version,
                                                   current_app.config)
        except FileOrFolderNotFound:
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')
        except InvalidPackageOrVersionError:
            raise Http404ErrorSuggestions(package, version, '')

        try:
            c = helper.parse_license(sources_path)
        except Exception:
            # non machine readable license: there is nothing to export.
            # Raise a 404 rather than returning a context without the
            # 'spdx' and 'header' keys that dispatch_request reads.
            raise Http404ErrorSuggestions(package, version,
                                          'debian/copyright')

        spdx = helper.export_copyright_to_spdx(
            c, session=session, package=package, version=version)
        attachment = "attachment;" + "filename=" + \
            path_to.replace('/', '_') + ".spdx"
        return dict(spdx=self._generate_file(spdx),
                    header=attachment)
7 changes: 6 additions & 1 deletion debsources/app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
from debian.debian_support import version_compare

from flask import (
current_app, jsonify, render_template, request, url_for, redirect)
current_app, jsonify, render_template, request, url_for, redirect,
make_response)
from flask.views import View

from debsources.excepts import (
Expand Down Expand Up @@ -192,6 +193,10 @@ def dispatch_request(self, **kwargs):
"""
try:
context = self.get_objects(**kwargs)
if self.render_func is make_response:
response = make_response(context['spdx'])
response.headers["Content-Disposition"] = context['header']
return response
return self.render_func(**context)
except Http403Error as e:
return self.err_func(e, http=403)
Expand Down
166 changes: 166 additions & 0 deletions debsources/license_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
import io
import logging
import re
import hashlib
from datetime import datetime

from flask import url_for
from debian import copyright

from debsources.models import Checksum, File, Package, PackageName
from debsources.navigation import Location, SourceFile

# import debsources.query as qry
Expand Down Expand Up @@ -134,6 +137,10 @@ def get_license(session, package, version, path, license_path=None):
return None


def get_paragraph(c, path):
    """ Return what `c.find_files_paragraph` gives for `path`, i.e. the
    Files paragraph of the copyright object `c` that applies to that path.
    """
    paragraph = c.find_files_paragraph(path)
    return paragraph


def get_copyright_header(copyright):
""" Return all the header attributs
Expand Down Expand Up @@ -197,6 +204,8 @@ def create_url(glob="", base=None,):
def match_license(synopsis):
""" Matches a `synopsis` with a license and creates a url
"""
if any(keyword in synopsis for keyword in ['with', 'exception']):
return None
key = filter(lambda x: re.search(x, synopsis) is not None, Licenses)
if len(key) is not 0:
return Licenses[key[0]]
Expand Down Expand Up @@ -241,3 +250,160 @@ def anchor_to_license(copyright, synopsis):
return '#license-' + str(licenses.index(synopsis))
else:
return None


def export_copyright_to_spdx(c, package, version, session):
    """ Creates the SPDX document of a machine readable debian/copyright.

    `c` is the parsed copyright object; its header, files paragraphs and
    license paragraphs are read. `package` and `version` select the files
    and checksums queried through `session`.

    Returns the document as a list of strings (each entry holds one or
    more "Tag: value" lines). Nothing is written to disk.
    """

    def create_package_code(session, package, version):
        """ Hash the sorted sha256 checksums of the files of the package.
        """
        sha = (session.query(Checksum.sha256.label("sha256"))
               .filter(Checksum.package_id == Package.id)
               .filter(Checksum.file_id == File.id)
               .filter(Package.name_id == PackageName.id)
               .filter(PackageName.name == package)
               .filter(Package.version == version)
               .order_by("sha256")
               ).all()
        # rows without a checksum cannot be joined; leave them out
        sha_values = [row[0] for row in sha if row[0]]
        joined = "".join(sha_values).encode('utf-8')
        return hashlib.sha256(joined).hexdigest()

    def create_license_ref(license, count, refs, unknown):
        """ Creates license references and adds it in the specific
        dictionary. Also adds the non standard licenses in unknown
        licenses. Empty names and names already referenced are ignored.
        """
        if license and license not in refs:
            if not match_license(license):
                l_id = 'LicenseRef-' + str(count)
                refs[license] = l_id
                count += 1
                # name the license being referenced, not the whole
                # synopsis it was extracted from
                unknown[license] = "LicenseID: " + l_id + \
                    "\nLicenseName: " + license
            else:
                # useful in PackageLicenseInfoFromFiles
                refs[license] = license

        return refs, unknown, count

    # find out which are not standard and save SPDX required information
    # Non standard licenses are referenced as LicenseRef-<number>
    refs = dict()
    count = 0
    unknown = dict()
    for par in c.all_files_paragraphs():
        try:
            synopsis = par.license.synopsis
            if any(keyword in synopsis for keyword in ['and', 'or']):
                names = [name.strip()
                         for name in re.split(r', |and |or ', synopsis)]
            else:
                names = [synopsis]
            for name in names:
                refs, unknown, count = create_license_ref(name, count,
                                                          refs, unknown)
        except (AttributeError, ValueError):
            # paragraph without a usable license: nothing to reference
            continue

    # add the available extracted license text for unknown licenses
    for par in c.all_license_paragraphs():
        try:
            synopsis = par.license.synopsis
            if synopsis in refs and not match_license(synopsis):
                unknown[synopsis] = "LicenseID: " + refs[synopsis] + \
                    "\nExtractedText: <text>" + \
                    (par.license.text or '') + "</text>" + \
                    "\nLicenseName: " + synopsis
        except (AttributeError, ValueError):
            continue

    # the trailing 'Z' states UTC, so the timestamp must be taken in UTC
    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    # Upstream-Name may be missing from the header; use the package name
    name = c.header.upstream_name or package

    # NOTE(review): the DocumentNamespace below is a fixed example value and
    # DocumentComment mentions license list 2.3 while LicenseListVersion is
    # 2.0 -- both kept as they were, to be confirmed.
    spdx = ["SPDXVersion: SPDX-2.0", "DataLicense: CC0-1.0",
            "SPDXID: SPDXRef-DOCUMENT",
            "Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package",
            "DocumentName: " + name,
            "DocumentNamespace: http://spdx.org/spdxdocs/" +
            "spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301",
            "LicenseListVersion: 2.0",
            "Creator: Person: Debsources",
            "Creator: Organization: Debsources",
            "Creator: Tool: Debsources",
            "Created: " + now,
            "CreatorComment: <text> This document was created by " +
            "Debsources by parsing the respective debian/copyright " +
            "file of the package provided by the Debian project. You " +
            "may follow these links: http://debian.org/ " +
            "http://sources.debian.net/ to get more information about " +
            "Debian and Debsources. </text>",
            "DocumentComment: <text>This document was created using " +
            "SPDX 2.0, version 2.3 of the SPDX License List.</text>",
            "PackageName: " + name,
            "SPDXID: SPDXRef-Package",
            "PackageDownloadLocation: NOASSERTION",
            "PackageVerificationCode: " + create_package_code(session,
                                                              package,
                                                              version),
            "PackageLicenseConcluded: NOASSERTION"]
    # sorted so that two exports of the same package are identical
    for value in sorted(set(refs.values())):
        spdx.append("PackageLicenseInfoFromFiles: " + value)

    spdx.extend(["PackageLicenseDeclared: NOASSERTION",
                 "PackageCopyrightText: NOASSERTION"])
    for file_entries in get_files_spdx(refs, package, version, session, c):
        # the entries are already strings; str() would fail on non ASCII
        # unicode text under Python 2
        spdx.extend(file_entries)
    for key in sorted(unknown):
        spdx.append(unknown[key])
    return spdx


def get_files_spdx(refs, package, version, session, c):
    """ Get all files from the DB for a specific package and version and
    create the SPDX entries of each of them.

    `refs` maps license names to the identifier to use in the document,
    `c` is the parsed copyright object used to find the paragraph that
    applies to each file.

    Returns a list with one list of "Tag: value" strings per file.
    """

    def replace_all(text, dic):
        """ Replace all occurrences of the keys in dic by the corresponding
        value, in a single pass and trying the longest keys first.

        Chained str.replace calls would let a short key rewrite part of a
        longer one, or rewrite the output of a previous replacement; an
        empty key would match between every character.
        """
        keys = sorted((key for key in dic if key), key=len, reverse=True)
        if not keys:
            return text
        pattern = re.compile('|'.join(re.escape(key) for key in keys))
        return pattern.sub(lambda match: dic[match.group(0)], text)

    files = (session.query(Checksum.sha256.label("sha256"),
                           File.path.label("path"))
             .filter(Checksum.package_id == Package.id)
             .filter(Checksum.file_id == File.id)
             .filter(Package.name_id == PackageName.id)
             .filter(PackageName.name == package)
             .filter(Package.version == version)
             )

    files_info = []

    for i, f in enumerate(files.all()):
        # None when no Files paragraph applies to the file
        par = get_paragraph(c, f.path)
        try:
            synopsis = par.license.synopsis
            if not match_license(synopsis):
                license_concluded = replace_all(synopsis, refs)
            else:
                license_concluded = synopsis
        except (AttributeError, ValueError):
            # no paragraph or no license for this file: nothing was
            # concluded. "None" is not a value SPDX accepts here.
            license_concluded = "NOASSERTION"

        # a file without paragraph has no copyright either; this must not
        # abort the whole export
        holders = getattr(par, 'copyright', None)
        if holders is None:
            copyright_text = "NOASSERTION"
        else:
            copyright_text = "<text>" + holders.encode('utf-8') + "</text>"

        # NOASSERTION means that the SPDX generator did not calculate that
        # value.
        sha = f.sha256 or 'NOASSERTION'
        files_info.append(["FileName: " + f.path,
                           "SPDXID: SPDX-FILE-REF-" + str(i),
                           "FileChecksum: SHA256: " + sha,
                           "LicenseConcluded: " + license_concluded,
                           "LicenseInfoInFile: NOASSERTION",
                           "FileCopyrightText: " + copyright_text])
    return files_info

0 comments on commit 56f332e

Please sign in to comment.