
Commit

Let's use lists
cockroacher committed Sep 27, 2024
1 parent 4b0952b commit 882597a
Showing 1 changed file with 99 additions and 69 deletions.
168 changes: 99 additions & 69 deletions helpers/sri_helper.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# pylint: disable=too-many-lines
import base64
import json
import re
import urllib
import urllib.parse
@@ -141,44 +142,49 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result):
# https://www.srihash.org/
content = res['content']['text']
# TODO: Should we match all elements and give penalty when used wrong?
regex = (
r'(?P<raw><(?P<name>link|script) [^>]*?>)'
)

matches = re.finditer(regex, content, re.MULTILINE | re.IGNORECASE)
for _, match in enumerate(matches, start=1):
raw = match.group('raw')
name = match.group('name').lower()

src = None
regex_src = r'(href|src)="(?P<src>[^"\']+)["\']'
group_src = re.search(regex_src, raw, re.IGNORECASE)
if group_src is not None:
src = group_src.group('src')
src = url_2_host_source(src, req_domain)

link_rel = None
regex_rel = r'(rel)="(?P<rel>[^"\']+)["\']'
group_rel = re.search(regex_rel, raw, re.IGNORECASE)
if group_rel is not None:
link_rel = group_rel.group('rel').lower()

should_have_integrity = False
if name in ('link'):
if link_rel in ('stylesheet', 'preload', 'modulepreload'):
should_have_integrity = True
elif name in ('script') and src is not None:
should_have_integrity = True

if should_have_integrity:
print('A', raw)
print('\tname:', name)
print('\tsrc/href:', src)
print('')

candidates = get_sri_candidates(req_domain, content)
nice_candidates = json.dumps(candidates, indent=3)
print('Candidates', nice_candidates)
# regex = (
# r'(?P<raw><(?P<name>link|script)[^<]*? integrity=["\'](?P<integrity>[^"\']+)["\'][^>]*?>)'
# )
found_sris = get_sris(req_domain, content)
nice_found_sris = json.dumps(found_sris, indent=3)
print('SRI', nice_found_sris)

csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources(
req_domain,
org_domain,
result,
content)

regex = r'<(?P<type>style|script|form)>'
matches = re.finditer(regex, content, re.DOTALL | re.IGNORECASE | re.MULTILINE)
for _, match in enumerate(matches, start=1):
element_name = match.group('type').lower()
if element_name in ('style', 'script'):
key = f'\'unsafe-inline\'|{element_name}'
if key not in result[org_domain]['csp-findings']['quotes']:
result[org_domain]['csp-findings']['quotes'].append(key)
csp_findings_match = True
elif element_name == 'form':
element_url = url_2_host_source(req_url, req_domain)
o = urllib.parse.urlparse(element_url)
element_domain = o.hostname
if element_domain == org_domain:
key = f'\'self\'|{element_name}'
if key not in result[org_domain]['csp-findings']['quotes']:
result[org_domain]['csp-findings']['quotes'].append(key)
csp_findings_match = True
else:
key = f'{element_domain}|{element_name}'
if key not in result[org_domain]['csp-findings']['host-sources']:
result[org_domain]['csp-findings']['host-sources'].append(key)
csp_findings_match = True
return csp_findings_match

def get_sris(req_domain, content):
sri_list = []
regex = (
r'(?P<raw><(?P<name>[a-z]+)[^<]*? integrity=["\'](?P<integrity>[^"\']+)["\'][^>]*?>)'
)
@@ -192,17 +198,23 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result):
# - rel="stylesheet"
# - rel="preload"
# - rel="modulepreload"
print('B', raw)
print('\tname:', name)
print('\tintegrity:', integrity)
sri = {
'raw': raw,
'tag-name': name,
'integrity': integrity
}
# print('B', raw)
# print('\tname:', name)
# print('\tintegrity:', integrity)

src = None
regex_src = r'(href|src)="(?P<src>[^"\']+)["\']'
group_src = re.search(regex_src, raw, re.IGNORECASE)
if group_src is not None:
src = group_src.group('src')
src = url_2_host_source(src, req_domain)
print('\tsrc/href:', src)
sri['src'] = src
# print('\tsrc/href:', src)

src_type = None
if name == 'script':
@@ -223,8 +235,10 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result):
if src_type is None and link_rel in ('stylesheet'):
src_type = 'style'

print('\ttype:', src_type)
print('\trel:', link_rel)
sri['type'] = src_type
sri['rel'] = link_rel
# print('\ttype:', src_type)
# print('\trel:', link_rel)

if name in ('link'):
if link_rel not in ('stylesheet', 'preload', 'modulepreload'):
@@ -236,37 +250,53 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result):
print('WEBSITE WARNING: USING integrity incorrectly!')

print('')
sri_list.append(sri)

csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources(
req_domain,
org_domain,
result,
content)
return sri_list

regex = r'<(?P<type>style|script|form)>'
matches = re.finditer(regex, content, re.DOTALL | re.IGNORECASE | re.MULTILINE)
def get_sri_candidates(req_domain, content):
candidates = []
regex = (
r'(?P<raw><(?P<name>link|script) [^>]*?>)'
)

matches = re.finditer(regex, content, re.MULTILINE | re.IGNORECASE)
for _, match in enumerate(matches, start=1):
element_name = match.group('type').lower()
if element_name in ('style', 'script'):
key = f'\'unsafe-inline\'|{element_name}'
if key not in result[org_domain]['csp-findings']['quotes']:
result[org_domain]['csp-findings']['quotes'].append(key)
csp_findings_match = True
elif element_name == 'form':
element_url = url_2_host_source(req_url, req_domain)
o = urllib.parse.urlparse(element_url)
element_domain = o.hostname
if element_domain == org_domain:
key = f'\'self\'|{element_name}'
if key not in result[org_domain]['csp-findings']['quotes']:
result[org_domain]['csp-findings']['quotes'].append(key)
csp_findings_match = True
else:
key = f'{element_domain}|{element_name}'
if key not in result[org_domain]['csp-findings']['host-sources']:
result[org_domain]['csp-findings']['host-sources'].append(key)
csp_findings_match = True
return csp_findings_match
raw = match.group('raw')
name = match.group('name').lower()

src = None
regex_src = r'(href|src)="(?P<src>[^"\']+)["\']'
group_src = re.search(regex_src, raw, re.IGNORECASE)
if group_src is not None:
src = group_src.group('src')
src = url_2_host_source(src, req_domain)

link_rel = None
regex_rel = r'(rel)="(?P<rel>[^"\']+)["\']'
group_rel = re.search(regex_rel, raw, re.IGNORECASE)
if group_rel is not None:
link_rel = group_rel.group('rel').lower()

should_have_integrity = False
if name in ('link'):
if link_rel in ('stylesheet', 'preload', 'modulepreload'):
should_have_integrity = True
elif name in ('script') and src is not None:
should_have_integrity = True

if should_have_integrity:
# print('A', raw)
# print('\tname:', name)
# print('\tsrc/href:', src)
# print('')
candidates.append({
'raw': raw,
'tag-name': name,
'src': src
})

return candidates

def append_csp_data_for_linked_resources(req_domain, org_domain, result, content):
"""
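For orientation, a minimal sketch of how the two list-returning helpers introduced above, get_sri_candidates and get_sris, might be exercised together. The sample markup, the import path, and the cross-check for tags lacking an integrity attribute are illustrative assumptions, not code from this commit.

# Illustrative usage only: the sample HTML, the import path and the
# missing-integrity comparison are assumptions for demonstration,
# not code from the repository.
from helpers.sri_helper import get_sri_candidates, get_sris

sample_html = (
    '<link rel="stylesheet" href="https://cdn.example.com/site.css">'
    '<script src="https://cdn.example.com/app.js" '
    'integrity="sha384-examplehashvalue"></script>'
)

# Tags that should carry an integrity attribute (link rel=stylesheet/preload/
# modulepreload, or script with a src), as collected by get_sri_candidates.
candidates = get_sri_candidates('example.com', sample_html)

# Tags that already declare an integrity attribute, as collected by get_sris.
found = get_sris('example.com', sample_html)

# Flag candidates that have no matching integrity-carrying tag.
covered = {item['raw'] for item in found}
for item in (c for c in candidates if c['raw'] not in covered):
    print('No integrity attribute on', item['tag-name'], item['src'])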
