Merge pull request Webperf-se#354 from Webperf-se/issue-348
Lighthouse-based tests improvement + bugfix
7h3Rabbit authored Apr 1, 2024
2 parents 14c5cb5 + 8263cd2 commit e77e3ff
Showing 4 changed files with 65 additions and 33 deletions.
2 changes: 1 addition & 1 deletion test_dnssec.py
@@ -236,7 +236,7 @@ def testdns(key, datatype, use_dnssec):
print('\ttestdns', key, datatype, use_dnssec)
cache_key = 'dnslookup://{0}#{1}#{2}'.format(key, datatype, use_dnssec)
if has_cache_file(cache_key, True, CACHE_TIME_DELTA):
cache_path = get_cache_path(cache_key, True)
cache_path = get_cache_path_for_file(cache_key, True)
print('\t- Using dnslookup cache')
response = dns.message.from_file(cache_path)
print('\t- response:\n\t\t{0}'.format(response.to_text().replace('\n', '\n\t\t')))
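The change in test_dnssec.py follows the split in tests/utils.py: the old get_cache_path has been replaced by get_cache_path_for_rule plus get_cache_path_for_file, and call sites that expect an actual cache file now use the latter. A minimal standalone sketch, not the project's code, of the on-disk name such a dnslookup cache key should resolve to, assuming USE_CACHE is enabled and the sha512 / '.txt.utf-8.cache' layout shown further down in tests/utils.py:

# Sketch only: expected cache file name for a hypothetical dnslookup cache key,
# mirroring the layout from tests/utils.py in this commit.
import hashlib
from urllib.parse import urlparse

cache_key = 'dnslookup://webperf.se#A#True'   # hypothetical key, datatype, use_dnssec
hostname = urlparse(cache_key).hostname       # 'webperf.se'
digest = hashlib.sha512(cache_key.encode()).hexdigest()
print(f'cache/{hostname}/{digest}.txt.utf-8.cache')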
22 changes: 9 additions & 13 deletions tests/lighthouse_base.py
@@ -1,12 +1,10 @@
# -*- coding: utf-8 -*-
from datetime import time
import os
from pathlib import Path
import sys
import json
from urllib.parse import urlparse
import time
from models import Rating
from tests.utils import get_config_or_default, get_http_content, is_file_older_than
from tests.utils import get_config_or_default, get_http_content, is_file_older_than, get_cache_path_for_rule

REQUEST_TIMEOUT = get_config_or_default('http_request_timeout')
USE_CACHE = get_config_or_default('cache_when_possible')
@@ -171,17 +169,12 @@ def get_json_result(langCode, url, googlePageSpeedApiKey, strategy, category, li
print(
'Error! Unfortunately the request for URL "{0}" failed, message:\n{1}'.format(
check_url, sys.exc_info()[0]))
return
return {}
elif USE_CACHE:
base_directory = Path(os.path.dirname(
os.path.realpath(__file__)) + os.path.sep).parent
try:
folder = 'cache'
cache_key_rule = 'lighthouse-{0}'
cache_path = get_cache_path_for_rule(url, cache_key_rule)

o = urlparse(url)
hostname = o.hostname

cache_path = os.path.join(base_directory, folder, hostname, 'lighthouse')
if not os.path.exists(cache_path):
os.makedirs(cache_path)

@@ -212,7 +205,10 @@ def get_json_result(langCode, url, googlePageSpeedApiKey, strategy, category, li
with open(result_file, 'r', encoding='utf-8', newline='') as file:
return str_to_json('\n'.join(file.readlines()), check_url)
except:
return
print(
'Error! Unfortunately the request for URL "{0}" failed, message:\n{1}'.format(
check_url, sys.exc_info()[0]))
return {}
else:
command = "node node_modules{4}lighthouse{4}cli{4}index.js {1} --output json --output-path stdout --locale {3} --only-categories {0} --form-factor {2} --chrome-flags=\"--headless\" --quiet".format(
category, check_url, strategy, langCode, os.path.sep)
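Two things change in lighthouse_base.py: the per-URL Lighthouse cache directory is now derived through get_cache_path_for_rule(url, 'lighthouse-{0}') instead of being joined manually from cache/<hostname>/lighthouse, and the failure paths now return {} instead of a bare return (None), presumably so callers can treat the result uniformly as a dict. A small sketch, not the project's code, of why an empty dict is the friendlier failure value, assuming callers read the result like a Lighthouse JSON report:

# Sketch: a dict result can be probed uniformly with .get(); a None result
# would need an explicit guard before every access.
def sketch_get_json_result(fail):
    if fail:
        return {}   # new behaviour on error (previously an implicit None)
    return {'categories': {'performance': {'score': 0.93}}}   # made-up shape for illustration

result = sketch_get_json_result(fail=True)
score = result.get('categories', {}).get('performance', {}).get('score')
print(score)   # prints None instead of raising AttributeError on a None result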
70 changes: 53 additions & 17 deletions tests/utils.py
@@ -119,18 +119,16 @@ def is_file_older_than(file, delta):
return True
return False


def get_cache_path(url, use_text_instead_of_content):
def get_cache_path_for_rule(url, cache_key_rule):
"""
Generates a cache path for a given URL. The cache path is based on the hostname of the URL and
a hash of the URL itself.
The function also ensures that the necessary directories for storing the cache file exist.
Parameters:
url (str): The URL for which to generate a cache path.
use_text_instead_of_content (bool): Determines the format of the cache file.
If True, the cache file is in '.txt.utf-8' format.
If False, the cache file is in '.bytes' format.
cache_key_rule (str): Determines the format of the cache file/folder name.
{0} in rule will be replaced by a sha512 hexdigest for supplied url.
Returns:
str: The generated cache path.
@@ -140,10 +138,8 @@ def get_cache_path(url, use_text_instead_of_content):
if hostname is None:
hostname = 'None'

file_ending = '.tmp'
folder = 'tmp'
if USE_CACHE:
file_ending = '.cache'
folder = 'cache'

folder_path = os.path.join(folder)
@@ -154,16 +150,56 @@ def get_cache_path(url, use_text_instead_of_content):
if not os.path.exists(hostname_path):
os.makedirs(hostname_path)

cache_key_rule = '{0}.txt.utf-8{1}'
if not use_text_instead_of_content:
cache_key_rule = '{0}.bytes{1}'

cache_key = cache_key_rule.format(
hashlib.sha512(url.encode()).hexdigest(), file_ending)
hashlib.sha512(url.encode()).hexdigest())
cache_path = os.path.join(folder, hostname, cache_key)

return cache_path

def get_cache_path_for_folder(url):
"""
Generates a cache path for a given URL. The cache path is based on the hostname of the URL and
a hash of the URL itself.
The function also ensures that the necessary directories for storing the cache file exist.
Parameters:
url (str): The URL for which to generate a cache path.
Returns:
str: The generated cache path.
"""

cache_key_rule = '{0}'

return get_cache_path_for_rule(url, cache_key_rule)


def get_cache_path_for_file(url, use_text_instead_of_content):
"""
Generates a cache path for a given URL. The cache path is based on the hostname of the URL and
a hash of the URL itself.
The function also ensures that the necessary directories for storing the cache file exist.
Parameters:
url (str): The URL for which to generate a cache path.
use_text_instead_of_content (bool): Determines the format of the cache file.
If True, the cache file is in '.txt.utf-8' format.
If False, the cache file is in '.bytes' format.
Returns:
str: The generated cache path.
"""

file_ending = '.tmp'
if USE_CACHE:
file_ending = '.cache'

cache_key_rule = '{0}.txt.utf-8' + file_ending
if not use_text_instead_of_content:
cache_key_rule = '{0}.bytes' + file_ending

return get_cache_path_for_rule(url, cache_key_rule)


def get_cache_file(url, use_text_instead_of_content, time_delta):
"""
@@ -184,10 +220,10 @@ def get_cache_file(url, use_text_instead_of_content, time_delta):
If the cache file does not exist or is too old, None is returned.
Notes:
- The function uses the get_cache_path function to determine the path of the cache file.
- The function uses the get_cache_path_for_file function to determine the path of the cache file.
- If USE_CACHE is False, the function always returns None.
"""
cache_path = get_cache_path(url, use_text_instead_of_content)
cache_path = get_cache_path_for_file(url, use_text_instead_of_content)

if not os.path.exists(cache_path):
return None
@@ -217,7 +253,7 @@ def has_cache_file(url, use_text_instead_of_content, time_delta):
bool: True if the cache file exists and is not older than the specified time delta,
False otherwise.
"""
cache_path = get_cache_path(url, use_text_instead_of_content)
cache_path = get_cache_path_for_file(url, use_text_instead_of_content)

if not os.path.exists(cache_path):
return False
@@ -307,7 +343,7 @@ def set_cache_file(url, content, use_text_instead_of_content):
use_text_instead_of_content (bool): Flag to determine how to write
the content.
"""
cache_path = get_cache_path(url, use_text_instead_of_content)
cache_path = get_cache_path_for_file(url, use_text_instead_of_content)
if use_text_instead_of_content:
with open(cache_path, 'w', encoding='utf-8', newline='') as file:
file.write(content)
@@ -549,7 +585,7 @@ def dns_lookup(key, datatype):
use_dnssec = False
cache_key = f'dnslookup://{key}#{datatype}#{use_dnssec}'
if has_cache_file(cache_key, True, CACHE_TIME_DELTA):
cache_path = get_cache_path(cache_key, True)
cache_path = get_cache_path_for_file(cache_key, True)
response = dns.message.from_file(cache_path)
return dns_response_to_list(response)

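The net effect in tests/utils.py is a three-layer API: get_cache_path_for_rule does the hostname parsing, sha512 hashing and directory creation, while get_cache_path_for_folder and get_cache_path_for_file only differ in the naming rule they pass down. A standalone sketch of the rule substitution, mirroring (not importing) the logic above and assuming USE_CACHE is enabled; directory creation from the real helper is omitted to keep the sketch side-effect free:

import hashlib
import os
from urllib.parse import urlparse

def sketch_path_for_rule(url, cache_key_rule, use_cache=True):
    # <folder>/<hostname>/<rule with {0} replaced by the URL's sha512 hexdigest>
    hostname = urlparse(url).hostname or 'None'
    folder = 'cache' if use_cache else 'tmp'
    cache_key = cache_key_rule.format(hashlib.sha512(url.encode()).hexdigest())
    return os.path.join(folder, hostname, cache_key)

url = 'https://webperf.se/'
print(sketch_path_for_rule(url, 'lighthouse-{0}'))        # rule used by lighthouse_base.py
print(sketch_path_for_rule(url, '{0}'))                   # get_cache_path_for_folder
print(sketch_path_for_rule(url, '{0}.txt.utf-8.cache'))   # get_cache_path_for_file (text, cache on)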
4 changes: 2 additions & 2 deletions tests/w3c_base.py
@@ -2,7 +2,7 @@
import os
import subprocess
import json
from tests.utils import get_cache_path, get_config_or_default, has_cache_file, set_cache_file
from tests.utils import get_cache_path_for_file, get_config_or_default, has_cache_file, set_cache_file

# DEFAULTS
REQUEST_TIMEOUT = get_config_or_default('http_request_timeout')
@@ -53,7 +53,7 @@ def get_errors(test_type, params):
raise ValueError(
f'Tested url must start with \'https://\' or \'http://\': {url}')

file_path = get_cache_path(url, True)
file_path = get_cache_path_for_file(url, True)
if is_html:
html_file_ending_fix = file_path.replace('.cache', '.cache.html')
if has_cache_file(url, True, CACHE_TIME_DELTA) \
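w3c_base.py follows the same rename; for HTML input it also rewrites the '.cache' ending produced by get_cache_path_for_file to '.cache.html', presumably so the cached file is treated as HTML. A trivial sketch of that rewrite on a hypothetical cached path, assuming caching is enabled:

# Sketch of the '.cache' -> '.cache.html' rename, on a hypothetical cached path.
file_path = 'cache/webperf.se/abc123.txt.utf-8.cache'
html_file_ending_fix = file_path.replace('.cache', '.cache.html')
print(html_file_ending_fix)   # cache/webperf.se/abc123.txt.utf-8.cache.html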
