
Commit

pylint
cockroacher committed Apr 7, 2024
1 parent b5ea5a0 commit ff0406e
Showing 4 changed files with 45 additions and 98 deletions.
18 changes: 1 addition & 17 deletions carbon-rating.py
@@ -75,23 +75,7 @@ def main(argv):
             sys.exit(2)
         elif opt in ("-i", "--input"): # input file path
             input_filename = arg
-
-            file_ending = ""
-            file_long_ending = ""
-            if (len(input_filename) > 4):
-                file_ending = input_filename[-4:].lower()
-            if (len(input_filename) > 7):
-                file_long_ending = input_filename[-7:].lower()
-
-            if file_long_ending == ".sqlite":
-                from engines.sqlite import read_sites, add_site, delete_site
-            elif (file_ending == ".csv"):
-                from engines.csv_engine import read_sites, add_site, delete_site
-            elif (file_ending == ".xml"): # https://example.com/sitemap.xml
-                from engines.sitemap import read_sites, add_site, delete_site
-            else:
-                from engines.json_engine import read_tests, read_sites, add_site, delete_site
-            pass
+            from engines.json_engine import read_tests
         elif opt in ("-o", "--output"): # output file path
             output_filename = arg
             pass
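The change above removes carbon-rating.py's per-file-type engine selection; the script now always imports read_tests from engines.json_engine. For orientation, here is a sketch of the removed dispatch idea, which default.py's set_input_handlers still performs. pick_engine below is a hypothetical helper, not code from this repository, though the engine module names are real:

def pick_engine(input_filename):
    # Inspect the file ending to decide which engine module to use
    # (mirrors the logic deleted above; default.py keeps this job instead).
    file_ending = input_filename[-4:].lower() if len(input_filename) > 4 else ""
    file_long_ending = input_filename[-7:].lower() if len(input_filename) > 7 else ""

    if file_long_ending == ".sqlite":
        import engines.sqlite as engine
    elif file_ending == ".csv":
        import engines.csv_engine as engine
    elif file_ending == ".xml":  # e.g. https://example.com/sitemap.xml
        import engines.sitemap as engine
    else:
        import engines.json_engine as engine
    return engine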
20 changes: 6 additions & 14 deletions default.py
@@ -11,12 +11,8 @@
     add_site as csv_add_site,\
     delete_site as csv_delete_site,\
     write_tests as csv_write_tests
-from engines.sitemap import read_sites as sitemap_read_sites,\
-    add_site as sitemap_add_site,\
-    delete_site as sitemap_delete_site
-from engines.sitespeed_result import read_sites as sitespeed_read_sites,\
-    add_site as sitespeed_add_site,\
-    delete_site as sitespeed_delete_site
+from engines.sitemap import read_sites as sitemap_read_sites
+from engines.sitespeed_result import read_sites as sitespeed_read_sites
 from engines.webperf import read_sites as webperf_read_sites,\
     add_site as webperf_add_site,\
     delete_site as webperf_delete_site
@@ -336,15 +332,11 @@ def set_input_handlers(self, input_filename):
             add_site = csv_add_site
             delete_site = csv_delete_site
         elif file_ending == ".xml" or file_long_ending == ".xml.gz":
-            # https://example.com/sitemap.xml
-            # https://example.com/sitemap.xml.gz
+            # https://example.com/sitemap.xml
+            # https://example.com/sitemap.xml.gz
             read_sites = sitemap_read_sites
-            add_site = sitemap_add_site
-            delete_site = sitemap_delete_site
         elif file_long_ending == ".result":
             read_sites = sitespeed_read_sites
-            add_site = sitespeed_add_site
-            delete_site = sitespeed_delete_site
         elif file_long_ending == ".webprf":
             read_sites = webperf_read_sites
             add_site = webperf_add_site
@@ -482,14 +474,14 @@ def main(argv):
             options.input_skip,
             options.input_take)

-    if options.add_url != '':
+    if options.add_url != '' and options.add_site is not None:
         # check if website url should be added
         options.sites = options.add_site(
             options.input_filename,
             options.add_url,
             options.input_skip,
             options.input_take)
-    elif options.delete_url != '':
+    elif options.delete_url != '' and options.delete_site is not None:
         # check if website url should be deleted
         options.sites = options.delete_site(
             options.input_filename,
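Both guards above follow from the import changes: the sitemap and sitespeed engines no longer provide add_site or delete_site, so set_input_handlers leaves those handlers unset and main() must not call them blindly. A minimal, self-contained sketch of the pattern, assuming a stand-in options object (the repo's real one differs):

class Options:
    # Stand-in for the real options object in default.py.
    def __init__(self):
        self.add_url = ''
        self.delete_url = ''
        self.add_site = None      # stays None for read-only engines
        self.delete_site = None   # stays None for read-only engines
        self.input_filename = ''
        self.input_skip = 0
        self.input_take = -1
        self.sites = []

options = Options()
options.add_url = 'https://example.com'

# With add_site left as None (e.g. a sitemap input), the guard skips the
# call instead of raising "TypeError: 'NoneType' object is not callable".
if options.add_url != '' and options.add_site is not None:
    options.sites = options.add_site(options.input_filename, options.add_url,
                                     options.input_skip, options.input_take)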
43 changes: 0 additions & 43 deletions engines/sitemap.py
@@ -164,46 +164,3 @@ def get_root_element(sitemap_content):
             root_element = element
             break
     return root_element
-
-
-def add_site(input_url, _, input_skip, input_take):
-    """
-    This function reads site data from a specific sitemap,
-    prints a warning message (because it is read only),
-
-    Parameters:
-    input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
-    input_skip (int): The number of lines to skip in the input file.
-    input_take (int): The number of lines to take from the input file after skipping.
-
-    Returns:
-    list: The list of sites read from the specified sitemap.
-    """
-
-    print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
-          ,"NO changes will be made")
-
-    sites = read_sites(input_url, input_skip, input_take)
-
-    return sites
-
-
-def delete_site(input_url, _, input_skip, input_take):
-    """
-    This function reads site data from a specific sitemap,
-    prints a warning message (because it is read only),
-
-    Parameters:
-    input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
-    input_skip (int): The number of lines to skip in the input file.
-    input_take (int): The number of lines to take from the input file after skipping.
-
-    Returns:
-    list: The list of sites read from the specified sitemap.
-    """
-    print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
-          ,"NO changes will be made")
-
-    sites = read_sites(input_url, input_skip, input_take)
-
-    return sites
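With add_site and delete_site gone, the sitemap engine is read-only in code as well as in its warning message: only read_sites remains. A hypothetical usage sketch, with an example URL and the signature taken from the removed docstrings:

from engines.sitemap import read_sites

# Read every page listed in a sitemap: skip 0 entries, take all (-1).
sites = read_sites("https://example.com/sitemap.xml", 0, -1)
print(len(sites), "pages found in sitemap")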
62 changes: 38 additions & 24 deletions engines/sitespeed_result.py
@@ -3,54 +3,67 @@
 from pathlib import Path
 from urllib.parse import urlparse
 import re
+from engines.utils import use_item


-def add_site(input_filename, url, input_skip, input_take):
-    sites = []
-    return sites
-
-
-def delete_site(input_filename, url, input_skip, input_take):
-    tmpSites = []
-    return tmpSites
-
-
 def get_url_from_file_content(input_filename):
+    """
+    Extracts the URL from the content of a HAR file.
+    The function opens the file and reads the first 1024 bytes.
+    It then uses a regular expression to find the URL in the read data.
+    If the file does not exist, it prints an error message and returns None.
+
+    Parameters:
+    input_filename (str): The path of the HAR file from which to extract the URL.
+
+    Returns:
+    str: The extracted URL. Returns None if the file does not exist or no URL is found.
+    """
     try:
         # No need to read all content, just read the first 1024 bytes as our url will be there
         # we are doing this for performance
         with open(input_filename, 'r', encoding='utf-8') as file:
             data = file.read(1024)
             regex = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""
             matches = re.finditer(regex, data, re.MULTILINE)
-            for matchNum, match in enumerate(matches, start=1):
+            for _, match in enumerate(matches, start=1):
                 return match.group('url')
-    except:
-        print('error in get_local_file_content. No such file or directory: {0}'.format(
-            input_filename))
+    except OSError:
+        print(f'Error. No such file or directory: {input_filename}')
         return None

     return None


 def read_sites(hostname_or_argument, input_skip, input_take):
+    """
+    Reads the sites from the cache directory based on the hostname or
+    the argument that ends with '.result'.
+
+    Parameters:
+    hostname_or_argument (str): The hostname or the argument that ends with '.result'.
+    input_skip (int): The number of items to skip from the start.
+    input_take (int): The number of items to take after skipping. If -1, takes all items.
+
+    Returns:
+    list: A list of sites where each site is represented as a
+    list containing the path to the HAR file and the URL.
+    """
     sites = []
     hostname = hostname_or_argument
     if hostname_or_argument.endswith('.result'):
-        tmp = hostname_or_argument[:hostname_or_argument.rfind('.result')]
-        o = urlparse(tmp)
-        hostname = o.hostname
-
-    if len(sites) > 0:
-        return sites
+        tmp_url = hostname_or_argument[:hostname_or_argument.rfind('.result')]
+        hostname = urlparse(tmp_url).hostname

-    dir = Path(os.path.dirname(
+    base_directory = Path(os.path.dirname(
         os.path.realpath(__file__)) + os.path.sep).parent

-    data_dir = os.path.join(dir, 'cache', hostname) + os.path.sep
-    if not os.path.exists(data_dir):
+    cache_dir = os.path.join(base_directory, 'cache', hostname) + os.path.sep
+    if not os.path.exists(cache_dir):
         return sites

-    dirs = os.listdir(data_dir)
+    dirs = os.listdir(cache_dir)

     urls = {}
@@ -62,14 +75,15 @@ def read_sites(hostname_or_argument, input_skip, input_take):
             continue

         full_path = os.path.join(
-            data_dir, file_name)
+            cache_dir, file_name)

         url = get_url_from_file_content(full_path)
         urls[url] = full_path

-    for tmp_url in urls.keys():
-        sites.append([urls[tmp_url], tmp_url])
+    current_index = 0
+    for url, har_path in urls.items():
+        if use_item(current_index, input_skip, input_take):
+            sites.append([har_path, url])
+        current_index += 1

     return sites
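read_sites now honours input_skip and input_take via use_item, imported from engines.utils at the top of the file. That function's implementation is not part of this diff; judging only by the call site, a plausible sketch of its contract is:

def use_item(current_index, input_skip, input_take):
    # Hedged sketch, not the repo's actual engines.utils.use_item: keep
    # items after `input_skip`, at most `input_take` of them (-1 = no limit).
    if current_index < input_skip:
        return False
    if input_take != -1 and current_index >= input_skip + input_take:
        return False
    return True

# Example: skip the first 2 items, then take at most 3.
assert [i for i in range(10) if use_item(i, 2, 3)] == [2, 3, 4]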
