3
3
from datetime import datetime
4
4
import re
5
5
from models import Rating
6
- from tests .utils import get_config_or_default , get_friendly_url_name , get_translation , set_cache_file
6
+ from tests .utils import get_config_or_default ,\
7
+ get_friendly_url_name ,\
8
+ get_translation ,\
9
+ set_cache_file
7
10
from tests .w3c_base import get_errors , identify_files
8
11
from tests .sitespeed_base import get_result
9
12
10
13
# DEFAULTS
# Every setting below is looked up once, at import time, through
# get_config_or_default() using the configuration key given as argument.
REQUEST_TIMEOUT = get_config_or_default('http_request_timeout')
USERAGENT = get_config_or_default('useragent')
REVIEW_SHOW_IMPROVEMENTS_ONLY = get_config_or_default('review_show_improvements_only')
SITESPEED_USE_DOCKER = get_config_or_default('sitespeed_use_docker')
SITESPEED_TIMEOUT = get_config_or_default('sitespeed_timeout')
USE_CACHE = get_config_or_default('cache_when_possible')
CACHE_TIME_DELTA = get_config_or_default('cache_time_delta')

# Validator messages that are ignored for every document whose entry index is
# greater than 1 (see get_grouped_error_messages): a missing doctype and a
# missing <title> are only reported for the first document.
HTML_STRINGS = [
    'Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”',
    'Element “head” is missing a required instance of child element “title”.'
]
26
+
27
+
20
28
def run_test (global_translation , lang_code , url ):
21
29
"""
22
30
Only work on a domain-level. Returns tuple with decimal for grade and string with review
23
31
"""
24
32
25
- rating = Rating (global_translation , review_show_improvements_only )
26
- points = 0.0
27
- review = ''
28
-
29
33
local_translation = get_translation ('html_validator_w3c' , lang_code )
30
34
31
35
print (local_translation ('TEXT_RUNNING_TEST' ))
@@ -35,91 +39,19 @@ def run_test(global_translation, lang_code, url):
35
39
36
40
errors = []
37
41
38
- # We don't need extra iterations for what we are using it for
39
- sitespeed_iterations = 1
40
- sitespeed_arg = '--shm-size=1g -b chrome --plugins.remove screenshot --plugins.remove html --plugins.remove metrics --browsertime.screenshot false --screenshot false --screenshotLCP false --browsertime.screenshotLCP false --chrome.cdp.performance false --browsertime.chrome.timeline false --videoParams.createFilmstrip false --visualMetrics false --visualMetricsPerceptual false --visualMetricsContentful false --browsertime.headless true --browsertime.chrome.includeResponseBodies all --utc true --browsertime.chrome.args ignore-certificate-errors -n {0}' .format (
41
- sitespeed_iterations )
42
- if 'nt' not in os .name :
43
- sitespeed_arg += ' --xvfb'
44
-
45
- sitespeed_arg += ' --postScript chrome-cookies.cjs --postScript chrome-versions.cjs'
46
-
47
- (_ , filename ) = get_result (
48
- url , sitespeed_use_docker , sitespeed_arg , sitespeed_timeout )
49
-
50
- # 1. Visit page like a normal user
51
- data = identify_files (filename )
42
+ data = get_data_for_url (url )
52
43
44
+ rating = Rating (global_translation , REVIEW_SHOW_IMPROVEMENTS_ONLY )
45
+ points = 0.0
46
+ review = ''
53
47
48
+ number_of_errors = 0
54
49
for entry in data ['htmls' ]:
55
- req_url = entry ['url' ]
56
- name = get_friendly_url_name (global_translation , req_url , entry ['index' ])
57
- review_header = '- {0} ' .format (name )
58
- html = entry ['content' ]
59
- set_cache_file (req_url , html , True )
60
-
61
- params = {'doc' : req_url ,
62
- 'out' : 'json' ,
63
- 'level' : 'error' }
64
- errors = get_errors ('html' , params )
65
- number_of_errors = len (errors )
66
-
67
-
68
- error_message_grouped_dict = {}
69
- if number_of_errors > 0 :
70
- regex = r"(“[^”]+”)"
71
- for item in errors :
72
- error_message = item ['message' ]
73
-
74
- # Filter out CSS: entries that should not be here
75
- if error_message .startswith ('CSS: ' ):
76
- number_of_errors -= 1
77
- continue
78
-
79
- # Filter out start html document stuff if not start webpage
80
- if entry ['index' ] > 1 :
81
- if 'Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”' in error_message :
82
- number_of_errors -= 1
83
- continue
84
- if 'Element “head” is missing a required instance of child element “title”.' in error_message :
85
- number_of_errors -= 1
86
- continue
87
-
88
- error_message = re .sub (
89
- regex , "X" , error_message , 0 , re .MULTILINE )
90
-
91
- if error_message_grouped_dict .get (error_message , False ):
92
- error_message_grouped_dict [error_message ] = error_message_grouped_dict [error_message ] + 1
93
- else :
94
- error_message_grouped_dict [error_message ] = 1
95
-
96
- if len (error_message_grouped_dict ) > 0 :
97
- error_message_grouped_sorted = sorted (
98
- error_message_grouped_dict .items (), key = lambda x : x [1 ], reverse = True )
99
-
100
- for item in error_message_grouped_sorted :
101
-
102
- item_value = item [1 ]
103
- item_text = item [0 ]
104
-
105
- review += local_translation ('TEXT_REVIEW_ERRORS_ITEM' ).format (item_text , item_value )
106
-
107
- number_of_error_types = len (error_message_grouped_dict )
108
-
109
- result = calculate_rating (number_of_error_types , number_of_errors )
110
-
111
- # if number_of_error_types > 0:
112
- error_types_rating = Rating (global_translation , review_show_improvements_only )
113
- error_types_rating .set_overall (result [0 ], review_header + local_translation ('TEXT_REVIEW_RATING_GROUPED' ).format (
114
- number_of_error_types , 0.0 ))
115
- rating += error_types_rating
116
-
117
- # if number_of_errors > 0:
118
- error_rating = Rating (global_translation , review_show_improvements_only )
119
- error_rating .set_overall (result [1 ], review_header + local_translation (
120
- 'TEXT_REVIEW_RATING_ITEMS' ).format (number_of_errors , 0.0 ))
121
- rating += error_rating
50
+ tmp_rating , tmp__errors = rate_entry (entry , global_translation , local_translation )
51
+ rating += tmp_rating
52
+ errors .extend (tmp__errors )
122
53
54
+ number_of_errors = len (errors )
123
55
124
56
points = rating .get_overall ()
125
57
rating .set_standards (points )
@@ -150,16 +82,191 @@ def run_test(global_translation, lang_code, url):
150
82
151
83
return (rating , errors )
152
84
85
def get_data_for_url(url):
    """
    Run sitespeed.io against a URL and return the files identified in its result.

    The sitespeed.io argument string is assembled from fixed option groups
    (Chrome, headless, screenshots/video/visual metrics switched off, all
    response bodies included, certificate errors ignored), then handed to
    get_result() together with the docker and timeout settings. The result
    file name returned by get_result() is passed on to identify_files().

    Parameters:
    url (str): The URL to visit.

    Returns:
    The value returned by identify_files() for the sitespeed.io result file
    (run_test reads its 'htmls' entry).
    """
    # A single iteration is enough for what the result is used for.
    sitespeed_iterations = 1

    option_groups = [
        '--shm-size=1g -b chrome',
        '--plugins.remove screenshot --plugins.remove html --plugins.remove metrics',
        '--browsertime.screenshot false --screenshot false --screenshotLCP false',
        '--browsertime.screenshotLCP false --chrome.cdp.performance false',
        '--browsertime.chrome.timeline false --videoParams.createFilmstrip false',
        '--visualMetrics false --visualMetricsPerceptual false',
        '--visualMetricsContentful false --browsertime.headless true',
        '--browsertime.chrome.includeResponseBodies all --utc true',
        '--browsertime.chrome.args ignore-certificate-errors',
        # The trailing space after the iteration count is part of the
        # argument string and is kept as is.
        f'-n {sitespeed_iterations} ',
    ]
    sitespeed_arg = ' '.join(option_groups)

    # --xvfb is added on every platform whose os.name does not contain 'nt'.
    if 'nt' not in os.name:
        sitespeed_arg += ' --xvfb'

    sitespeed_arg += ' --postScript chrome-cookies.cjs --postScript chrome-versions.cjs'

    # 1. Visit page like a normal user
    _, result_filename = get_result(
        url, SITESPEED_USE_DOCKER, sitespeed_arg, SITESPEED_TIMEOUT)

    return identify_files(result_filename)
124
+
125
def rate_entry(entry, global_translation, local_translation):
    """
    Rate one HTML document based on its validation errors.

    The document content is stored with set_cache_file(), the errors are
    fetched with get_errors('html', ...) for the entry URL, and the errors
    that survive filtering are grouped by get_grouped_error_messages().
    Two ratings are produced by calculate_rating() and added together:
    one for the number of distinct error types and one for the number of
    errors.

    Parameters:
    entry (dict): Must provide 'url', 'index' and 'content'.
    global_translation (function): Translation function passed to Rating and
        get_friendly_url_name.
    local_translation (function): Translation function used for the
        'TEXT_REVIEW_RATING_GROUPED' and 'TEXT_REVIEW_RATING_ITEMS' texts.

    Returns:
    tuple: (rating, errors) where rating is the combined Rating object and
    errors is the unfiltered list returned by get_errors().
    """
    rating = Rating(global_translation, REVIEW_SHOW_IMPROVEMENTS_ONLY)

    req_url = entry['url']
    name = get_friendly_url_name(global_translation, req_url, entry['index'])
    review_header = f'- {name} '

    set_cache_file(req_url, entry['content'], True)

    errors = get_errors('html',
                        {
                            'doc': req_url,
                            'out': 'json',
                            'level': 'error'
                        })

    error_message_grouped_dict = {}
    if errors:
        error_message_grouped_dict = get_grouped_error_messages(
            entry,
            local_translation,
            errors,
            len(errors))

    number_of_error_types = len(error_message_grouped_dict)
    # Integers are passed by value, so the helper cannot report back how many
    # errors it filtered out. Every error that survives filtering is counted
    # exactly once in the grouped dict, so the sum of its values is the
    # filtered error count that the rating must be based on.
    number_of_errors = sum(error_message_grouped_dict.values())

    result = calculate_rating(number_of_error_types, number_of_errors)

    error_types_rating = Rating(global_translation, REVIEW_SHOW_IMPROVEMENTS_ONLY)
    error_types_rating.set_overall(
        result[0],
        review_header + local_translation('TEXT_REVIEW_RATING_GROUPED').format(
            number_of_error_types,
            0.0))
    rating += error_types_rating

    error_rating = Rating(global_translation, REVIEW_SHOW_IMPROVEMENTS_ONLY)
    error_rating.set_overall(
        result[1],
        review_header + local_translation('TEXT_REVIEW_RATING_ITEMS').format(
            number_of_errors,
            0.0))
    rating += error_rating

    return (rating, errors)
182
+
183
def get_grouped_error_messages(entry, local_translation, errors, number_of_errors):  # pylint: disable=unused-argument
    """
    Group HTML validation error messages and count their occurrences.

    Messages starting with 'CSS: ' are skipped. For entries whose 'index' is
    greater than 1, messages containing any of the HTML_STRINGS texts are
    skipped as well. In every remaining message each “quoted” part is
    replaced by "X", so messages that only differ in the quoted value end up
    in the same group.

    Parameters:
    entry (dict): Must provide 'index' (only read when errors is not empty).
    local_translation (function): Not used; kept so existing callers that
        pass it keep working.
    errors (list): Dicts with a 'message' key.
    number_of_errors (int): Not used; kept for interface compatibility.
        An int argument cannot be changed for the caller, so the number of
        remaining errors is available as sum(result.values()) instead.

    Returns:
    dict: Normalised error message -> number of occurrences, in order of
    first occurrence.
    """
    error_message_grouped_dict = {}
    regex = r"(“[^”]+”)"
    for item in errors:
        error_message = item['message']

        # Filter out CSS: entries that should not be here
        if error_message.startswith('CSS: '):
            continue

        # Filter out start html document stuff if not start webpage
        if entry['index'] > 1 and any(
                html_str in error_message for html_str in HTML_STRINGS):
            continue

        # flags is passed by keyword; the positional form is deprecated.
        error_message = re.sub(regex, "X", error_message, flags=re.MULTILINE)

        error_message_grouped_dict[error_message] = \
            error_message_grouped_dict.get(error_message, 0) + 1

    # NOTE: the previous version appended per-group review text to a local
    # `review` that was never initialised, which raised UnboundLocalError as
    # soon as one error survived filtering. That text could not reach the
    # caller anyway (only the dict is returned), so it is no longer built.
    return error_message_grouped_dict
245
+
153
246
154
247
def calculate_rating(number_of_error_types, number_of_errors):
    """
    Turn error counts into two ratings, neither of which goes below 1.0.

    Both ratings start at 5.0. Each error type costs 1/5 of a point and
    each error costs 1/10 of a point ((count / 2.0) / 5.0).

    Parameters:
    number_of_error_types (int): The number of different types of errors.
    number_of_errors (int): The total number of errors.

    Returns:
    tuple: (rating for the number of error types, rating for the number of errors).
    """
    lowest_rating = 1.0

    error_types_penalty = number_of_error_types / 5.0
    errors_penalty = (number_of_errors / 2.0) / 5.0

    return (
        max(5.0 - error_types_penalty, lowest_rating),
        max(5.0 - errors_penalty, lowest_rating),
    )
0 commit comments