Skip to content

Commit

Permalink
Merge pull request #56 from opendata-swiss/feat/multiple-user-agents
Browse files Browse the repository at this point in the history
Handle multiple user-agents
  • Loading branch information
kovalch authored Feb 5, 2024
2 parents 89b1f0d + 0226650 commit b81620b
Showing 1 changed file with 37 additions and 15 deletions.
52 changes: 37 additions & 15 deletions ckan_pkg_checker/utils/request_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
log = logging.getLogger(__name__)


def check_url_status(test_url, http_method="HEAD"):
log.debug("URL %s (%s)" % (test_url, http_method))
error_result = None
def _check_with_user_agent(test_url, http_method, user_agent):
log.debug(f"URL {test_url} ({http_method}), User-Agent: {user_agent}")
try:
headers = {"User-Agent": user_agent}
if http_method == "HEAD":
req = requests.head(
test_url,
verify=False, # SSL certificate will not be verified
timeout=30,
headers={"User-Agent": "Custom"},
headers=headers,
)
elif http_method == "GET":
req = requests.get(
Expand All @@ -32,29 +32,51 @@ def check_url_status(test_url, http_method="HEAD"):
timeout=30,
headers={
"Range": "bytes=0-10", # Request the first 10 bytes
"User-Agent": "Custom",
"User-Agent": user_agent,
},
)
req.raise_for_status()
log.info("sent response %s" % req.status_code)
log.info("Sent response %s" % req.status_code)
return None # Success, no error
except requests.exceptions.HTTPError as e:
log.debug(
"HTTP EXCEPTION OCCURED for URL %s (%s): %r" % (test_url, http_method, e)
"HTTP EXCEPTION OCCURRED for URL %s (%s): %r" % (test_url, http_method, e)
)
# ignore 405 Method Not Allowed errors
if 405 != e.response.status_code:
error_result = str(e)
return str(e) # Return the error message
except (ValueError, requests.exceptions.RequestException) as e:
log.debug(
"REQUEST EXCEPTION OCCURED for URL %s (%s): %r" % (test_url, http_method, e)
"REQUEST EXCEPTION OCCURRED for URL %s (%s): %r"
% (test_url, http_method, e)
)
try:
error_result = str(e.message.reason)
except AttributeError:
error_result = str(e)
if hasattr(e, "message") and hasattr(e.message, "reason"):
return str(e.message.reason)
else:
return str(e)


if error_result:
return error_result
def check_url_status(test_url, http_method="HEAD"):
    """Check a URL, trying each configured User-Agent until one succeeds.

    Args:
        test_url: URL to request.
        http_method: HTTP method handed through to the per-agent check
            (defaults to "HEAD").

    Returns:
        None as soon as one attempt reports no error (a falsy result);
        otherwise the error message produced by the last attempt.
    """
    log.debug("URL %s (%s)", test_url, http_method)
    # A browser-like agent is tried first, the generic "Custom" agent second.
    # NOTE(review): the trailing "Safari/537.3 Safari/537.36" looks like a
    # duplicated token - confirm before changing the header value.
    user_agents = [
        (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/58.0.3029.110 Safari/537.3 "
            "Safari/537.36"
        ),
        "Custom",
    ]
    # Bound up front so the final return is safe even if the list is empty.
    error_result = None
    last_index = len(user_agents) - 1
    for index, user_agent in enumerate(user_agents):
        error_result = _check_with_user_agent(test_url, http_method, user_agent)
        if not error_result:
            return None  # Success, no error
        # Only announce a retry when another User-Agent is actually left;
        # after the final attempt there is nothing to retry with.
        if index < last_index:
            log.debug(
                "Retrying with a different User-Agent for URL %s (%s)",
                test_url,
                http_method,
            )
    return error_result  # If all attempts fail


class RetryAdapter(HTTPAdapter):
Expand Down

0 comments on commit b81620b

Please sign in to comment.