forked from cwreece/NPL-Challenge-2018-12
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_http_url.py
executable file
·206 lines (163 loc) · 6.79 KB
/
check_http_url.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python3.7
# NPL Challenge2 code using 3.7 environment by Carole Warner Reece
#
"""
My plan is to check for URL starting with "http://" or "https://",
then verify domain name is in DNS(so is potentially accessible),
then check for reachablility while verifying return code.
If find an invalid HTTP start string, try assuming entry is a fully qualified domain
or a valid IP address entry.
Optionally for testing, I use the bash script "test_url.sh" with examples file
"test.tsv"vto test multiple URLs cases.
"""
import sys
import socket
import requests
import subprocess
# Define functions in this section
#########################################################################
def get_url() -> str:
"""
get_url either accepts a URL string entered when program is executed
or prompts user to enter a URL
:return: url
"""
try:
url = str(sys.argv[1])
return url
except Exception:
url = input('Enter a URL to be tested: ')
return url
def is_valid_url_start(url: str) -> bool:
"""
is_valid_url_start verfies a URL starts with "http://" or "https://"
:return: boolean - True or False
"""
return url.startswith("http://") or url.startswith("https://")
def parse_domain_name(url: str) -> str:
"""
parse_domain_name extracts a domain name from a valid URL
:return: domain_name
"""
assert is_valid_url_start(url)
testUrl = url.split("//")
resource = testUrl[1].split("/")
domain_name = resource[0]
print("The domain name is: ", domain_name)
return domain_name
def is_dns_resolvable(domain_name: str) -> bool:
"""
check_dns verfies that a domain_name can be resolved by DNS,
or that a valid IP address has been entered.
check_dns also prints status to the screen.
Note: if a domain_name is not resolvable, you will never reach it.
The program will use the python 'requests' module to check http/https status and
reachability since a ping test may fail for servers that block ping,i.e. www.abs.com.
:return: boolean - True if resolvable or an IP address, else False
"""
try:
# Remove comment in next line as needed for troubleshooting
# print("Looking for DNS IP")
# Note: socket.gethostbyname() also validates an IP address
dns_ip = socket.gethostbyname(domain_name)
# Comment out next line as needed for troubleshooting if you don't want it
print("The IP address is", dns_ip)
return True
except Exception:
print("The domain name is unknown in DNS, so is not HTTP reachable.")
return False
def check_ping(domain_name: str) -> bool:
"""
check_ping attempts to ping a domain name and returns True if domain_name
is accessible.
check_ping uses the subprocess and the platform modules.
Note: check_ping may help diagnose reachable IP with a server issue.
:return: boolean - True if domain_name is pingable, else False
"""
ping_command = ['ping', domain_name, '-c 1']
ping_output = subprocess.run(ping_command, False, stdout=subprocess.PIPE)
success = ping_output.returncode
return True if success == 0 else False
def check_http_status(url: str) -> None:
"""
check_http_status attempts an HTTP request and prints the status.
:return: None
"""
try:
"""
Need to include a timeout on the request.get so the program will not hang
indefinitely for non-reachable IPs
"""
request = requests.get(url, timeout=3)
if request.status_code == 200:
print("The request status code is", request.status_code)
print("Success. The server appears to be online and functioning correctly.")
elif request.status_code == 400:
print("The request status code is", request.status_code)
print("The requested page", url, "is not working.")
elif request.status_code == 404:
print("The request status code is", request.status_code)
print("The requested page", url, "is not available.")
elif request.status_code == 500:
print("The request status code is", request.status_code)
print("Internal server error. The requested page", url, "is not available.")
elif request.status_code == 502:
print("The request status code is", request.status_code)
print("Bad gateway. The requested page", url, "is not available.")
else:
print("The request status code is", request.status_code)
except Exception as e:
# Handle known errors in the python "requests" module
# Found by trying URL https://www.abc.com which is invalid
print("There was an error resolving the URL. The reason was: \n", str(e))
def test_invalid_url_as_fqdn_or_ip(url: str) -> None:
"""
test_invalid_url_as_fqdn_or_ip checks a string in case it is either a FQDN or an IP.
If the string is resolvable in DNS or a valid IP, it is tested with ping and
both port 80 and 443.
test_invalid_url_as_fqdn_or_ip also prints status to the screen.
:return: None
"""
# Assuming a FQDN in the sting, pull the domain name from the url
resource = url.split("/")
domain_name = resource[0]
# If the domain name is in DNS or is a valid IP address, continue testing.
if is_dns_resolvable(domain_name):
if check_ping(domain_name):
print("Ping is successful to", domain_name)
else:
print("Ping is NOT successful to", domain_name)
http_url = "http://" + url
print("Checking HTTP version or", http_url)
check_http_status(http_url)
https_url = "https://" + url
print("\nChecking HTTPS version or", https_url)
check_http_status(https_url)
def main():
"""
main function calls other functions to support the program goal of testing URLs
main also prints status to the screen.
:return: None
"""
url = get_url()
if is_valid_url_start(url):
domain_name = parse_domain_name(url)
if check_ping(domain_name):
print("Ping is successful to", domain_name)
else:
print("Ping is NOT successful to", domain_name)
if is_dns_resolvable(domain_name):
check_http_status(url)
else:
print("Invalid URL syntax. The URL needs to start with http:// or https:// \n")
print("(Testing for case where a fully qualified domain name or")
print(" an IP address was entered.) \n")
test_invalid_url_as_fqdn_or_ip(url)
"""
The main program starts here - previous lines were definitions.
Note: by default, a standalone program has the name '__main__'
Using this structure allows the import of a program to re-use its functions
without running the main program in the loaded .
"""
if __name__ == '__main__':
main()