-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnhs.py
48 lines (45 loc) · 1.81 KB
/
nhs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from bs4 import BeautifulSoup
import requests
def get_list(what):
    """Return the lower-cased link texts listed on an NHS index page.

    Fetches ``https://www.nhs.uk/<what>/`` and collects the text of every
    ``<a>`` element that carries the ``nhsuk-list-panel__link`` class.

    Args:
        what: Path segment appended to the site root. It is inserted into
            the URL verbatim (no escaping or normalisation).

    Returns:
        A list with the lower-cased text of each matching link, in document
        order. Empty when the page has no matching links.

    Raises:
        requests.exceptions.RequestException: If the request fails, or the
            server stays silent for longer than the timeout.
    """
    # Without an explicit timeout requests waits forever on a stalled server.
    # The HTTP status is deliberately not checked: whatever body comes back
    # is parsed.
    page = requests.get('https://www.nhs.uk/' + what + '/', timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')
    links = soup.find_all('a', class_='nhsuk-list-panel__link')
    return [link.getText().lower() for link in links]
def get_common_questions(what):
    """Scrape the question/answer pairs from an NHS medicine page.

    The medicine name is turned into a URL slug by dropping parentheses and
    replacing spaces with hyphens, then
    ``https://www.nhs.uk/medicines/<slug>/`` is fetched. Every
    ``<div class="block-question">`` contributes one entry: the text of its
    ``nhsuk-details__summary-text`` span is the key and the text of its
    ``nhsuk-details__text`` div is the value.

    Args:
        what: Medicine name as a string.

    Returns:
        A dict mapping question text to answer text. Runs of whitespace in
        both are collapsed to single spaces and the ends are trimmed. If two
        blocks share the same question text, the later one wins.

    Raises:
        requests.exceptions.RequestException: If the request fails, or the
            server stays silent for longer than the timeout.
    """
    slug = what.replace('(', '').replace(')', '').replace(' ', '-')
    # Without an explicit timeout requests waits forever on a stalled server.
    # The HTTP status is deliberately not checked: whatever body comes back
    # is parsed.
    page = requests.get('https://www.nhs.uk/medicines/' + slug + '/', timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')

    questions = {}
    for block in soup.find_all('div', class_='block-question'):
        summary = block.find('span', class_='nhsuk-details__summary-text')
        answer = block.find('div', class_='nhsuk-details__text')
        if summary is None or answer is None:
            # Skip blocks that lack either half instead of raising
            # AttributeError on None.
            continue
        # split() with no argument treats line breaks as separators, so words
        # on adjacent lines stay apart rather than being fused together.
        question = ' '.join(summary.getText().split())
        questions[question] = ' '.join(answer.getText().split())
    return questions
def get_section(medicine, section):
    """Return the rich-text body of one section of an NHS medicine page.

    The medicine name is turned into a URL slug by dropping parentheses and
    replacing spaces with hyphens, then
    ``https://www.nhs.uk/medicines/<slug>/`` is fetched. The page's
    ``<section>`` elements are scanned in order; the first one whose first
    ``<h2>`` text begins with ``section`` and which contains a
    ``<div class="block-richtext">`` supplies the result.

    Args:
        medicine: Medicine name as a string.
        section: A single character compared against the first character of
            each section heading.
            NOTE(review): presumably the section number on pages with
            numbered headings -- confirm against callers.

    Returns:
        The text of the matching section's ``block-richtext`` div, or ``''``
        when no section matches.

    Raises:
        requests.exceptions.RequestException: If the request fails, or the
            server stays silent for longer than the timeout.
    """
    slug = medicine.replace('(', '').replace(')', '').replace(' ', '-')
    # Without an explicit timeout requests waits forever on a stalled server.
    # The HTTP status is deliberately not checked: whatever body comes back
    # is parsed.
    page = requests.get('https://www.nhs.uk/medicines/' + slug + '/', timeout=10)
    soup = BeautifulSoup(page.text, 'html.parser')

    for candidate in soup.find_all('section'):
        heading = candidate.find('h2')
        if heading is None:
            # Sections without a heading cannot match; skipping them avoids
            # the IndexError the unguarded lookup would raise.
            continue
        heading_text = heading.getText()
        if not heading_text or heading_text[0] != section:
            continue
        body = candidate.find('div', class_='block-richtext')
        if body is not None:
            return body.getText()
    return ''