This repository has been archived by the owner on Mar 6, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
recipemd.py
125 lines (109 loc) · 4.35 KB
/
recipemd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import codecs
import sys
import argparse
from argparse import RawTextHelpFormatter
def chefkoch(soup):
    """Parse a chefkoch.de recipe page and write it to a Markdown file.

    Args:
        soup: BeautifulSoup document of the fetched recipe page.

    Raises:
        ValueError: if the page is an error page or has no recipe title.
    """
    # title
    title_tag = soup.find('h1', attrs={'class': 'page-title'})
    if title_tag is None:
        raise ValueError('No recipe found, check URL')
    title = title_tag.text
    # BUG FIX: the original used `title == 'a' or 'b'`, where the bare string
    # 'b' is always truthy, so the error was raised for EVERY page. Use a
    # membership test against both known chefkoch error titles instead.
    if title in ('Fehler: Seite nicht gefunden', 'Fehler: Rezept nicht gefunden'):
        raise ValueError('No recipe found, check URL')
    # ingredients
    ingreds = []
    table = soup.find('table', attrs={'class': 'incredients'})  # sic: site's own class name
    rows = table.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        cols = [s.text.strip() for s in cols]
        ingreds.append([ele for ele in cols if ele])  # get rid of empty values
    ingreds = ['- ' + ' '.join(s) for s in ingreds]
    # instructions
    instruct = soup.find('div', attrs={'id': 'rezept-zubereitung'}).text  # only get text
    instruct = instruct.strip()  # remove leading and trailing whitespace
    # write to file
    writeFile(title, ingreds, instruct)
def allrecipes(soup):
    """Parse an allrecipes.com recipe page and write it to a Markdown file.

    Exits the program with status 1 when no recipe title is present.
    """
    # title — a missing <h1 id="itemTitle"> means this is not a recipe page
    heading = soup.find('h1', attrs={'id': 'itemTitle'})
    try:
        title = heading.text
    except Exception:
        print('No recipe found, check URL')
        sys.exit(1)
    # ingredients: one <li> per ingredient, newlines and runs of
    # whitespace collapsed to single spaces, prefixed with a dash
    container = soup.find('div', attrs={'class': 'ingred-left'})
    ingreds = []
    for item in container.findAll('li'):
        cleaned = item.getText().strip().replace('\n', ' ')
        ingreds.append(' '.join(('- ' + cleaned).split()))
    # instructions: one <li> per step, joined as separate paragraphs
    steps_div = soup.find('div', attrs={'class': 'directLeft'})
    steps = [step.getText().strip() for step in steps_div.findAll('li')]
    instruct = '\n\n'.join(steps)
    # write to file
    writeFile(title, ingreds, instruct)
def brigitte(soup):
    """Parse a brigitte.de recipe page and write it to a Markdown file.

    Exits the program with status 1 when no recipe title is present.
    """
    # title — a missing <h1 class="briTitle"> means this is not a recipe page
    heading = soup.find('h1', attrs={'class': 'briTitle'})
    try:
        title = heading.text
    except Exception:
        print('No recipe found, check URL')
        sys.exit(1)
    # ingredients: <span itemprop="ingredients"> entries, whitespace
    # collapsed, each prefixed with a dash
    half = soup.find('div', attrs={'class': 'category_row_half'})
    spans = half.findAll('span', attrs={'itemprop': 'ingredients'})
    ingreds = []
    for span in spans:
        normalized = ' '.join(span.getText().strip().split())
        ingreds.append('- ' + normalized)
    # instructions: single free-text block marked up via microdata
    block = soup.find('div', attrs={'itemprop': 'recipeInstructions'})
    instruct = block.text.strip()  # remove leading and trailing whitespace
    # write to file
    writeFile(title, ingreds, instruct)
def stewart(soup):
    """Parse a marthastewart.com recipe page and write it to a Markdown file.

    Exits the program with status 1 when no recipe title is present.
    """
    # title — a missing <h1 class="page-title"> means this is not a recipe page
    heading = soup.find('h1', attrs={'class': 'page-title'})
    try:
        title = heading.text.strip()
    except Exception:
        print('No recipe found, check URL')
        sys.exit(1)
    # ingredients: <li class="components-item"> entries, whitespace
    # collapsed, each prefixed with a dash
    component_list = soup.find('ul', attrs={'class': 'components-list'})
    items = component_list.findAll('li', attrs={'class': 'components-item'})
    ingreds = []
    for item in items:
        normalized = ' '.join(item.getText().strip().split())
        ingreds.append('- ' + normalized)
    # instructions: one <li> per step, joined as separate paragraphs
    directions = soup.find('section', attrs={'class': 'directions-group'})
    steps = [step.getText().strip() for step in directions.findAll('li')]
    instruct = '\n\n'.join(steps)
    # write to file
    writeFile(title, ingreds, instruct)
def writeFile(title, ingreds, instruct):
    """Write a recipe to '<slugified-title>.md' in the current directory.

    Args:
        title: recipe title (becomes the H1 and, lowercased with spaces
            replaced by dashes, the file name).
        ingreds: list of already dash-prefixed ingredient lines.
        instruct: preparation instructions as a single string.
    """
    filename = title.lower().replace(' ', '-') + '.md'
    # Assemble the whole document once; '\n\n'.join reproduces the exact
    # blank-line layout of sequential section writes.
    sections = [
        '# ' + title,
        '## Zutaten',
        '\n'.join(ingreds),
        '## Zubereitung',
        instruct,
    ]
    with codecs.open(filename, 'w', encoding="utf-8") as out:
        out.write('\n\n'.join(sections))
    print('File written as: "' + filename + '"')
def main():
    """Parse the command-line URL, fetch the page and dispatch to a site parser.

    Exits with status 1 when the URL cannot be fetched; prints a notice for
    unsupported websites.
    """
    parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
    parser.add_argument('url', help='Input URL to parse recipe \nSupported websites:\nchefkoch.de\nallrecipes.com\nbrigitte.de\nmarthastewart.com')
    args = parser.parse_args()
    url = args.url
    try:
        page = requests.get(url)
    except Exception:
        print('No valid URL')
        sys.exit(1)
    soup = BeautifulSoup(page.text, "html5lib")
    # Scheme-less URL prefix -> site-specific parser. The original matched
    # only 'http://', which rejects the https URLs these sites now use;
    # accept both schemes (backward-compatible generalization).
    handlers = [
        ('www.chefkoch.de/', chefkoch),
        ('allrecipes.com/', allrecipes),
        ('www.brigitte.de/rezepte/', brigitte),
        ('www.marthastewart.com', stewart),
    ]
    for prefix, handler in handlers:
        if url.startswith(('http://' + prefix, 'https://' + prefix)):
            handler(soup)
            return
    print('Website not supported')
# Script entry point: only run the CLI when executed directly, not on import.
if __name__ == "__main__":
    main()