diff --git a/parsers/__init__.py b/parsers/__init__.py index d46566a..c9a40ab 100644 --- a/parsers/__init__.py +++ b/parsers/__init__.py @@ -1,22 +1,12 @@ -from parsers.gimmesomeoven import Gimmesomeoven from parsers.smittenkitchen import Smittenkitchen from parsers.letsdishrecipes import Letsdishrecipes -from parsers.lovingitvegan import Lovingitvegan from parsers.minimalistbaker import Minimalistbaker -from parsers.bowlofdelicious import Bowlofdelicious -from parsers.chefkoch import Chefkoch -from parsers.hostthetoast import Hostthetoast from parsers.essenundtriken import EssenUndTrinken from parsers.kuechengoetter import Kuechengoetter -from parsers.kochbar import Kochbar -from parsers.hostthetoast import Hostthetoast from parsers.thewoksoflife import Thewoksoflife from parsers.glebekitchen import GlebeKitchen -from parsers.akispetretzikis import AkisPetretzikis from parsers.hervecuisine import Hervecuisine from parsers.thatlowcarblife import ThatLowCarbLife -from parsers.seriouseats import Seriouseats -from parsers.fattoincasadabenedetta import Fattoincasadabenedetta from parsers.dinneratthezoo import DinnerAtTheZoo from parsers.pickledplum import PickledPlum from parsers.realfoodwholelife import RealFoodWholeLife @@ -24,24 +14,15 @@ # Must exclude the "www" portion of the URL PARSERS = { - 'gimmesomeoven.com': Gimmesomeoven, 'smittenkitchen.com': Smittenkitchen, 'letsdishrecipes.com': Letsdishrecipes, - 'lovingitvegan.com': Lovingitvegan, 'minimalistbaker.com': Minimalistbaker, - 'chefkoch.de': Chefkoch, - 'bowlofdelicious.com': Bowlofdelicious, - 'hostthetoast.com': Hostthetoast, 'essen-und-trinken.de': EssenUndTrinken, 'kuechengoetter.de' : Kuechengoetter, - 'kochbar.de' : Kochbar, 'thewoksoflife.com': Thewoksoflife, 'glebekitchen.com': GlebeKitchen, - 'akispetretzikis.com': AkisPetretzikis, 'hervecuisine.com': Hervecuisine, 'thatlowcarblife.com': ThatLowCarbLife, - 'seriouseats.com' : Seriouseats, - 'fattoincasadabenedetta.it': Fattoincasadabenedetta, 'dinneratthezoo.com': DinnerAtTheZoo, 'pickledplum.com': PickledPlum, 'realfoodwholelife.com': RealFoodWholeLife, diff --git a/parsers/akispetretzikis.py b/parsers/akispetretzikis.py deleted file mode 100644 index 18175ed..0000000 --- a/parsers/akispetretzikis.py +++ /dev/null @@ -1,35 +0,0 @@ -import json -from re import split -from parsers.recipe import Recipe - - -class AkisPetretzikis(Recipe): - - def get_json_recipe(self, d): - recipe = {} - if d['@type'] == 'Recipe': - recipe['name'] = d['name'] - recipe['description'] = d['description'] - recipe['ingredients'] = d['recipeIngredient'] - recipe['instructions'] = split(r'\r\n', d['recipeInstructions']) - recipe['instructions'] = [instruction for instruction in recipe['instructions'] if instruction] - recipe['image'] = d['image'] - - return recipe - - def Parse(self, url): - recipe = {} - recipe['url'] = url - recipe['source'] = 'akispetretzikis.com' - - soup = self.fetch_soup(url) - - results = soup.find_all('script', {'type': 'application/ld+json'}) - for result in results: - d = json.loads(result.contents[0]) - if d['@type'].lower() == 'recipe': - parsed_recipe = self.get_json_recipe(d) - recipe.update(parsed_recipe) - else: - continue - return recipe diff --git a/parsers/bowlofdelicious.py b/parsers/bowlofdelicious.py deleted file mode 100644 index 77b3e36..0000000 --- a/parsers/bowlofdelicious.py +++ /dev/null @@ -1,4 +0,0 @@ -from parsers.recipe import WpJsonRecipe - -class Bowlofdelicious(WpJsonRecipe): - pass diff --git a/parsers/chefkoch.py b/parsers/chefkoch.py deleted file mode 100644 index 7735fe2..0000000 --- a/parsers/chefkoch.py +++ /dev/null @@ -1,29 +0,0 @@ -from parsers.recipe import Recipe -from re import split -import json - -class Chefkoch(Recipe): - - def get_json_recipe(self, r): - recipe = {} - - recipe['name'] = r['name'] - recipe['description'] = r['description'] - recipe['ingredients'] = r['recipeIngredient'] - recipe['instructions'] = split(r'\n\s*\n', r['recipeInstructions']) - recipe['image'] = r['image'] - - return recipe - - def Parse(self, url): - recipe = {} - recipe['url'] = url - recipe['source'] = 'www.chefkoch.de' - - soup = self.fetch_soup(url) - result = soup.find_all('script', {'type': 'application/ld+json'})[1] - d = json.loads(result.contents[0]) - parsed_recipe = self.get_json_recipe(d) - recipe.update(parsed_recipe) - - return recipe diff --git a/parsers/fattoincasadabenedetta.py b/parsers/fattoincasadabenedetta.py deleted file mode 100644 index 9f8c128..0000000 --- a/parsers/fattoincasadabenedetta.py +++ /dev/null @@ -1,55 +0,0 @@ -from parsers.recipe import Recipe - -class Fattoincasadabenedetta(Recipe): - - def parse_microformat(self, soup): - recipe = {} - - title = soup.find('h1', {'class': 'entry-title'}) - recipe['name'] = title.contents[0] - - result = soup.find('meta', {'property': 'og:description'}) - recipe['description'] = result['content'] - - times = soup.find('div', {'class': 'recipe-time-box'}).find_all('li') - for time in times: - time_element = time.find('span', {'class': 'time-text'}).contents[0] - time_value = time.find('span', {'class': ''}).contents[0] - recipe['description'] += "\n" + str(time_element) + ' ' + str(time_value) - - result = soup.find('meta', {'property': 'og:image'}) - recipe['image'] = result['content'] - - recipe['ingredients'] = [] - ingredients = soup.find_all('li', {'class': 'wpurp-recipe-ingredient'}) - for ingredient in ingredients: - quantity = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-quantity recipe-ingredient-quantity'}).string or '' - unit = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-unit recipe-ingredient-unit'}).string or '' - name_element = ingredient.find('span', {'class': 'wpurp-recipe-ingredient-name recipe-ingredient-name'}) - name = name_element.contents[0] - notes = '' - if len(name_element.contents) > 1: - notes = name_element.contents[1].contents[0] - - recipe['ingredients'].append(quantity + ' ' + unit + ' ' + name + notes) - - recipe['instructions'] = [] - instructions = soup.find_all('li', {'class': 'wpurp-recipe-instruction'}) - for instruction in instructions: - if instruction.contents[0].string is not None: - recipe['instructions'].append(instruction.contents[0].string) - - return recipe - - def Parse(self, url): - recipe = {} - recipe['url'] = url - recipe['source'] = 'fattoincasadabenedetta.it' - - soup = self.fetch_soup(url) - parsed_recipe = self.parse_microformat(soup) - recipe.update(parsed_recipe) - - print(soup.len) - - return recipe \ No newline at end of file diff --git a/parsers/gimmesomeoven.py b/parsers/gimmesomeoven.py deleted file mode 100644 index ab75646..0000000 --- a/parsers/gimmesomeoven.py +++ /dev/null @@ -1,4 +0,0 @@ -from parsers.recipe import WpJsonRecipe - -class Gimmesomeoven(WpJsonRecipe): - pass diff --git a/parsers/hostthetoast.py b/parsers/hostthetoast.py deleted file mode 100644 index 39b8e13..0000000 --- a/parsers/hostthetoast.py +++ /dev/null @@ -1,4 +0,0 @@ -from parsers.recipe import WpJsonRecipe - -class Hostthetoast(WpJsonRecipe): - pass diff --git a/parsers/kochbar.py b/parsers/kochbar.py deleted file mode 100644 index aae4c97..0000000 --- a/parsers/kochbar.py +++ /dev/null @@ -1,31 +0,0 @@ -import json - -from parsers.recipe import Recipe - -class Kochbar(Recipe): - - def get_json_recipe(self, d): - recipe = {} - - recipe['name'] = d['name'] - recipe['description'] = d['description'] - recipe['ingredients'] = d['recipeIngredient'] - recipe['instructions'] = [i['text'] for i in d['recipeInstructions']] - recipe['image'] = d['image'] - - return recipe - - def Parse(self, url): - recipe = {} - recipe['url'] = url - recipe['source'] = 'kochbar.de' - - soup = self.fetch_soup(url) - - result = soup.find_all('script', {'type': 'application/ld+json'}) - - d = json.loads(result[1].contents[0]) - parsed_recipe = self.get_json_recipe(d) - recipe.update(parsed_recipe) - - return recipe diff --git a/parsers/lovingitvegan.py b/parsers/lovingitvegan.py deleted file mode 100644 index 3917e9e..0000000 --- a/parsers/lovingitvegan.py +++ /dev/null @@ -1,4 +0,0 @@ -from parsers.recipe import WpJsonRecipe - -class Lovingitvegan(WpJsonRecipe): - pass diff --git a/parsers/seriouseats.py b/parsers/seriouseats.py deleted file mode 100644 index e2e48f0..0000000 --- a/parsers/seriouseats.py +++ /dev/null @@ -1,19 +0,0 @@ -from parsers.recipe import Recipe -import recipe_scrapers - - -class Seriouseats(Recipe): - def Parse(self, url): - recipe = {} - - # Scrape the provided website using the url passed in - scraper = recipe_scrapers.scrape_me(url) - - # Assign the recipe's dictionary's keys and values using recipe_scraper dependency - recipe['url'] = url - recipe['name'] = scraper.title() - recipe['image'] = scraper.image() - recipe['ingredients'] = scraper.ingredients() - recipe['instructions'] = (i[3:] for i in scraper.instructions().split("\n")) # Each ingredient starts with a number, period, and whitespace. Remove them. - - return recipe