Skip to content

Commit

Permalink
Improved ingredient search using NLTK wordnet, updated version from 0…
Browse files Browse the repository at this point in the history
….3.3 to 0.3.4
  • Loading branch information
TurconiAndrea committed Jul 24, 2021
1 parent aa2eb8f commit 64372a0
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 11 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

This package provides a classification and tagging system for ingredients and recipes.
The functioning of the package is based on a dataset containing more than 700 ingredients mapped with their own class.
If a provided ingredient is not mapped into the dataset, the library search for it on wikipedia pages and into the dictionary.
If a provided ingredient is not mapped into the dataset, the library search for it on wikipedia pages, into the dictionary and into NLTK Wordnet to find the best possible class.

An ingredient could be classified in one of the following class:
- Vegetable
Expand All @@ -15,6 +15,7 @@ An ingredient could be classified in one of the following class:
- Condiment
- Nut
- Seafood
- Dessert

A recipe is tagged based on its ingredients class.
The library also provides a function to get the class percentage of recipe ingredients.
Expand Down Expand Up @@ -54,5 +55,5 @@ recipe_tagger.get_recipe_class_percentage(['aubergine', 'chicken', 'beef'])

### Todo
- [x] Handling of Wikipedia pages.
- [ ] Better search over dictionary and Wikipedia pages of ingredient.
- [x] Better search over dictionary and Wikipedia pages of ingredient.
- [ ] Possibility to add ingredient after search if it is not present.
1 change: 1 addition & 0 deletions recipe_tagger/foodcategory.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class FoodCategory(Enum):
condiment = 7
nut = 8
seafood = 9
dessert = 10

class CategorySynset():
categories = [wordnet.synset(f'{FoodCategory.vegetable.name}.n.01'),
Expand Down
31 changes: 27 additions & 4 deletions recipe_tagger/recipe_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,13 @@
embedding_path = 'data/ingredient_embedding.npy'

def __get_embedding():
embedding = io.BytesIO(pkgutil.get_data(__name__, embedding_path))
return np.load(embedding, allow_pickle=True).item()
"""
Get the dataset of ingredients as a dictionary.
:return: a dictionary representing the embedding
"""
embedding_io = io.BytesIO(pkgutil.get_data(__name__, embedding_path))
return np.load(embedding_io, allow_pickle=True).item()

def lemmatize_word(word):
"""
Expand All @@ -36,6 +41,7 @@ def is_ingredient_vegan(ingredient):
:param ingredient: the name of the ingredient.
:return: a bool indicating whether the ingredient is vegan or not.
"""
ingredient = ingredient.strip()
shelf = Shelf('Milan', month_id=0)
results = shelf.process_ingredients([ingredient])
return results['labels']['vegan']
Expand All @@ -53,12 +59,28 @@ def is_recipe_vegan(ingredients):
return results['labels']['vegan']

def add_ingredient(ingredient, tag):
"""
Map the provided ingredient and the tag into the embedding dataset.
Tag must be one the following FoodCategory:
vegetable, fruit, meat, legume, diary, egg, staple, condiment, nut, seafood, dessert
:param ingredient: the name of the ingredient.
:param tag: the class of the ingredient. Must be one of the listed above.
:return: a bool indicating if the operation has succeded or not.
"""
embedding = __get_embedding()
ingredient = ingredient.strip()
tag = tag.strip()
if ingredient in embedding:
return False

embedding[ingredient] = FoodCategory[tag].value
return True

def search_ingredient_hypernyms(ingredient):
"""
Predict the class of the provided ingredient based on the Wu & Palmer’s similarity between
ingredient, his hypernyms and the 10 FoodCategory.
ingredient, his hypernyms and the 11 FoodCategory.
The FoodCategory is choosen based on the maximum similarity value between the ingredient,
its hypernym and the various categories. If the predicted category is different between ingredient
and hypernym the category is choosen based on the avarege of both.
Expand Down Expand Up @@ -116,6 +138,7 @@ def get_ingredient_class(ingredient):
:return: the class of the ingredient.
"""
embedding = __get_embedding()
ingredient = ingredient.strip()
lemmatized_ing = lemmatize_word(ingredient)
if lemmatized_ing in embedding:
return FoodCategory(embedding[lemmatized_ing]).name
Expand All @@ -139,7 +162,7 @@ def get_recipe_class_percentage(ingredients):
def get_recipe_tags(ingredients):
"""
Classify a recipe in tags based on its ingredient.
Tag could be: Vegetable, Fruit, Meat, Legume, Diary, Egg.
Tag could be: Vegetable, Fruit, Meat, Legume, Diary, Egg, Staple, Condiment, Nut, Seafood
:param ingredients: list of ingredients in the recipe.
:return: set of tags for the recipe.
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@
setup(
name='recipe-tagger',
packages=find_packages(include=['recipe_tagger']),
version='0.3.3',
version='0.3.4',
description='A library for tagging and classify recipes',
author='Andrea Turconi',
license='MIT',
long_description=README,
long_description_content_type="text/markdown",
url='https://github.com/TurconiAndrea/recipe-tagger',
download_url='https://github.com/TurconiAndrea/recipe-tagger/archive/refs/tags/0.3.3.tar.gz',
download_url='https://github.com/TurconiAndrea/recipe-tagger/archive/refs/tags/0.3.4.tar.gz',
keywords=['food', 'recipe', 'tag', 'tagging', 'ingredient'],
install_requires=['wikipedia-api', 'PyDictionary', 'textblob', 'pyfood', 'unidecode', 'numpy'],
install_requires=['wikipedia-api', 'PyDictionary', 'textblob', 'pyfood', 'unidecode', 'numpy', 'nltk'],
test_suite='tests',
package_data={'': ['data/*.npy']},
)
8 changes: 6 additions & 2 deletions tests/test_recipe_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ def test_is_recipe_vegan():
assert recipe_tagger.is_recipe_vegan(['apple', 'chicken']) == False
assert recipe_tagger.is_recipe_vegan(['apple', 'pear']) == True

#@pytest.mark.skip()
def test_add_ingredient():
assert recipe_tagger.add_ingredient('milk', 'dairy') == True

#@pytest.mark.skip()
def test_search_ingredient_hypernyms():
assert recipe_tagger.search_ingredient_hypernyms('pear') == 'fruit'
Expand All @@ -39,11 +43,11 @@ def test_get_ingredient_class():
assert recipe_tagger.get_ingredient_class('cattle') == 'meat'
assert recipe_tagger.get_ingredient_class('milk') == 'dairy'

@pytest.mark.skip()
#@pytest.mark.skip()
def test_get_recipe_class_percentage():
assert recipe_tagger.get_recipe_class_percentage(['chicken', 'sausage', 'apple']) == [('meat', '66.67%'), ('fruit', '33.33%')]

@pytest.mark.skip()
#@pytest.mark.skip()
def test_get_recipe_tags():
assert recipe_tagger.get_recipe_tags(['aubergine']) == ['vegetable']
assert 'fruit' in recipe_tagger.get_recipe_tags(['pear', 'apple', 'aubergine'])
Expand Down

0 comments on commit 64372a0

Please sign in to comment.