From 64372a086c215ee0c336a6b463255c0b12ca914b Mon Sep 17 00:00:00 2001 From: TurconiAndrea Date: Sat, 24 Jul 2021 18:07:06 +0200 Subject: [PATCH] Improved ingredient search using NLTK wordnet, updated version from 0.3.3 to 0.3.4 --- README.md | 5 +++-- recipe_tagger/foodcategory.py | 1 + recipe_tagger/recipe_tagger.py | 31 +++++++++++++++++++++++++++---- setup.py | 6 +++--- tests/test_recipe_tagger.py | 8 ++++++-- 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2c1905c..b832be5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ This package provides a classification and tagging system for ingredients and recipes. The functioning of the package is based on a dataset containing more than 700 ingredients mapped with their own class. -If a provided ingredient is not mapped into the dataset, the library search for it on wikipedia pages and into the dictionary. +If a provided ingredient is not mapped into the dataset, the library searches for it on Wikipedia pages, in the dictionary and in NLTK WordNet to find the best possible class. An ingredient could be classified in one of the following class: - Vegetable @@ -15,6 +15,7 @@ An ingredient could be classified in one of the following class: - Condiment - Nut - Seafood +- Dessert A recipe is tagged based on its ingredients class. The library also provides a function to get the class percentage of recipe ingredients. @@ -54,5 +55,5 @@ recipe_tagger.get_recipe_class_percentage(['aubergine', 'chicken', 'beef']) ### Todo - [x] Handling of Wikipedia pages. -- [ ] Better search over dictionary and Wikipedia pages of ingredient. +- [x] Better search over dictionary and Wikipedia pages of ingredient. - [ ] Possibility to add ingredient after search if it is not present. 
diff --git a/recipe_tagger/foodcategory.py b/recipe_tagger/foodcategory.py index 83d6ce2..f2bc4c3 100644 --- a/recipe_tagger/foodcategory.py +++ b/recipe_tagger/foodcategory.py @@ -12,6 +12,7 @@ class FoodCategory(Enum): condiment = 7 nut = 8 seafood = 9 + dessert = 10 class CategorySynset(): categories = [wordnet.synset(f'{FoodCategory.vegetable.name}.n.01'), diff --git a/recipe_tagger/recipe_tagger.py b/recipe_tagger/recipe_tagger.py index eeea02f..b4c29e5 100644 --- a/recipe_tagger/recipe_tagger.py +++ b/recipe_tagger/recipe_tagger.py @@ -15,8 +15,13 @@ embedding_path = 'data/ingredient_embedding.npy' def __get_embedding(): - embedding = io.BytesIO(pkgutil.get_data(__name__, embedding_path)) - return np.load(embedding, allow_pickle=True).item() + """ + Get the dataset of ingredients as a dictionary. + + :return: a dictionary representing the embedding + """ + embedding_io = io.BytesIO(pkgutil.get_data(__name__, embedding_path)) + return np.load(embedding_io, allow_pickle=True).item() def lemmatize_word(word): """ @@ -36,6 +41,7 @@ def is_ingredient_vegan(ingredient): :param ingredient: the name of the ingredient. :return: a bool indicating whether the ingredient is vegan or not. """ + ingredient = ingredient.strip() shelf = Shelf('Milan', month_id=0) results = shelf.process_ingredients([ingredient]) return results['labels']['vegan'] @@ -53,12 +59,28 @@ def is_recipe_vegan(ingredients): return results['labels']['vegan'] def add_ingredient(ingredient, tag): + """ + Map the provided ingredient and the tag into the embedding dataset. + Tag must be one of the following FoodCategory: + vegetable, fruit, meat, legume, dairy, egg, staple, condiment, nut, seafood, dessert + + :param ingredient: the name of the ingredient. + :param tag: the class of the ingredient. Must be one of the listed above. + :return: a bool indicating if the operation has succeeded or not. 
+ """ embedding = __get_embedding() + ingredient = ingredient.strip() + tag = tag.strip() + if ingredient in embedding: + return False + + embedding[ingredient] = FoodCategory[tag].value + return True def search_ingredient_hypernyms(ingredient): """ Predict the class of the provided ingredient based on the Wu & Palmer’s similarity between - ingredient, his hypernyms and the 10 FoodCategory. + ingredient, its hypernyms and the 11 FoodCategory. The FoodCategory is choosen based on the maximum similarity value between the ingredient, its hypernym and the various categories. If the predicted category is different between ingredient and hypernym the category is choosen based on the avarege of both. @@ -116,6 +138,7 @@ def get_ingredient_class(ingredient): :return: the class of the ingredient. """ embedding = __get_embedding() + ingredient = ingredient.strip() lemmatized_ing = lemmatize_word(ingredient) if lemmatized_ing in embedding: return FoodCategory(embedding[lemmatized_ing]).name @@ -139,7 +162,7 @@ def get_recipe_class_percentage(ingredients): def get_recipe_tags(ingredients): """ Classify a recipe in tags based on its ingredient. - Tag could be: Vegetable, Fruit, Meat, Legume, Diary, Egg. + Tag could be: Vegetable, Fruit, Meat, Legume, Dairy, Egg, Staple, Condiment, Nut, Seafood :param ingredients: list of ingredients in the recipe. :return: set of tags for the recipe. 
diff --git a/setup.py b/setup.py index 2dec856..23fe641 100644 --- a/setup.py +++ b/setup.py @@ -10,16 +10,16 @@ setup( name='recipe-tagger', packages=find_packages(include=['recipe_tagger']), - version='0.3.3', + version='0.3.4', description='A library for tagging and classify recipes', author='Andrea Turconi', license='MIT', long_description=README, long_description_content_type="text/markdown", url='https://github.com/TurconiAndrea/recipe-tagger', - download_url='https://github.com/TurconiAndrea/recipe-tagger/archive/refs/tags/0.3.3.tar.gz', + download_url='https://github.com/TurconiAndrea/recipe-tagger/archive/refs/tags/0.3.4.tar.gz', keywords=['food', 'recipe', 'tag', 'tagging', 'ingredient'], - install_requires=['wikipedia-api', 'PyDictionary', 'textblob', 'pyfood', 'unidecode', 'numpy'], + install_requires=['wikipedia-api', 'PyDictionary', 'textblob', 'pyfood', 'unidecode', 'numpy', 'nltk'], test_suite='tests', package_data={'': ['data/*.npy']}, ) \ No newline at end of file diff --git a/tests/test_recipe_tagger.py b/tests/test_recipe_tagger.py index 3f648ff..c941b02 100644 --- a/tests/test_recipe_tagger.py +++ b/tests/test_recipe_tagger.py @@ -17,6 +17,10 @@ def test_is_recipe_vegan(): assert recipe_tagger.is_recipe_vegan(['apple', 'chicken']) == False assert recipe_tagger.is_recipe_vegan(['apple', 'pear']) == True +#@pytest.mark.skip() +def test_add_ingredient(): + assert recipe_tagger.add_ingredient('milk', 'dairy') == True + #@pytest.mark.skip() def test_search_ingredient_hypernyms(): assert recipe_tagger.search_ingredient_hypernyms('pear') == 'fruit' @@ -39,11 +43,11 @@ def test_get_ingredient_class(): assert recipe_tagger.get_ingredient_class('cattle') == 'meat' assert recipe_tagger.get_ingredient_class('milk') == 'dairy' -@pytest.mark.skip() +#@pytest.mark.skip() def test_get_recipe_class_percentage(): assert recipe_tagger.get_recipe_class_percentage(['chicken', 'sausage', 'apple']) == [('meat', '66.67%'), ('fruit', '33.33%')] -@pytest.mark.skip() 
+#@pytest.mark.skip() def test_get_recipe_tags(): assert recipe_tagger.get_recipe_tags(['aubergine']) == ['vegetable'] assert 'fruit' in recipe_tagger.get_recipe_tags(['pear', 'apple', 'aubergine'])