From 6cc2695091f2b114ebf6ac9a89ff889a4f94aeaf Mon Sep 17 00:00:00 2001 From: Manimaran Date: Sun, 30 Jun 2024 21:54:12 -0400 Subject: [PATCH] Perform Semantic Search using Chroma vector DB --- vector-chromadb/.gitignore | 163 +++++++++++++++++++++++++ vector-chromadb/main.py | 66 ++++++++++ vector-chromadb/menu-items.csv | 202 +++++++++++++++++++++++++++++++ vector-chromadb/readme.md | 5 + vector-chromadb/requirements.txt | 3 + 5 files changed, 439 insertions(+) create mode 100644 vector-chromadb/.gitignore create mode 100644 vector-chromadb/main.py create mode 100644 vector-chromadb/menu-items.csv create mode 100644 vector-chromadb/readme.md create mode 100644 vector-chromadb/requirements.txt diff --git a/vector-chromadb/.gitignore b/vector-chromadb/.gitignore new file mode 100644 index 0000000..88173fe --- /dev/null +++ b/vector-chromadb/.gitignore @@ -0,0 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+venv-app/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/vector-chromadb/main.py b/vector-chromadb/main.py
new file mode 100644
index 0000000..f5e03bd
--- /dev/null
+++ b/vector-chromadb/main.py
@@ -0,0 +1,78 @@
+"""Semantic search over restaurant menu items with a Chroma vector DB.
+
+Reads item names from ``menu-items.csv``, stores them in a persistent Chroma
+collection embedded with the ``all-mpnet-base-v2`` SentenceTransformer model,
+and prints the top five matches for a few sample queries.
+"""
+import chromadb
+from chromadb.utils import embedding_functions
+import json
+
+import csv
+import os
+
+CSV_PATH = 'menu-items.csv'
+
+
+def load_menu_items(path):
+    """Read the menu CSV at *path* and return ``(documents, metadatas, ids)``.
+
+    The first row is treated as a header and skipped. For every other row,
+    column 1 becomes the document text, column 0 is kept as ``item_id``
+    metadata, and the id is the 1-based position of the row as a string.
+    Rows with fewer than two columns (e.g. blank lines) are ignored.
+    """
+    documents, metadatas, ids = [], [], []
+    # newline='' is what the csv module expects from a file object; the
+    # explicit encoding keeps the result independent of the platform default.
+    with open(path, 'r', newline='', encoding='utf-8') as file:
+        reader = csv.reader(file)
+        next(reader, None)  # skip the header row
+        for row in reader:
+            if len(row) < 2:
+                continue
+            documents.append(row[1])
+            metadatas.append({'item_id': row[0]})
+            ids.append(str(len(ids) + 1))
+    return documents, metadatas, ids
+
+
+def search(collection, text, n_results=5):
+    """Query *collection* for *text* and return the matching documents."""
+    results = collection.query(
+        query_texts=[text],
+        n_results=n_results,
+        include=["documents", "distances", "metadatas"]
+    )
+    return results['documents']
+
+
+def main():
+    """Index the menu items and print results for the sample queries."""
+    # Fail with a clear message instead of trying to index an empty list.
+    if not os.path.exists(CSV_PATH):
+        raise SystemExit(f"{CSV_PATH} not found in the current directory")
+    print('File exists')
+    documents, metadatas, ids = load_menu_items(CSV_PATH)
+
+    # create the Chroma client; the database is kept under path 'my_vectordb'
+    client = chromadb.PersistentClient(path='my_vectordb')
+    sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
+        "all-mpnet-base-v2"
+    )
+    collection = client.get_or_create_collection(
+        "menu-items", embedding_function=sentence_transformer_ef
+    )
+
+    # upsert rather than add: the same ids are generated on every run, so
+    # re-running the script refreshes the records instead of re-adding them.
+    collection.upsert(documents=documents, metadatas=metadatas, ids=ids)
+
+    # Query a misspelled word
+    print(search(collection, "sphagetti"))
+    print(search(collection, "chicken"))
+    print(f"Search Result for shrimp:\n{search(collection, 'shrimp')}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/vector-chromadb/menu-items.csv b/vector-chromadb/menu-items.csv
new file mode 100644
index 0000000..daf4236
--- /dev/null
+++ b/vector-chromadb/menu-items.csv
@@ -0,0 +1,202 @@
+"item_id","item_name"
+"A1","Vegan Chicken Salad"
+"A2","Spring Rolls (4 pieces)"
+"A3","Pot Stickers (6 pieces)"
+"A4","Fried Wonton (10 pieces)"
+"A5","Fried Tofu with Soy Sauce"
+"A6","Salt and Pepper Tofu"
+"A7","Vegetable Tempura"
+"A11","Salt and Pepper Fresh Mushrooms"
+"A12","Salt and Pepper Vegan Chicken"
+"A13","Moo Shu Vegetable (6 pieces)"
+"A15","Salt and Pepper Pumpkin"
+"A16","Salt and Pepper Green Beans"
+"A17","Chinese Doughnut"
+"A18","French Fries"
+"B3","San Pellegrino"
+"B6","Thai Ice Almond Milk Tea"
+"B7","Lemon Ice Tea"
+"B8","Snapple"
+"B14","Soda"
+"B15","Bottled Water"
+"B16","Strawberry Popping Boba Tea"
+"B17","Mango Popping Boba Tea"
+"B18","Lemon Popping Boba Tea"
+"B19","Passionfruit Popping Boba Tea"
+"B21","Wintermelon Tea with Popping Boba"
+"B22","Grapefruit Tea with Popping Boba"
+"B24","Mango Smoothie"
+"B25","Strawberry
Smoothie" +"B26","Strawberry Jelly Tea" +"B27","Mango Jelly Tea" +"B28","Passionfruit Jelly Tea" +"B29","Grapefruit Jelly Tea" +"B30","Fresh Orange Juice" +"B31","Veggie Lee Secret Drink" +"B32","Avocado Smoothie" +"B33","Banana Smoothie" +"B34","Taro Tapioca Pearl Coconut Milk" +"B35","Pearl Milk Tea" +"B36","Brown Sugar Pearl Cream Tea" +"B37","Passionfruit Pearl Green Tea" +"B38","Mango Pearl Green Tea" +"B39","Strawberry Pearl Green Tea" +"E1","Ginger with Pea Leaves" +"E2","Pea Leaves with Bamboo Fungus" +"E3","Pea Leaves with Ginko Nuts and Dry Bean Curd" +"E5","Pea Leaves with Black Mushrooms" +"E6","Pea Leaves with Abalone Mushrooms" +"E11","Bok Choy with Ginger" +"E12","Chinese Broccoli with Ginger" +"E13","Sauteed Basil and Chinese Broccoli" +"E14","Broccoli with 3 Types of Mushrooms" +"E15","Broccoli with Szechuan Sauce" +"E16","Broccoli with Ginger" +"E17","Sauteed Basil Green Beans" +"E18","Dry Sauteed Green Beans" +"E19","Green Beans with Ginger" +"E20","Eggplant in Szechuan Sauce" +"E21","Satay Eggplant" +"E22","Sauteed Fried Tofu with Eggplant" +"E23","Braised Fried Tofu with Greens" +"E24","Mushrooms and Tofu in Satay Sauce" +"E25","Sauteed Assorted Vegetables" +"E27","Soft Tofu with Minced Vegan Pork and Chili" +"E28","Sauteed Lotus Root" +"E29","Mixed Vegetables with Fungus" +"E30","Braised Mixed Vegetables" +"E31","Fried Tofu with Golden Mushrooms and Goji Berry" +"E33","Kung Pao Tofu" +"E34","Sweet and Sour Tofu" +"E35","Sizzling Fried Tofu with Black Bean Sauce" +"E36","Sizzling Vegan Chicken with Black Pepper" +"E37","Sizzling Vegan Beef and Mushroom" +"E38","Sizzling Eggplant and Vegan Fish Steak" +"E39","Sizzling 3 Flavors Vegan Chicken" +"E40","Sizzling Vegan Fish with Black Pepper Sauce" +"E41","Thai Style Vegan Fish Fillet" +"E42","Vegan Assorted Meat with Tofu Hot Pot" +"E43","Mixed Fresh Mushrooms Hot Pot" +"E45","Healthy Herbal Rice Hot Pot" +"E46","Thai Style Rice Hot Pot" +"E48","Pumpkin and Tofu with Curry Sauce" +"E49","Eggplant 
Wheat Gluten with Pumpkin" +"E50","Melon and Vermicelli" +"E53","Vegan Prawns with Mixed Nuts" +"E54","Vegan Prawns and Mushrooms with Black Pepper Sauce" +"E56","Vegan Prawns and Mushrooms and Vegetables" +"E57","Vegan Kung Pao Prawns" +"E58","House Saute with Marinated Bean Curd" +"E59","Snow Pea, Broccoli and Black Mushrooms" +"E63","Vegan Sesame Chicken" +"E64","General Tso's Vegan Chicken" +"E65","Vegan Orange Chicken" +"E66","Vegan Chicken with Mushrooms" +"E67","Vegan Kung Pao Chicken" +"E68","Vegan Chicken with Broccoli" +"E69","Vegan Chicken with Curry Sauce" +"E70","Vegan Chicken with Sichuan-Style" +"E71","Vegan Chicken with Sweet and Sour Sauce" +"E72","Kung Pao Vegan Beef, Chicken and Prawns" +"E74","Vegan Beef with Bitter Melon" +"E75","Mongolian Vegan Beef" +"E76","Vegan Beef with Satay Sauce" +"E77","Vegan Beef with Green Beans" +"E78","Vegan Beef with Black Pepper Sauce" +"E79","Vegan Beef with Chinese Broccoli" +"E80","Vegan Beef with Assorted Vegetables" +"E81","Vegan Beef with Broccoli" +"E83","Vegan Pork with Sichuan Sauce" +"E84","Mixed Mushrooms and Mixed Nuts" +"E85","Salt and Pepper Vegan Prawns" +"L1","Assorted Vegetables" +"L2","Snow Bean Vege Chicken and Tofu" +"L3","Eggplant in Sichuan Style" +"L4","Broccoli in Sichuan Style" +"L5","Vegan Pork in Sichuan Style" +"L6","Vegan Pork and Sweet and Sour Sauce" +"L7","Vegan Beef with Broccoli" +"L8","Vegan Beef with Mongolian-Style" +"L9","Vegan Beef with Sichuan-Style" +"L10","Vegan Beef with Black Pepper Sauce" +"L11","Vegan Pork and Cabbage" +"L12","Vegan Sesame Chicken" +"L13","General Tso's Vegan Chicken" +"L14","Orange Vegan Chicken" +"L15","Fresh Mushrooms with Vegan Chicken" +"L16","Salt and Pepper Vegan Chicken" +"L17","Kung Pao Vegan Chicken" +"L18","Vegan Chicken with Cashew Nuts" +"L19","Vegan Chicken with Black Bean Sauce" +"L20","Vegan Chicken with Curry Sauce" +"L21","Vegan Pork with Green Beans" +"L22","Dry Sauteed Green Beans" +"L23","Soft Tofu with Vegan Minced Pork and 
Chili" +"L24","Braised Fried Tofu with Greens" +"L25","Kung Pao Sauce with Mixed Vegetables" +"L26","Kung Pao Vegan Prawns" +"L27","Vegan Prawns with Mixed Vegetables" +"L28","Bok Choy with Ginger" +"L29","Deluxe Mixed Vegetables" +"L30","Black and White Mushrooms with Tofu" +"R1","Veggie Lee Style Fried Rice" +"R2","Yin-Yang Fried Rice" +"R3","Fu-Kin Fried Rice" +"R4","Chef Lee Fried Rice" +"R5","Mixed Vegetable Fried Rice" +"R6","Vegan Chicken and Pineapple Fried Rice" +"R8","Vegan Beef Fried Rice" +"R9","Chao-Style Fried Rice" +"R10","Crispy Dry Soy Seaweed Fried Rice" +"R11","Mixed Vegetable and Tofu Pan Fried Noodle" +"R12","Pan Fried Noodle with Tofu and Curry Sauce" +"R13","Mixed Vegetable Pan Fried Noodle" +"R14","Vegan Pork and Cabbage Pan Fried Noodle" +"R15","Vegan Pork Chow Mein" +"R16","Vegetable Chow Mein" +"R17","Soy Sauce Chow Mein" +"R18","Vegan Pork and Rice Noodle in Preserved Vegetable Soup" +"R19","Wonton Soup with Noodle" +"R20","Vegetable Noodle Soup" +"R21","Vegan Beef Chow Fun" +"R22","Chinese Broccoli Basil Chow Fun" +"R23","Vegan Beef with Black Pepper Sauce Over Chow Fun" +"R24","Vegan Beef with Black Bean Sauce Over Chow Fun" +"R25","Malaysian-Style Chow Fun" +"R26","Singaporean-Style Rice Noodle" +"R28","Rice Noodle with Vegan Pork and Preserved Vegetable" +"R29","Chao-Style Rice Noodle" +"R30","Braised Noodle" +"R32","Mapo Tofu Over Noodle" +"R34","Plain Steamed Rice" +"R35","Brown Steamed Rice" +"S1","Vegan Shark's Fin Soup" +"S3","Vegan Shark's Fin with Bamboo Fungus Soup" +"S4","Snow Mushroom Soup" +"S6","Corn Tofu Soup" +"S7","Seaweed and Tofu Soup" +"S8","Hot and Sour Soup" +"S9","Wonton Soup" +"S12","Sizzling Rice and Vegetable Soup" +"SP1","Fresh Bean Curd Skin with Vegetable" +"C1","Vegan BBQ Bun (cold item - steam at home)" +"SP2","Mixed Vegetables and Avocado with Black Pepper" +"C2","Steamed Dumplings (cold item - steam at home)" +"C3","Pan Fried Turnip Cake" +"SP3","Double Mushroom with Vegetable" +"SP5","Lemon Grass Bean 
Curd Roll with Lemon" +"C5","Sesame Ball" +"SP7","Pan Fried Bean Curd Roll" +"SP8","Avocado with Tofu" +"SP11","Tofu with Toons" +"SP13","Toons Pan Cake" +"SP16","House Assorted Platter" +"C18","Happy Ball" +"A19","Homemade Hot Sauce (Chili Oil) - 1 Jar" +"SP19","Fresh Mushroom with Vegan Sea Cucumber" +"SP20","Pinenuts with Toons Fried Rice" +"SP27","Fried Turnip Ball" +"SP28","Fried Curd" +"SP30","Stuffed Eggplant" +"SP31","Black Pepper Vegan Chicken Roll" +"SP32","Crispy Vegan Chicken with Mixed Nuts" \ No newline at end of file diff --git a/vector-chromadb/readme.md b/vector-chromadb/readme.md new file mode 100644 index 0000000..56e4da2 --- /dev/null +++ b/vector-chromadb/readme.md @@ -0,0 +1,5 @@ +# Using Chroma DB for Semantic Search + +- Collection - Collections are where you will store the embeddings, documents and any additional metadata + + diff --git a/vector-chromadb/requirements.txt b/vector-chromadb/requirements.txt new file mode 100644 index 0000000..100f6a6 --- /dev/null +++ b/vector-chromadb/requirements.txt @@ -0,0 +1,3 @@ +chromadb +sentence-transformers +streamlit