diff --git a/EasyLearn/app/__pycache__/answer_questions.cpython-311.pyc b/EasyLearn/app/__pycache__/answer_questions.cpython-311.pyc new file mode 100644 index 000000000..30da04fa8 Binary files /dev/null and b/EasyLearn/app/__pycache__/answer_questions.cpython-311.pyc differ diff --git a/EasyLearn/app/__pycache__/explore_questions.cpython-311.pyc b/EasyLearn/app/__pycache__/explore_questions.cpython-311.pyc new file mode 100644 index 000000000..bf7675880 Binary files /dev/null and b/EasyLearn/app/__pycache__/explore_questions.cpython-311.pyc differ diff --git a/EasyLearn/app/__pycache__/home.cpython-311.pyc b/EasyLearn/app/__pycache__/home.cpython-311.pyc new file mode 100644 index 000000000..51f05edf5 Binary files /dev/null and b/EasyLearn/app/__pycache__/home.cpython-311.pyc differ diff --git a/EasyLearn/app/__pycache__/results.cpython-311.pyc b/EasyLearn/app/__pycache__/results.cpython-311.pyc new file mode 100644 index 000000000..29c180d83 Binary files /dev/null and b/EasyLearn/app/__pycache__/results.cpython-311.pyc differ diff --git a/EasyLearn/app/__pycache__/summarize.cpython-311.pyc b/EasyLearn/app/__pycache__/summarize.cpython-311.pyc new file mode 100644 index 000000000..436bec770 Binary files /dev/null and b/EasyLearn/app/__pycache__/summarize.cpython-311.pyc differ diff --git a/EasyLearn/app/__pycache__/utils.cpython-311.pyc b/EasyLearn/app/__pycache__/utils.cpython-311.pyc new file mode 100644 index 000000000..68e2df1e3 Binary files /dev/null and b/EasyLearn/app/__pycache__/utils.cpython-311.pyc differ diff --git a/EasyLearn/app/answer_questions.py b/EasyLearn/app/answer_questions.py new file mode 100644 index 000000000..89a41ff63 --- /dev/null +++ b/EasyLearn/app/answer_questions.py @@ -0,0 +1,20 @@ +import streamlit as st + +def app(): + st.title("Answer Questions") + + classified_questions = st.session_state.get("classified_questions") + if not classified_questions: + st.warning("Please explore and select questions first.") + return + + 
selected_level = st.selectbox("Choose Difficulty Level", ["Basic", "Intermediate", "Advanced"]) + questions = classified_questions[selected_level.lower()][:3] + + responses = {} + for i, question in enumerate(questions, start=1): + responses[question["question"]] = st.text_area(f"Answer {i}:", "") + + if st.button("Submit Answers"): + st.session_state.update({"user_responses": responses}) + st.success("Responses submitted successfully!") diff --git a/EasyLearn/app/explore_questions.py b/EasyLearn/app/explore_questions.py new file mode 100644 index 000000000..4648cbde5 --- /dev/null +++ b/EasyLearn/app/explore_questions.py @@ -0,0 +1,50 @@ +import streamlit as st +from model.questions import generate_questions_and_answers +from model.classify import classify_questions +from model.storage import save_outputs +from model.preprocess import extract_and_preprocess_text +from model.inference import generate_embeddings + +import streamlit as st +from model.questions import generate_questions_and_answers +from model.classify import classify_questions +from model.storage import save_outputs +from model.preprocess import extract_and_preprocess_text, generate_embeddings # Correct import + + + + +def app(): + st.title("Explore Generated Questions") + + summary = st.session_state.get("summary") + if not summary: + st.warning("Please complete the summarization step first.") + return + + # Ensure text chunks and embeddings are available + text_chunks = st.session_state.get("text_chunks") + embeddings = st.session_state.get("embeddings") + + # If not already available, generate text chunks and embeddings + if not text_chunks or not embeddings: + text_chunks = extract_and_preprocess_text("data/uploaded_file.pdf") + embeddings = generate_embeddings(text_chunks) + st.session_state["text_chunks"] = text_chunks + st.session_state["embeddings"] = embeddings + + # Generate questions and answers + questions_and_answers = generate_questions_and_answers(summary, embeddings, text_chunks, 
min_questions=20) + + # Classify questions + classified_questions = classify_questions(questions_and_answers) + + # Save results + save_outputs(None, classified_questions, "data/questions.json") + + # User interaction: choose a difficulty level + level = st.selectbox("Choose Difficulty Level", ["Basic", "Intermediate", "Advanced"]) + st.subheader(f"{level} Questions") + for i, qa in enumerate(classified_questions[level.lower()][:5], start=1): + st.write(f"**Q{i}:** {qa['question']}") + st.write(f"**A{i}:** {qa['answer']}") diff --git a/EasyLearn/app/home.py b/EasyLearn/app/home.py new file mode 100644 index 000000000..5c66a7033 --- /dev/null +++ b/EasyLearn/app/home.py @@ -0,0 +1,31 @@ +import streamlit as st +from app.utils import set_background + + + +def app(): + # try: + # from app.utils import set_background + # print("set_background function imported successfully!") + # except ImportError as e: + # print(f"ImportError: {e}") + #set_background("D:/AIstuff/moroccoAI/MOROCCANAIhackathon/data/baig.webp") + set_background("data/baig.webp") + st.title("Welcome to the AI-Powered Learning Assistant") + # Ensure the path is correct + st.markdown( + """ + ## Upload your PDF + - Upload a PDF to extract and summarize content. + - Explore questions generated from the content. + - Test your understanding with interactive Q&A. 
+ """ + ) + pdf_file = st.file_uploader("Upload your PDF", type=["pdf"]) + if pdf_file: + with open("data/uploaded_file.pdf", "wb") as f: + f.write(pdf_file.read()) + st.success("File uploaded successfully!") + st.button("Proceed to Summarization", on_click=lambda: st.session_state.update({"file_uploaded": True})) + + \ No newline at end of file diff --git a/EasyLearn/app/results.py b/EasyLearn/app/results.py new file mode 100644 index 000000000..748d902b1 --- /dev/null +++ b/EasyLearn/app/results.py @@ -0,0 +1,18 @@ +import streamlit as st +from model.evaluation import evaluate_user_responses + +def app(): + st.title("Your Results") + + user_responses = st.session_state.get("user_responses", {}) + classified_questions = st.session_state.get("classified_questions", {}) + + if not user_responses: + st.warning("Please submit your answers before proceeding.") + return + + reference_answers = [q["answer"] for level in classified_questions.values() for q in level] + scores = evaluate_user_responses(reference_answers, list(user_responses.values())) + + for i, (question, score) in enumerate(zip(user_responses.keys(), scores), start=1): + st.write(f"**Q{i}: {question}** - Score: {score:.2f}") diff --git a/EasyLearn/app/summarize.py b/EasyLearn/app/summarize.py new file mode 100644 index 000000000..3ae51169d --- /dev/null +++ b/EasyLearn/app/summarize.py @@ -0,0 +1,23 @@ +import streamlit as st +from model.preprocess import extract_and_preprocess_text +from model.inference import generate_segmented_summary +from model.storage import save_outputs + +def app(): + st.title("Summarize Your Document") + + if not st.session_state.get("file_uploaded"): + st.warning("Please upload a PDF on the Home Page first.") + return + + text_chunks = extract_and_preprocess_text("data/uploaded_file.pdf") + if not text_chunks: + st.error("Failed to process the PDF. 
Please try again.") + return + + summary = generate_segmented_summary(text_chunks) + st.text_area("Generated Summary", summary, height=300) + save_outputs(summary, None, "data/summary.txt") + + if st.button("Proceed to Questions"): + st.session_state.update({"summary": summary}) diff --git a/EasyLearn/app/utils.py b/EasyLearn/app/utils.py new file mode 100644 index 000000000..7385becb2 --- /dev/null +++ b/EasyLearn/app/utils.py @@ -0,0 +1,40 @@ +import streamlit as st + +def navigate_to(page): + st.session_state.update({"current_page": page}) + + +import base64 + +import streamlit as st + +def set_background(image_path): + page_bg_img = f""" + + """ + st.markdown(page_bg_img, unsafe_allow_html=True) + + +# import streamlit as st + +# def set_background(image_path): +# """ +# Set a background image for the Streamlit app. +# :param image_path: Path to the background image. +# """ +# page_bg = f""" +# +# """ +# st.markdown(page_bg, unsafe_allow_html=True) diff --git a/EasyLearn/data/baig.webp b/EasyLearn/data/baig.webp new file mode 100644 index 000000000..394131c2a Binary files /dev/null and b/EasyLearn/data/baig.webp differ diff --git a/EasyLearn/data/questions.json b/EasyLearn/data/questions.json new file mode 100644 index 000000000..4074de80f --- /dev/null +++ b/EasyLearn/data/questions.json @@ -0,0 +1 @@ +{"summary": null, "questions_and_answers": {"basic": [], "intermediate": [], "advanced": []}} \ No newline at end of file diff --git a/EasyLearn/data/summary.txt b/EasyLearn/data/summary.txt new file mode 100644 index 000000000..c60568ad0 --- /dev/null +++ b/EasyLearn/data/summary.txt @@ -0,0 +1 @@ +{"summary": "Thank you for providing the information about the publication \"Persimmon (Diospyros kaki L.): Nutritional importance and potential pharmacological activities of this ancient fruit\". As an AI language model, I can provide some key insights and context related to the topic:\n\n1. 
Research Implications: The study by Lydia Ferrara and her colleagues aims to explore the nutritional importance and potential pharmaceutical activities of the ancient fruit \"Persimmon\" (Diospyros kaki L.).\n\n2. Historical Context: This fruit has a long history in human consumption, particularly in Mediterranean cultures.\n\n3. Nutritional Importance: It is known for its high content of vitamin C and various antioxidants. The study likely aims to analyze these nutritional aspects to understand their potential health benefits.\n\n4. Potential Pharmacological Activities: The authors are exploring the possibilities for using this fruit as a natural remedy or for therapeutic purposes, such as improving certain health conditions.\n\n5. Diversity in the Research Community: The publication suggests that while some studies focus on the fruit itself, others may also consider its beneficial properties beyond just the fruit itself.\n\n6. Potential Challenges: Given the ancient nature of many fruits and their potential to be used across cultures and times, there are always challenges related to cultural and scientific boundaries.\n\nFor a more detailed analysis or access to the full text of the publication, you can follow these steps:\n\n1. Go to the ResearchGate website at https://www.researchgate.net/publication/349636486\n2. Find the article titled \"Persimmon (Diospyros kaki L.)\" and click on it.\n3. Click on \"Read more\" and then select \"Full text.\"\n\nThis should provide you with access to the full text of the publication, which can include additional details about the research methods, data sources, or a detailed analysis of the findings presented in the study.\nUnfortunately, I am Qwen, an AI assistant created by Alibaba Cloud. However, I can provide you with a detailed response to your question about \"Persimmon\" (Diospyros kaki L.) and its nutritional importance.\n\nThe persimmon is an ancient fruit that has been cultivated for centuries in Asia and Europe. 
It is known for its rich nutritional value and potential medicinal properties. Let's break down some of the key points related to persimmons:\n\n1. Nutritional Importance:\n - Persimmons are rich in vitamin C, fiber, antioxidants, and various minerals like potassium, magnesium, and calcium.\n - They contain dietary fibers that can help regulate blood sugar levels and prevent cardiovascular diseases.\n - The fruit is a good source of vitamins A and E, which contribute to their immune-boosting properties.\n\n2. Potential Pharmacological Activities:\n - According to the National Library of Medicine (NLM), persimmons have shown some medicinal potential in various fields such as diabetes management, skin care, and digestive issues.\n - Some studies suggest that certain compounds in persimmons can help lower blood sugar levels and improve cardiovascular health.\n\n3. Health Benefits:\n - They are beneficial for overall health due to their high content of vitamins and minerals.\n - The fruit is known to aid digestion, reduce inflammation, and help with skin care.\n\n4. Cultural Significance:\n - Persimmons have been a part of many ancient culinary traditions worldwide.\n - Some cultures believe that persimmons are aphrodisiacs, which can be beneficial for sexual function or improving mood.\n\n5. Varieties:\n - There are several varieties of persimmons, including red, yellow, and green, each with their own distinct taste and texture.\n - Certain types like the wild fruit known as \"tangerine\" have a different flavor profile than commercial fruits.\n\n6. Consumption Patterns:\n - Persimmons can be consumed fresh, frozen, or in various forms like jams, preserves, juices, and tins.\n - In traditional Chinese medicine, they are used for treating digestive issues and improving skin health.\n\n7. 
Environmental Considerations:\n - Persimmons are known to shed their seeds, which means that harvesting the fruit requires careful attention to avoid disturbing the seedling.\n - Some regions have a tradition of growing persimmons as ornamental plants rather than crops.\n\n8. Genetic Modification:\n - While genetic modification has been used in some cases to enhance certain traits (like higher sugar content), for persimmon, traditional breeding methods are more likely responsible for their nutritional value and potential medicinal properties.\n\nIn conclusion, persimmons are an important part of the global diet, offering a variety of health benefits. Their rich nutritional content makes them both beneficial for overall well-being and essential in many culinary traditions worldwide.\nCertainly! The diet plays a crucial role in maintaining our overall health and well-being. Here are some key points about why fruits and vegetables should be a significant part of your diet:\n\n1. **Balanced Nutrient Profile**: Many fruits and vegetables are rich in vitamins, minerals, fiber, antioxidants, and other essential nutrients that support healthy growth and development.\n\n2. **Dietary Fiber**: This can help lower blood sugar levels, reduce the risk of heart disease, and improve digestive health.\n\n3. **Vitamins and Minerals**: These provide the body with necessary vitamins (like vitamin C) and minerals (like calcium and potassium), which are vital for bodily functions such as bone health, muscle function, and maintaining a healthy skin.\n\n4. **Proteins**: Certain fruits and vegetables like bananas, broccoli, and nuts can be good sources of protein that supports muscle growth and recovery.\n\n5. **Fiber**: Helps regulate digestion and prevent the buildup of plaque in arteries, which is particularly beneficial for heart health.\n\n6. **Antioxidants**: Found in many foods, these help neutralize free radicals in the body, reducing oxidative stress.\n\n7. 
**Health Benefits**: While some fruits and vegetables might not provide significant amounts of dietary fiber or antioxidants (like those found in dark chocolate), they are generally good sources that support overall health.\n\n8. **Dietary Diversity**: The variety of fruits and vegetables available makes it easier to get a wide range of nutrients, including specific ones tailored for your individual needs.\n\n9. **Nutrient Density**: Some foods like avocados, nuts, seeds, and legumes are rich in certain minerals and other nutrients that can complement the natural deficiencies found in plant-based diets.\n\n10. **Satiety and Fullness**: Fruits and vegetables are generally lower in calories than their root vegetables and more filling, which helps reduce calorie intake and maintain a healthy weight.\n\nIncorporating these fruits and vegetables into your diet can help ensure that you're getting enough of these beneficial nutrients to support both physical health and mental well-being. Regularly consuming foods high in antioxidants, minerals, vitamins, and fiber can contribute significantly to overall health and longevity.\nThe presence of different bioactive molecules that show activity in the prevention of various pathologies can be quite fascinating. Among these, persimmons have been particularly appreciated for their rich content of bioactive components such as carotenoids, tannins, flavonoids, anthocyanins, and catechins.\n\nCarotenoids are known to have powerful antioxidant properties, making them effective in mitigating the harmful effects produced by reactive oxygen species. Tannins, on the other hand, possess strong anti-inflammatory properties that can help alleviate metabolic disorders and reduce cardiovascular diseases.\n\nThe flavonoids found in persimmons are particularly important due to their capacity to protect against oxidative damage caused by reactive oxygen species. 
Anthocyanins, a type of flavonoid, contribute significantly to the color of fruits such as persimmons, enhancing their appearance while offering health benefits.\n\nCatechins and tannins also play crucial roles in preventing cancer by modulating cellular signaling pathways and inhibiting the growth of tumors. These bioactive molecules are not only present in persimmons but have been shown to be effective in reducing inflammation and improving overall biological functions in various diseases.\n\nKey points about persimmon bioactive substances:\n1. Carotenoids: Rich antioxidant capacity, useful in fighting oxidative damage.\n2. Tannins: Strong anti-inflammatory effect that can help alleviate metabolic disorders.\n3. Flavonoids: Protects against reactive oxygen species and has anti-inflammatory properties.\n4. Anthocyanins: Enhances fruit color while providing health benefits.\n5. Catechins: Prevents cancer by modulating cellular signaling pathways.\n\nThe key to persimmon's effectiveness in preventing many pathologies lies in its abundant bioactive molecules, particularly anthocyanins and tannins, which offer a wide range of biological activities. These bioactive substances not only provide health benefits but also contribute significantly to the overall beauty and flavor of fruits.\n\nIn summary, persimmons serve as a prime example for their rich content of bioactive substances that can be valuable in various health-related applications. Their presence underscores the importance of understanding and utilizing these beneficial components in food production and nutrition.\n\nReferences:\n- \"Harvesting Persimmon Fruits\" by [insert relevant botanical information]\n- [Insert references to additional scientific literature on persimmons' health benefits]\nThank you for sharing the details about the persimmon tree and its historical significance. While I am a large language model, I don't have access to real-time information or specific data from your source. 
\n\nHowever, I can provide some general information on persimmons:\n\n1. **Origin**: The persimmon is native to Asia, specifically Korea, Japan, and China.\n\n2. **Common Names**:\n - In English-speaking countries, \"Persimmon\" is a common term.\n - In the Far East, it is known as \"Loto of Japan.\"\n\n3. **Plant Type**: Persimmons are deciduous trees that typically grow up to 8-10 meters tall.\n\n4. **Flowering and Pollination**:\n - They produce flowers in spring (March to May) in many parts of Asia.\n - The fruits become achenes before they mature, which need pollinators for fertilization.\n\n5. **History**: Persimmons have been cultivated for thousands of years across the globe.\n - They were originally from China and spread to Korea and Japan.\n - In East Asia, persimmons are known as \"Loto,\" which means \"tree of peace\" in Japanese.\n\n6. **Nutritional Value**: They contain various compounds that can help with digestion and are also rich in vitamin C.\n\n7. **Farming**: Persimmons are grown for their fruit and are used in cooking around the world, particularly in Japan where they were cultivated centuries ago.\n\n8. 
**Varieties**: There are many varieties of persimmons depending on climate conditions, so it's not always clear which variety you're referring to specifically.\n\nWould you like me to elaborate on any part of the information or provide more detailed botanical details?", "questions_and_answers": null} \ No newline at end of file diff --git a/EasyLearn/data/uploaded_file.pdf b/EasyLearn/data/uploaded_file.pdf new file mode 100644 index 000000000..7a03c42a1 Binary files /dev/null and b/EasyLearn/data/uploaded_file.pdf differ diff --git a/EasyLearn/main.py b/EasyLearn/main.py new file mode 100644 index 000000000..370c4905a --- /dev/null +++ b/EasyLearn/main.py @@ -0,0 +1,19 @@ +import streamlit as st +from app import home, summarize, explore_questions, answer_questions, results + +# Page routing +PAGES = { + "Home": home.app, + "Summarization": summarize.app, + "Explore Questions": explore_questions.app, + "Answer Questions": answer_questions.app, + "Results": results.app, +} + +def main(): + st.sidebar.title("Navigation") + page = st.sidebar.radio("Go to", list(PAGES.keys())) + PAGES[page]() + +if __name__ == "__main__": + main() diff --git a/EasyLearn/model/__init__.py b/EasyLearn/model/__init__.py new file mode 100644 index 000000000..09a753589 --- /dev/null +++ b/EasyLearn/model/__init__.py @@ -0,0 +1,6 @@ +from .preprocess import extract_and_preprocess_text +from .inference import generate_segmented_summary +from .questions import generate_questions_and_answers +from .classify import classify_questions +from .evaluation import evaluate_user_responses +from .storage import save_outputs diff --git a/EasyLearn/model/__pycache__/__init__.cpython-311.pyc b/EasyLearn/model/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 000000000..c3078ebf5 Binary files /dev/null and b/EasyLearn/model/__pycache__/__init__.cpython-311.pyc differ diff --git a/EasyLearn/model/__pycache__/classify.cpython-311.pyc b/EasyLearn/model/__pycache__/classify.cpython-311.pyc new 
file mode 100644 index 000000000..d6e2a6f6a Binary files /dev/null and b/EasyLearn/model/__pycache__/classify.cpython-311.pyc differ diff --git a/EasyLearn/model/__pycache__/evaluation.cpython-311.pyc b/EasyLearn/model/__pycache__/evaluation.cpython-311.pyc new file mode 100644 index 000000000..5a434c0be Binary files /dev/null and b/EasyLearn/model/__pycache__/evaluation.cpython-311.pyc differ diff --git a/EasyLearn/model/__pycache__/inference.cpython-311.pyc b/EasyLearn/model/__pycache__/inference.cpython-311.pyc new file mode 100644 index 000000000..91335cb4a Binary files /dev/null and b/EasyLearn/model/__pycache__/inference.cpython-311.pyc differ diff --git a/EasyLearn/model/__pycache__/preprocess.cpython-311.pyc b/EasyLearn/model/__pycache__/preprocess.cpython-311.pyc new file mode 100644 index 000000000..d7e5a57f2 Binary files /dev/null and b/EasyLearn/model/__pycache__/preprocess.cpython-311.pyc differ diff --git a/EasyLearn/model/__pycache__/questions.cpython-311.pyc b/EasyLearn/model/__pycache__/questions.cpython-311.pyc new file mode 100644 index 000000000..86c8b2405 Binary files /dev/null and b/EasyLearn/model/__pycache__/questions.cpython-311.pyc differ diff --git a/EasyLearn/model/__pycache__/storage.cpython-311.pyc b/EasyLearn/model/__pycache__/storage.cpython-311.pyc new file mode 100644 index 000000000..8d0252731 Binary files /dev/null and b/EasyLearn/model/__pycache__/storage.cpython-311.pyc differ diff --git a/EasyLearn/model/classify.py b/EasyLearn/model/classify.py new file mode 100644 index 000000000..f32889842 --- /dev/null +++ b/EasyLearn/model/classify.py @@ -0,0 +1,63 @@ +# from langchain_ollama import ChatOllama +# import json + +# def classify_questions(questions_and_answers, model="qwen2.5:0.5b"): +# """ +# Classify questions into Basic, Intermediate, or Advanced levels using an LLM. +# :param questions_and_answers: List of dictionaries containing questions and answers. +# :param model: LLM model to use. 
+# :return: Dictionary with classified questions. +# """ +# # Extract questions for classification +# questions = [{"question": qa["question"]} for qa in questions_and_answers] + +# prompt = ( +# "Classify the following questions into Basic, Intermediate, or Advanced levels based on their complexity:\n\n" +# f"{json.dumps(questions, indent=2)}\n\n" +# "Format the output as a JSON object where each key is a level and each value is a list of questions, e.g.:\n" +# '{\n' +# ' "basic": ["", ...],\n' +# ' "intermediate": ["", ...],\n' +# ' "advanced": ["", ...]\n' +# '}\n' +# ) + +# try: +# ollama = ChatOllama(model=model) +# response = ollama.invoke([{"role": "user", "content": prompt}]) + +# # Parse the JSON response +# classified = json.loads(response.content.strip()) + +# # Validate the structure +# if not all(key in classified for key in ["basic", "intermediate", "advanced"]): +# raise ValueError("Response missing required keys.") + +# return classified +# except json.JSONDecodeError: +# print("Error: The LLM response is not valid JSON.") +# return {"basic": [], "intermediate": [], "advanced": []} +# except Exception as e: +# print(f"Error during question classification: {e}") +# return {"basic": [], "intermediate": [], "advanced": []} + +import json + +from langchain_ollama import ChatOllama + +def classify_questions(questions_and_answers, model="qwen2.5:0.5b"): + """ + Classify questions into difficulty levels. + """ + try: + ollama = ChatOllama(model=model) + classification_prompt = ( + "Classify the following questions into Basic, Intermediate, or Advanced levels:\n\n" + f"{json.dumps(questions_and_answers, indent=2)}\n\n" + "Output as JSON with keys 'basic', 'intermediate', 'advanced'." 
+ ) + classification_response = ollama.invoke([{"role": "user", "content": classification_prompt}]) + return json.loads(classification_response.content.strip()) + except Exception as e: + print(f"Error classifying questions: {e}") + return {"basic": [], "intermediate": [], "advanced": []} diff --git a/EasyLearn/model/evaluation.py b/EasyLearn/model/evaluation.py new file mode 100644 index 000000000..48da366c4 --- /dev/null +++ b/EasyLearn/model/evaluation.py @@ -0,0 +1,38 @@ +# from sentence_transformers import SentenceTransformer +# from sklearn.metrics.pairwise import cosine_similarity + +# def evaluate_user_responses(reference_answers, user_responses, model_name="all-MiniLM-L6-v2"): +# """ +# Evaluate user responses against reference answers using cosine similarity. +# :param reference_answers: List of correct answers. +# :param user_responses: List of user-provided answers. +# :param model_name: SentenceTransformer model for embeddings. +# :return: List of similarity scores. +# """ +# try: +# if len(reference_answers) != len(user_responses): +# raise ValueError("Mismatch in the number of reference answers and user responses.") + +# model = SentenceTransformer(model_name) +# ref_embeddings = model.encode(reference_answers) +# user_embeddings = model.encode(user_responses) + +# # Calculate cosine similarity scores +# return [ +# cosine_similarity([ref_emb], [user_emb])[0][0] +# for ref_emb, user_emb in zip(ref_embeddings, user_embeddings) +# ] +# except Exception as e: +# print(f"Error during evaluation: {e}") +# return [] + + + +from sklearn.metrics.pairwise import cosine_similarity +from sentence_transformers import SentenceTransformer + +def evaluate_user_responses(reference_answers, user_responses, model_name="all-MiniLM-L6-v2"): + model = SentenceTransformer(model_name) + ref_embeddings = model.encode(reference_answers) + user_embeddings = model.encode(user_responses) + return [cosine_similarity([ref], [usr])[0][0] for ref, usr in zip(ref_embeddings, 
user_embeddings)] diff --git a/EasyLearn/model/inference.py b/EasyLearn/model/inference.py new file mode 100644 index 000000000..c25dd0037 --- /dev/null +++ b/EasyLearn/model/inference.py @@ -0,0 +1,9 @@ +from langchain_ollama import ChatOllama + +def generate_segmented_summary(text_chunks, model="qwen2.5:0.5b", segment_size=5): + ollama = ChatOllama(model=model) + combined_summary = "\n".join([ + ollama.invoke([{"role": "user", "content": chunk}]).content.strip() + for chunk in text_chunks[:segment_size] + ]) + return combined_summary diff --git a/EasyLearn/model/preprocess.py b/EasyLearn/model/preprocess.py new file mode 100644 index 000000000..f392baafd --- /dev/null +++ b/EasyLearn/model/preprocess.py @@ -0,0 +1,46 @@ + +from pdfminer.high_level import extract_text +from sentence_transformers import SentenceTransformer +import re +import numpy as np +import faiss + +# def extract_and_preprocess_text(pdf_path, chunk_size=500): +# raw_text = extract_text(pdf_path) +# clean_text = re.sub(r"\s+", " ", raw_text.replace("\n", " ")).strip() +# return [clean_text[i:i + chunk_size] for i in range(0, len(clean_text), chunk_size)] + + +def extract_and_preprocess_text(pdf_path, chunk_size=500): + """ + Extract and preprocess text from a PDF. + """ + try: + raw_text = extract_text(pdf_path) + if not raw_text.strip(): + raise ValueError("The PDF contains no readable text.") + clean_text = re.sub(r"\s+", " ", raw_text.replace("\n", " ")).strip() + text_chunks = [clean_text[i:i + chunk_size] for i in range(0, len(clean_text), chunk_size)] + return text_chunks + except Exception as e: + print(f"Error extracting text: {e}") + return [] + +def generate_embeddings(text_chunks): + """ + Generate embeddings for text chunks. + """ + model = SentenceTransformer('all-MiniLM-L6-v2') + embeddings = model.encode(text_chunks) + return embeddings + +def store_embeddings(embeddings): + """ + Store embeddings in a FAISS index. 
+ """ + embedding_dim = len(embeddings[0]) + index = faiss.IndexFlatL2(embedding_dim) + index.add(np.array(embeddings)) + return index + + diff --git a/EasyLearn/model/questions.py b/EasyLearn/model/questions.py new file mode 100644 index 000000000..65df8e826 --- /dev/null +++ b/EasyLearn/model/questions.py @@ -0,0 +1,183 @@ +# from langchain_ollama import ChatOllama + +# def generate_questions_and_answers(summary, model="qwen2.5:0.5b", min_questions=20): +# """ +# Generate a set of questions and answers from the summary. +# :param summary: Text summary to generate questions from. +# :param model: LLM model to use. +# :param min_questions: Minimum number of questions to generate. +# :return: List of question-answer pairs. +# """ +# prompt = ( +# f"Based on the following summary, generate at least {min_questions} questions and their answers. " +# "Include questions at Basic, Intermediate, and Advanced levels:\n\n" +# f"Summary:\n{summary}\n\n" +# "Format the output as:\n" +# "- Basic Question: \n" +# "- Basic Answer: \n" +# "- Intermediate Question: \n" +# "- Intermediate Answer: \n" +# "- Advanced Question: \n" +# "- Advanced Answer: \n" +# ) +# try: +# ollama = ChatOllama(model=model) +# response = ollama.invoke([{"role": "user", "content": prompt}]) +# return response.content.strip().split("\n\n") +# except Exception as e: +# print(f"Error during question generation: {e}") +# return [] +# up date to better output + + +from langchain_ollama import ChatOllama +import json + +def generate_questions_and_answers(summary, model="qwen2.5:0.5b", min_questions=20): + """ + Refine the summary using the LLM and then generate a set of questions and answers. + :param summary: Text summary to generate questions from. + :param model: LLM model to use. + :param min_questions: Minimum number of questions to generate. + :return: List of dictionaries containing question, answer, and difficulty level. 
+ """ + # Step 1: Refine the summary + refine_prompt = ( + f"The following summary is provided to generate questions. " + f"Please refine it to make it more concise and focused:\n\n" + f"Summary:\n{summary}\n\n" + "Provide the refined summary in plain text, without additional explanations." + ) + + try: + ollama = ChatOllama(model=model) + refine_response = ollama.invoke([{"role": "user", "content": refine_prompt}]) + refined_summary = refine_response.content.strip() + + print(f"Refined Summary: {refined_summary}") # Debug log + + except Exception as e: + print(f"Error during summary refinement: {e}") + return [] + + # Step 2: Generate questions and answers + qa_prompt = ( + f"Based on the following refined summary, generate at least {min_questions} questions and their answers. " + "Include questions at Basic, Intermediate, and Advanced levels:\n\n" + f"Refined Summary:\n{refined_summary}\n\n" + "Provide the output as a JSON array where each element has keys 'difficulty', 'question', and 'answer'." 
+ ) + + try: + qa_response = ollama.invoke([{"role": "user", "content": qa_prompt}]) + raw_output = qa_response.content.strip() + + # Debug: Print raw response + print(f"Raw response: {raw_output}") + + # Parse the JSON response + questions_and_answers = json.loads(raw_output) + + # Validate structure + if not isinstance(questions_and_answers, list): + raise ValueError("Response is not a valid list.") + + for qa in questions_and_answers: + if not all(k in qa for k in ("difficulty", "question", "answer")): + raise ValueError(f"Invalid question structure: {qa}") + + return questions_and_answers + + except json.JSONDecodeError: + print("Error: LLM response is not valid JSON.") + return [] + except Exception as e: + print(f"Error during question generation: {e}") + return [] + + +# from langchain_ollama import ChatOllama + +# def generate_questions_and_answers(summary, model="qwen2.5:0.5b", min_questions=20): +# """ +# Generate questions and answers from the provided summary using an LLM. +# :param summary: Summarized text to generate questions from. +# :param model: LLM model to use. +# :param min_questions: Minimum number of questions to generate. +# :return: List of question-answer pairs. 
+# """ +# prompt = ( +# f"Generate at least {min_questions} questions and answers based on the following summary.\n\n" +# f"Summary:\n{summary}\n\n" +# "Ensure that questions are evenly distributed across the following levels:\n" +# "- Basic: Simple factual questions.\n" +# "- Intermediate: Reasoning or explanation-based questions.\n" +# "- Advanced: Analytical or critical thinking questions.\n\n" +# "Format:\n" +# "- Basic Question: \n" +# "- Basic Answer: \n" +# "- Intermediate Question: \n" +# "- Intermediate Answer: \n" +# "- Advanced Question: \n" +# "- Advanced Answer: \n" +# ) + +# try: +# ollama = ChatOllama(model=model) +# response = ollama.invoke([{"role": "user", "content": prompt}]) +# questions_and_answers = response.content.strip().split("\n\n") +# return questions_and_answers +# except Exception as e: +# print(f"Error during question generation: {e}") +# return [] + + +# def classify_questions_with_llm(questions_and_answers, model="qwen2.5:0.5b"): +# """ +# Classify questions into Basic, Intermediate, and Advanced using LLM. +# :param questions_and_answers: List of questions and answers to classify. +# :param model: LLM model to use for classification. +# :return: Dictionary with classified questions. +# """ +# prompt = ( +# f"Classify the following questions into Basic, Intermediate, and Advanced levels. 
import json


def save_outputs(summary, questions_and_answers, file_path):
    """Persist the summary and generated Q&A pairs as a single JSON file.

    :param summary: Summary text to store.
    :param questions_and_answers: JSON-serializable list of Q&A entries.
    :param file_path: Destination path for the JSON file.
    """
    # Explicit utf-8 avoids platform-default-encoding failures on non-ASCII
    # text; ensure_ascii=False keeps that text human-readable on disk.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(
            {"summary": summary, "questions_and_answers": questions_and_answers},
            f,
            ensure_ascii=False,
        )


def generate_segmented_summary(text_chunks, model="qwen2.5:0.5b", segment_size=5):
    """Generate a summary by segmenting the context into smaller parts.

    Each group of ``segment_size`` chunks is summarized with one LLM call,
    then the per-segment summaries are combined into one final summary.

    :param text_chunks: List of text chunks.
    :param model: Ollama model name to use.
    :param segment_size: Number of chunks per segment.
    :return: Comprehensive summary ("" for empty input; an error message
             string if any LLM call fails).
    """
    if not text_chunks:
        # Nothing to summarize; avoid a pointless LLM round-trip.
        return ""

    # Imported lazily so the module is importable without the optional
    # langchain_ollama dependency installed.
    from langchain_ollama import ChatOllama

    try:
        ollama = ChatOllama(model=model)

        # Summarize each segment of `segment_size` consecutive chunks.
        segment_summaries = []
        for start in range(0, len(text_chunks), segment_size):
            segment = "\n".join(text_chunks[start:start + segment_size])
            prompt = (
                f"Summarize the following text in a clear and detailed way:\n\n{segment}\n\n"
                "Provide the summary in simple and comprehensive language."
            )
            response = ollama.invoke([{"role": "user", "content": prompt}])
            # Skip blank responses so they don't pollute the combined context.
            if response.content.strip():
                segment_summaries.append(response.content.strip())

        # Combine segment summaries into a final cohesive summary.
        combined_context = "\n".join(segment_summaries)
        final_prompt = (
            f"Combine and summarize the following segment summaries into one cohesive summary:\n\n{combined_context}\n\n"
            "Ensure the summary is clear, detailed, and covers all key points."
        )
        final_response = ollama.invoke([{"role": "user", "content": final_prompt}])
        return final_response.content.strip()

    except Exception as e:
        print(f"Error during segmented summarization: {e}")
        return "An error occurred during segmented summarization."