diff --git a/core/ai_2.py b/core/ai_2.py
new file mode 100644
index 0000000..2ba6c1e
--- /dev/null
+++ b/core/ai_2.py
@@ -0,0 +1,465 @@
+import os
+import json
+import logging
+from functools import lru_cache
+from typing import Optional
+
+from dotenv import load_dotenv
+
+# portkey
+from portkey_ai import PORTKEY_GATEWAY_URL, createHeaders, Portkey
+
+# llama index imports
+from llama_index.core import (
+    SimpleDirectoryReader, StorageContext, load_index_from_storage, VectorStoreIndex,
+    Document, Settings, PromptTemplate
+)
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.core.postprocessor import SimilarityPostprocessor
+# TO ARTIFICIALLY GENERATE Q&A
+from llama_index.core.evaluation import generate_question_context_pairs
+# from llama_index.legacy.query_engine import FLAREInstructQueryEngine
+# from llama_index.callbacks import CallbackManager, LlamaDebugHandler
+# from llama_index.response.pprint_utils import pprint_response
+# pprint_response(response, show_source=True)
+
+# pip install -U sentence-transformers
+# from sentence_transformers import SentenceTransformer
+
+from utils.bhashini_utils import bhashini_translate  # bhashini_asr, bhashini_tts
+
+# from langchain import hub
+# from llama_index.core.prompts import LangchainPromptTemplate
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Prompt to generate questions
+qa_generate_prompt_tmpl = """\
+Context information is below.
+
+---------------------
+{context_str}
+---------------------
+
+Given the context information and not prior knowledge, \
+generate only questions based on the below query.
+
+You are a Professor. Your task is to set up \
+{num_questions_per_chunk} questions for an upcoming \
+quiz/examination. The questions should be diverse in nature \
+across the document. The questions should not contain options and not start with Q1/Q2. \
+Restrict the questions to the context information provided.\
+"""
+
+
+class CustomQueryEngine(RetrieverQueryEngine):
+    """Query engine that logs the retrieved context before delegating to the base class."""
+
+    def custom_query(self, query_str: str):
+        # Retrieve nodes relevant to the query
+        nodes = self.retriever.retrieve(query_str)
+
+        # Generate synthetic question/context pairs from the retrieved nodes.
+        # The result is an EmbeddingQAFinetuneDataset (queries, relevant_docs, corpus);
+        # it is only produced here for offline evaluation and is not used in the response.
+        qa_dataset = generate_question_context_pairs(
+            [n.node for n in nodes],
+            llm=Settings.llm,
+            num_questions_per_chunk=2,
+            qa_generate_prompt_tmpl=qa_generate_prompt_tmpl,
+        )
+
+        # Build and log the context string for debugging
+        context_str = "\n\n".join(n.node.get_content() for n in nodes)
+        logger.info("Context string: %s", context_str)
+
+        # Call the superclass's query method to generate a response
+        return super().query(query_str)
+
+
+# Templates
+QA_TEMPLATE = PromptTemplate(
+    "Context information is below.\n"
+    "---------------------\n"
+    "{context_str}\n"
+    "---------------------\n"
+    "Given this information, please answer the question: {query_str}\n"
+    "If you don't know the answer, just say that you don't know. Don't try to make up an answer.\n"
+    "Provide a detailed response and explain your reasoning step by step."
+)
+
+REFINE_TEMPLATE = PromptTemplate(
+    "The original question is as follows: {query_str}\n"
+    "We have provided an existing answer: {existing_answer}\n"
+    "We have the opportunity to refine the existing answer "
+    "(only if needed) with some more context below.\n"
+    "------------\n"
+    "{context_msg}\n"
+    "------------\n"
+    "Given the new context, refine the original answer to better "
+    "answer the question. If the context isn't useful, return the original answer."
+)
+
+# Load environment variables
+load_dotenv(dotenv_path="ops/.env")
+
+# Constants
+PERSIST_DIR = "./storage"
+DATA_FILE = "data/Haq_data_v4.txt"
+PORTKEY_HEADERS = {
+    "x-portkey-api-key": os.getenv("PORTKEY_API_KEY"),
+    "x-portkey-provider": "openai",
+    "Content-Type": "application/json",
+}
+
+# Initialize settings; fall back to the Portkey SDK constant if the gateway URL is not set in the environment
+Settings.chunk_size = 512
+Settings.llm = OpenAI(
+    model=os.getenv("MODEL_NAME"),
+    temperature=0.1,
+    api_base=os.getenv("PORTKEY_GATEWAY_URL", PORTKEY_GATEWAY_URL),
+    default_headers=PORTKEY_HEADERS,
+)
+Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+
+# openai_api_key = os.getenv("OPENAI_API_KEY")
+# port_api_key = os.getenv("PORTKEY_API_KEY")
+# model = os.getenv("MODEL_NAME")
+
+
+@lru_cache(maxsize=1)
+def get_or_create_index():
+    """Load the persisted vector index if it exists, otherwise build and persist it."""
+    if os.path.exists(PERSIST_DIR):
+        return load_index_from_storage(StorageContext.from_defaults(persist_dir=PERSIST_DIR))
+
+    documents = SimpleDirectoryReader(input_files=[DATA_FILE]).load_data()
+    document = Document(text="\n\n".join(doc.text for doc in documents))
+    index = VectorStoreIndex.from_documents([document])
+    index.storage_context.persist(persist_dir=PERSIST_DIR)
+    return index
+
+
+SIMILARITY_CUTOFF = 0.7
+TOP_K = 3
+
+# RETRIEVER PART
+from llama_index.core.evaluation import RetrieverEvaluator
+
+
+def create_custom_query_engine(index: VectorStoreIndex) -> RetrieverQueryEngine:
+    """Create a custom query engine with advanced retrieval and postprocessing."""
+    retriever = index.as_retriever(similarity_top_k=TOP_K)
+
+    # Retriever evaluator (currently unused; see the commented calls below)
+    retriever_evaluator = RetrieverEvaluator.from_metric_names(
+        ["mrr", "hit_rate"], retriever=retriever
+    )
+    # retriever_evaluator.evaluate(
+    #     query="query", expected_ids=["node_id1", "node_id2"]
+    # )
+    # eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)
+
+    return RetrieverQueryEngine.from_args(
+        retriever,
+        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=SIMILARITY_CUTOFF)],
+        text_qa_template=QA_TEMPLATE,
+        refine_template=REFINE_TEMPLATE,
+    )
+
+
+def semantic_cache(func):
+    """Simple semantic caching decorator."""
+    cache = {}
+
+    def wrapper(*args, **kwargs):
+        query = args[0] if args else kwargs.get("input_message", "")
+        for cached_query, cached_response in cache.items():
+            if semantic_similarity(query, cached_query) > 0.9:  # Adjust threshold as needed
+                logger.info("Using cached response for similar query.")
+                return cached_response
+        result = func(*args, **kwargs)
+        cache[query] = result
+        return result
+
+    return wrapper
+
+
+def semantic_similarity(query1: str, query2: str) -> float:
+    """Compute semantic similarity between two queries."""
+    # Placeholder value; replace with a real embedding-based similarity (see the sketch below).
+    return 0.5
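+
+# One possible implementation (a sketch, not wired in): embed both queries with
+# sentence-transformers (see the commented "pip install -U sentence-transformers"
+# note at the top of this file) and return their cosine similarity. The model name
+# "all-MiniLM-L6-v2" is only an example; any sentence-embedding model would do.
+#
+# from sentence_transformers import SentenceTransformer, util
+#
+# _st_model = SentenceTransformer("all-MiniLM-L6-v2")
+#
+# def semantic_similarity(query1: str, query2: str) -> float:
+#     """Cosine similarity between sentence embeddings of the two queries."""
+#     emb1, emb2 = _st_model.encode([query1, query2], convert_to_tensor=True)
+#     return float(util.cos_sim(emb1, emb2).item())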
+
+
+# @lru_cache(maxsize=100)
+@semantic_cache
+def llama_index_rag(input_message):
+    """Answer a query against the vector index via the custom query engine."""
+    # query_engine = get_or_create_index().as_query_engine(similarity_top_k=2)
+    query_engine = create_custom_query_engine(get_or_create_index())
+    # debug_handler = LlamaDebugHandler(print_trace_on_end=True)
+    # callback_manager = CallbackManager([debug_handler])
+    try:
+        response = query_engine.query(input_message)
+        logger.info(f"Query: {input_message}")
+        logger.info(f"Response: {response}")
+        return str(response)
+    except Exception as e:
+        logger.error(f"Error during query processing: {e}")
+        return "An error occurred while processing your query. Please try again."
+
+    # for streaming
+    # stream = llm.stream_complete(input_message)
+    # for r in stream:
+    #     print(r.delta, end="", flush=True)
+
+
+def ragindex(chat_id: str, input_message: str) -> str:
+    """Wrapper function to call llama_index_rag."""
+    res = llama_index_rag(input_message)
+    logger.info(f"Chat ID: {chat_id}, Query: {input_message}")
+    logger.info(f"Response type: {type(res)}, Response: {res}")
+    return res
+
+
+def bhashini_text_chat(chat_id, text, lang):
+    """Translate the user message to English, answer it, and translate the answer back."""
+    input_message = bhashini_translate(text, lang, "en")
+    response_en = ragindex(chat_id, input_message)
+    response = bhashini_translate(response_en, "en", lang)
+    return response, response_en
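+
+
+# Minimal local smoke test (an illustrative sketch: it assumes DATA_FILE exists and the
+# OpenAI/Portkey variables in ops/.env are set; "test-chat" is a made-up chat id).
+if __name__ == "__main__":
+    print(ragindex("test-chat", "What is this document about?"))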