From 7e6a32985a3932daf71178230220993553a5e893 Mon Sep 17 00:00:00 2001 From: berkecanrizai <63911408+berkecanrizai@users.noreply.github.com> Date: Fri, 17 May 2024 14:51:55 +0300 Subject: [PATCH] llm-app example updates and version bumps (#6471) GitOrigin-RevId: e291a674bebd9f3f7e16e75fe3ac39754ccd51e6 --- .../demo-document-indexing/README.md | 2 +- .../demo-document-indexing/requirements.txt | 4 +- .../demo-question-answering/README.md | 28 +- .../pipelines/demo-question-answering/app.py | 242 ++---------------- .../demo-question-answering/requirements.txt | 4 +- .../pipelines/gpt_4o_multimodal_rag/README.md | 140 +++++----- .../pipelines/gpt_4o_multimodal_rag/app.py | 33 +-- .../gpt_4o_multimodal_rag/requirements.txt | 2 +- 8 files changed, 103 insertions(+), 352 deletions(-) diff --git a/examples/pipelines/demo-document-indexing/README.md b/examples/pipelines/demo-document-indexing/README.md index 20dbe50..23eda8e 100644 --- a/examples/pipelines/demo-document-indexing/README.md +++ b/examples/pipelines/demo-document-indexing/README.md @@ -128,7 +128,7 @@ Alternatively, you can launch just the indexing pipeline as a single Docker cont ```bash docker build -t vector_indexer . -docker run -v `pwd`/files-for-indexing:/app/files-for-indexing vector_indexer +docker run -v `pwd`/files-for-indexing:/app/files-for-indexing -p 8000:8000 vector_indexer ``` The volume overlay is important - without it, docker will not see changes to files under the `files-for-indexing` folder. diff --git a/examples/pipelines/demo-document-indexing/requirements.txt b/examples/pipelines/demo-document-indexing/requirements.txt index 7727537..78c7f36 100644 --- a/examples/pipelines/demo-document-indexing/requirements.txt +++ b/examples/pipelines/demo-document-indexing/requirements.txt @@ -1,5 +1,3 @@ -pathway==0.8.2 +pathway[all]~=0.11.0 python-dotenv==1.0.1 -litellm==1.17.3 -unstructured[all-docs]==0.10.28 mpmath==1.3.0 diff --git a/examples/pipelines/demo-question-answering/README.md b/examples/pipelines/demo-question-answering/README.md index 928ee0b..3b64958 100644 --- a/examples/pipelines/demo-question-answering/README.md +++ b/examples/pipelines/demo-question-answering/README.md @@ -39,7 +39,6 @@ This example spawns a lightweight webserver that accepts queries on six possible ### LLM and RAG capabilities - `/v1/pw_ai_answer` to ask questions about your documents, or directly talk with your LLM; - `/v1/pw_ai_summary` to summarize a list of texts; -- `/v1/pw_ai_aggregate_responses` to make a summary of a question for different documents and answers; See the [using the app section](###Using-the-app) to learn how to use the provided endpoints. @@ -274,8 +273,7 @@ curl -X 'POST' \ -H 'Content-Type: application/json' \ -d '{ "query": "string", - "metadata_filter": "string", - "k": 0 + "k": 2 }' ``` @@ -336,28 +334,6 @@ curl -X 'POST' \ Specifying the GPT model with `"model": "gpt-4"` is also possible. -#### Aggregating different responses - -Aggregating is useful when you have a number of responses for different texts or files for a given question. -It organizes responses and creates an executive outlook. 
- -An example curl query is as follows: - -```bash -curl -X 'POST' \ - 'http://0.0.0.0:8000/v1/pw_ai_aggregate_responses' \ - -H 'accept: */*' \ - -H 'Content-Type: application/json' \ - -d '{ - "question": "Is there any action required from the marketing team?", - "answers": [ - "File a.pdf - We need approval for social media campaign from marketing dept.", - "File b.pdf - There are no action points.", - "Budget approval is needed from head of marketing." - ] -}' -``` - This endpoint also supports setting different models in the query by default. To execute similar curl queries as above, you can visit [ai-pipelines page](https://pathway.com/solutions/ai-pipelines/) and try out the queries from the Swagger UI. @@ -367,4 +343,4 @@ To execute similar curl queries as above, you can visit [ai-pipelines page](http First, you can try adding your files and seeing changes in the index. To test index updates, simply add more files to the `data` folder. -If you are using Google Drive or other sources, simply upload your files there. \ No newline at end of file +If you are using Google Drive or other sources, simply upload your files there. diff --git a/examples/pipelines/demo-question-answering/app.py b/examples/pipelines/demo-question-answering/app.py index 2c19578..4e03604 100644 --- a/examples/pipelines/demo-question-answering/app.py +++ b/examples/pipelines/demo-question-answering/app.py @@ -1,85 +1,36 @@ -import json +import logging import sys -from enum import Enum import click import pathway as pw -import pathway.io.fs as io_fs -import pathway.io.gdrive as io_gdrive import yaml from dotenv import load_dotenv -from pathway.internals.udfs import DiskCache, ExponentialBackoffRetryStrategy -from pathway.xpacks.llm import embedders, llms, prompts -from pathway.xpacks.llm.parsers import ParseUnstructured -from pathway.xpacks.llm.splitters import TokenCountSplitter +from pathway.udfs import DiskCache, ExponentialBackoffRetryStrategy +from pathway.xpacks.llm import embedders, llms, parsers, splitters +from pathway.xpacks.llm.question_answering import BaseRAGQuestionAnswerer from pathway.xpacks.llm.vector_store import VectorStoreServer -load_dotenv() - - -class AIResponseType(Enum): - SHORT = "short" - LONG = "long" - - -def _unwrap_udf(func): - if isinstance(func, pw.UDF): - return func.__wrapped__ - return func - - -@pw.udf -def prep_rag_prompt( - prompt: str, docs: list[pw.Json], filter: str | None, response_type: str -) -> str: - if filter is None: - return prompt - - docs = docs.value # type: ignore - - try: - docs = [{"text": doc["text"], "path": doc["metadata"]["path"]} for doc in docs] - - except Exception: - print("No context was found.") - - if response_type == AIResponseType.SHORT.value: - prompt_func = _unwrap_udf(prompts.prompt_short_qa) - else: - prompt_func = _unwrap_udf(prompts.prompt_citing_qa) - return prompt_func(prompt, docs) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(name)s %(levelname)s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) - -@pw.udf -def prompt_aggregate(question: str, answers: list[str]) -> str: - summary_data = "\n".join(answers) - - summaries_str = json.dumps(summary_data, indent=2) - - prompt = f"""Given a json with client names and responses - to the question: "{question}". - Categorize clients stance according to their policy and list them separately. - Use the question and answers to separate them with good logic according to question. - Use Markdown formatting starting with header level 2 (##). 
- - Company Policies: ```{summaries_str}``` - Answer:""" - - return prompt +load_dotenv() def data_sources(source_configs) -> list[pw.Table]: sources = [] for source_config in source_configs: if source_config["kind"] == "local": - source = io_fs.read( + source = pw.io.fs.read( **source_config["config"], format="binary", with_metadata=True, ) sources.append(source) elif source_config["kind"] == "gdrive": - source = io_gdrive.read( + source = pw.io.gdrive.read( **source_config["config"], with_metadata=True, ) @@ -100,148 +51,6 @@ def data_sources(source_configs) -> list[pw.Table]: return sources -class PathwayRAG: - class PWAIQuerySchema(pw.Schema): - prompt: str - filters: str | None = pw.column_definition(default_value=None) - model: str | None = pw.column_definition(default_value="gpt-3.5-turbo") - response_type: str = pw.column_definition(default_value="short") # short | long - - class SummarizeQuerySchema(pw.Schema): - text_list: list[str] - model: str | None = pw.column_definition(default_value="gpt-3.5-turbo") - - class AggregateQuerySchema(pw.Schema): - question: str - answers: list[str] - model: str | None = pw.column_definition(default_value="gpt-3.5-turbo") - - def __init__( - self, - *docs: pw.Table, - llm: pw.UDF, - embedder: pw.UDF, - splitter: pw.UDF, - parser: pw.UDF = ParseUnstructured(), - doc_post_processors=None, - ) -> None: - self.llm = llm - - self.embedder = embedder - - self.vector_server = VectorStoreServer( - *docs, - embedder=embedder, - splitter=splitter, - parser=parser, - doc_post_processors=doc_post_processors, - ) - - @pw.table_transformer - def pw_ai_query(self, pw_ai_queries: pw.Table[PWAIQuerySchema]) -> pw.Table: - """Main function for RAG applications that answer questions - based on available information.""" - - pw_ai_results = pw_ai_queries + self.vector_server.retrieve_query( - pw_ai_queries.select( - metadata_filter=pw.this.filters, - filepath_globpattern=pw.cast(str | None, None), - query=pw.this.prompt, - k=6, - ) - ).select( - docs=pw.this.result, - ) - - pw_ai_results += pw_ai_results.select( - rag_prompt=prep_rag_prompt( - pw.this.prompt, pw.this.docs, pw.this.filters, pw.this.response_type - ) - ) - pw_ai_results += pw_ai_results.select( - result=self.llm( - llms.prompt_chat_single_qa(pw.this.rag_prompt), - model=pw.this.model, - ) - ) - return pw_ai_results - - @pw.table_transformer - def summarize_query( - self, summarize_queries: pw.Table[SummarizeQuerySchema] - ) -> pw.Table: - summarize_results = summarize_queries.select( - pw.this.model, - prompt=prompts.prompt_summarize(pw.this.text_list), - ) - summarize_results += summarize_results.select( - result=self.llm( - llms.prompt_chat_single_qa(pw.this.prompt), - model=pw.this.model, - ) - ) - return summarize_results - - @pw.table_transformer - def aggregate_query( - self, aggregate_queries: pw.Table[AggregateQuerySchema] - ) -> pw.Table: - aggregate_results = aggregate_queries.select( - pw.this.model, - prompt=prompt_aggregate(pw.this.question, pw.this.answers), - ) - aggregate_results += aggregate_results.select( - result=self.llm( - llms.prompt_chat_single_qa(pw.this.prompt), - model=pw.this.model, - ) - ) - return aggregate_results - - def build_server(self, host: str, port: int) -> None: - """Adds HTTP connectors to input tables""" - - webserver = pw.io.http.PathwayWebserver(host=host, port=port) - - # connect http endpoint to output writer - def serve(route, schema, handler): - queries, writer = pw.io.http.rest_connector( - webserver=webserver, - route=route, - schema=schema, - 
autocommit_duration_ms=50, - delete_completed_queries=True, - ) - writer(handler(queries)) - - serve( - "/v1/retrieve", - self.vector_server.RetrieveQuerySchema, - self.vector_server.retrieve_query, - ) - serve( - "/v1/statistics", - self.vector_server.StatisticsQuerySchema, - self.vector_server.statistics_query, - ) - serve( - "/v1/pw_list_documents", - self.vector_server.InputsQuerySchema, - self.vector_server.inputs_query, - ) - serve("/v1/pw_ai_answer", self.PWAIQuerySchema, self.pw_ai_query) - serve( - "/v1/pw_ai_summary", - self.SummarizeQuerySchema, - self.summarize_query, - ) - serve( - "/v1/pw_ai_aggregate_responses", - self.AggregateQuerySchema, - self.aggregate_query, - ) - - @click.command() @click.option("--config_file", default="config.yaml", help="Config file to be used.") def run( @@ -257,8 +66,6 @@ def run( cache_strategy=DiskCache(), ) - text_splitter = TokenCountSplitter(max_tokens=400) - chat = llms.OpenAIChat( model=GPT_MODEL, retry_strategy=ExponentialBackoffRetryStrategy(max_retries=6), @@ -269,31 +76,18 @@ def run( host_config = configuration["host_config"] host, port = host_config["host"], host_config["port"] - rag_app = PathwayRAG( + doc_store = VectorStoreServer( *data_sources(configuration["sources"]), embedder=embedder, - llm=chat, - splitter=text_splitter, + splitter=splitters.TokenCountSplitter(max_tokens=400), + parser=parsers.ParseUnstructured(), ) - rag_app.build_server(host=host, port=port) + rag_app = BaseRAGQuestionAnswerer(llm=chat, indexer=doc_store) - if configuration["cache_options"].get("with_cache", True): - print("Running with cache enabled.") - cache_backend = pw.persistence.Backend.filesystem( - configuration["cache_options"].get("cache_folder", "./Cache") - ) - persistence_config = pw.persistence.Config.simple_config( - cache_backend, - persistence_mode=pw.PersistenceMode.UDF_CACHING, - ) - else: - persistence_config = None + rag_app.build_server(host=host, port=port) - pw.run( - monitoring_level=pw.MonitoringLevel.NONE, - persistence_config=persistence_config, - ) + rag_app.run_server(with_cache=True, terminate_on_error=False) if __name__ == "__main__": diff --git a/examples/pipelines/demo-question-answering/requirements.txt b/examples/pipelines/demo-question-answering/requirements.txt index 40262e5..f8fd3e5 100644 --- a/examples/pipelines/demo-question-answering/requirements.txt +++ b/examples/pipelines/demo-question-answering/requirements.txt @@ -1,5 +1,3 @@ -pathway==0.8.4 +pathway[all]>=0.11.0 python-dotenv==1.0.1 -litellm==1.17.3 -unstructured[all-docs]==0.10.28 mpmath==1.3.0 diff --git a/examples/pipelines/gpt_4o_multimodal_rag/README.md b/examples/pipelines/gpt_4o_multimodal_rag/README.md index 60d2aa0..3eaf862 100644 --- a/examples/pipelines/gpt_4o_multimodal_rag/README.md +++ b/examples/pipelines/gpt_4o_multimodal_rag/README.md @@ -1,39 +1,80 @@ -## Multimodal RAG with Pathway +## Multimodal RAG with Pathway: Process your Financial Reports and Tables with GPT-4o -Get started with multimodal RAG using `GPT-4o` and Pathway. This showcase demonstrates a document processing pipeline that utilizes LLMs in the parsing stage. Pathway extracts information from unstructured financial documents, updating results as documents change or new ones arrive. +Get started with multimodal RAG using `GPT-4o` and Pathway. This showcase demonstrates how you can launch a document processing pipeline that utilizes `GPT-4o` in the parsing stage. 
Pathway extracts information from unstructured financial documents in your folders, updating results as documents change or new ones arrive.
+
+Using this approach, you can keep your AI application permanently connected to your drive, in sync with your documents, including those with visually formatted elements: tables, charts, etc.
 We specifically use `GPT-4o` to improve the table data extraction accuracy and demonstrate how this approach outperforms the industry-standard RAG toolkits.
 
-We focused on the finance domain because financial documents often rely heavily on tables in various forms. This showcase highlights the limitations of traditional RAG setups, which struggle to answer questions based on table data. By contrast, our multimodal RAG approach excels in extracting accurate information from tables.
+In this showcase, we focused on the finance domain because financial documents often rely heavily on tables in various forms. This showcase highlights the limitations of traditional RAG setups, which struggle to answer questions based on table data. By contrast, our multimodal RAG approach excels in extracting accurate information from tables.
 
-We use the `GPT-4o` in two parts:
-- Extracting and understanding the tables inside the PDF
-- Answering questions with the retrieved context
+## Using the service
 
-![Architecture](gpt4o.gif)
+Follow the [steps below](#running-the-app) to set up the service. This will create a REST endpoint on your selected host and port, running a service that is connected to your file folder and ready to answer your questions. There are no extra dependencies.
 
-## Introduction
+In this demo, we run the service on localhost (`0.0.0.0:8000`). You can connect your own front end or application to this endpoint. Here, we test the service with `curl`.
 
-We will use `BaseRAGQuestionAnswerer` provided under `pathway.xpacks` to get started on our RAG application with very minimal overhead. This module brings together the foundational building bricks for the RAG application.
+First, let's check that the files contained in your folder are currently indexed:
+```bash
+curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_list_documents' -H 'accept: */*' -H 'Content-Type: application/json'
+```
 
-It includes ingesting the data from the sources, calling the LLM, parsing and chunking the documents, creating and querying the database (index) and also serving the app on an endpoint.
+This will return the list of indexed files. For example, if you start with the [data folder](./data) provided in the demo, the answer will be as follows:
+> `[{"modified_at": 1715765613, "owner": "berke", "path": "data/20230203_alphabet_10K.pdf", "seen_at": 1715768762}]`
 
-For more advanced RAG options, make sure to check out [rerankers](https://pathway.com/developers/api-docs/pathway-xpacks-llm/rerankers) and the [adaptive rag example](../adaptive-rag/).
+In the default app setup, the connected folder is a local file folder. You can add more folders and file sources, such as [Google Drive](https://pathway.com/developers/user-guide/connectors/gdrive-connector/#google-drive-connector) or [SharePoint](https://pathway.com/developers/user-guide/connecting-to-data/connectors/#tutorials), by adding a line of code to the template.
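+For instance, attaching a Google Drive folder next to the local data folder could look roughly like the sketch below (a minimal sketch; the folder id and the credentials file path are placeholders you would replace with your own):
+
+```python
+import pathway as pw
+
+# Hypothetical extra source: a Google Drive folder, read together with metadata
+# so that the index can track added, changed, and deleted files.
+gdrive_files = pw.io.gdrive.read(
+    object_id="<your-folder-id>",  # placeholder folder id
+    service_user_credentials_file="secrets.json",  # placeholder credentials path
+    with_metadata=True,
+)
+```
+
+In the template, such a table would then be passed to the `VectorStoreServer` together with the existing local source.
+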
+If you now add or remove files from your connected folder, you can repeat the request and see that the list of indexed files has been updated automatically. You can look at the service logs to follow the progress of indexing new and modified files. PDF files of 100 pages should normally take under 10 seconds to sync, and the indexing parallelizes if multiple files are added at the same time.
+
+Now, let's ask a question about one of the tables inside the report. In our tests, regular RAG applications struggled with the tables and couldn't answer this question correctly.
+
+```bash
+curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H 'Content-Type: application/json' -d '{
+  "prompt": "How much was Operating lease cost in 2021?"
+}'
+```
+> `$2,699 million`
+
+This response was correct thanks to the initial LLM parsing step.
+When we check the context that is sent to the LLM, we see that Pathway included the table in the context, whereas other RAG applications failed to include it.
+
+The following GIF shows a snippet from our experiments:
+
+![Regular RAG vs Pathway Multimodal comparison](gpt4o_with_pathway_comparison.gif)
+
+Let's try another one:
+
+```bash
+curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H 'Content-Type: application/json' -d '{
+  "prompt": "What is the operating income for the fiscal year of 2022?"
+}'
+```
+> `$74,842 million`
+
+As another example, let's ask a question that can be answered from the table on the 48th page of the PDF.
+
+```bash
+curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H 'Content-Type: application/json' -d '{
+  "prompt": "How much was Marketable securities worth in 2021 in the consolidated balance sheets?"
+}'
+```
+> `$118,704 million`
+
+Looking good!
+
+## Architecture
+
+We use `GPT-4o` in two separate places in the flow of data:
+- Extracting and understanding the tables inside the PDF
+- Answering questions with the retrieved context
+
+![Architecture](gpt4o.gif)
+
+We will use the `BaseRAGQuestionAnswerer` class provided under `pathway.xpacks` to get started on our RAG application with minimal overhead. This module brings together the foundational building bricks for the RAG application.
+
+It includes ingesting the data from the sources, calling the LLM, parsing and chunking the documents, creating and querying the database (index), and serving the app on an endpoint.
+
+For more advanced RAG options, make sure to check out [rerankers](https://pathway.com/developers/api-docs/pathway-xpacks-llm/rerankers) and the [adaptive rag example](../adaptive-rag/).
 
 ## Running the app
 
@@ -42,7 +83,7 @@ It is also possible to easily create new components by extending the [`pw.UDF`](
 First, make sure to install the requirements by running:
 
 ```bash
-pip install -r requirements.txt
+pip install -r requirements.txt -U
 ```
 
 Then, create a `.env` file in this directory and put your API key with `OPENAI_API_KEY=sk-...`, or add the `api_key` argument to `OpenAIChat` and `OpenAIEmbedder`.
@@ -72,51 +113,20 @@ docker build -t rag .
docker run -v `pwd`/data:/app/data -p 8000:8000 rag ``` -## Using the app - -After running the app, you will see the logs about the files being processed, after the logs stop streaming, the app is ready to receive requests. - -First, let's check the files that are currently indexed: -```bash -curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_list_documents' -H 'accept: */*' -H 'Content-Type: application/json' -``` - -This will return the list of files as follows: -> `[{"modified_at": 1715765613, "owner": "berke", "path": "data/20230203_alphabet_10K.pdf", "seen_at": 1715768762}]` - -Now, let's ask a question from one of the tables inside the report. In our tests, regular RAG applications struggled with the tables and couldn't answer to this question correctly. - -```bash -curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H 'Content-Type: application/json' -d '{ - "prompt": "How much was Operating lease cost in 2021?" -}' -``` -> `$2,699 million` - -This response was correct thanks to the initial LLM parsing step. -When we check the context that is sent to the LLM, we see that Pathway included the table in the context where as other RAG applications failed to include the table. - -The following GIF shows a snippet from our experiments: - -![Regular RAG vs Pathway Multimodal comparison](gpt4o_with_pathway_comparison.gif) +## Modifying the code -Let's try another one, +In the main function of `app.py`, we define: +- input folders +- LLM +- embedder +- index +- host and port to run the app +- run options (caching, cache folder) -```bash -curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H 'Content-Type: application/json' -d '{ - "prompt": "What is the operating income for the fiscal year of 2022?" -}' -``` -> `$74,842 million` +You can modify any of the components by checking the options from the imported modules: `from pathway.xpacks.llm import embedders, llms, parsers, splitters`. -Another example, let's ask a question that can be answered from the table on the 48th page of the PDF. +It is also possible to easily create new components by extending the [`pw.UDF`](https://pathway.com/developers/user-guide/data-transformation/user-defined-functions) class and implementing the `__wrapped__` function. -```bash -curl -X 'POST' 'http://0.0.0.0:8000/v1/pw_ai_answer' -H 'accept: */*' -H 'Content-Type: application/json' -d '{ - "prompt": "How much was Marketable securities worth in 2021 in the consolidated balance sheets?" -}' -``` -> `$118,704 million` ## Conclusion @@ -134,6 +144,6 @@ RAG applications are most effective when tailored to your specific use case. Her - Indexing and retrieval strategies: Choose the most efficient approach for your data and search needs. - User Interface (UI): Design a user-friendly interface that caters to your end users' workflows. -Ready to Get Started? +Ready to Get Started? -Let's discuss how we can help you build a powerful, customized RAG application. [Reach us here!](https://pathway.com/solutions/enterprise-generative-ai?modal=requestdemo) +Let's discuss how we can help you build a powerful, customized RAG application. 
[Reach us here to talk or request a demo!](https://pathway.com/solutions/enterprise-generative-ai?modal=requestdemo) diff --git a/examples/pipelines/gpt_4o_multimodal_rag/app.py b/examples/pipelines/gpt_4o_multimodal_rag/app.py index cbfafd6..66b7deb 100644 --- a/examples/pipelines/gpt_4o_multimodal_rag/app.py +++ b/examples/pipelines/gpt_4o_multimodal_rag/app.py @@ -9,7 +9,7 @@ import pathway as pw from dotenv import load_dotenv from pathway.udfs import DiskCache, ExponentialBackoffRetryStrategy -from pathway.xpacks.llm import embedders, llms # , parsers, splitters +from pathway.xpacks.llm import embedders, llms, prompts # , parsers, splitters from pathway.xpacks.llm.question_answering import BaseRAGQuestionAnswerer from pathway.xpacks.llm.vector_store import VectorStoreServer from src.ext_parsers import OpenParse @@ -23,33 +23,6 @@ ) -class RAGApp(BaseRAGQuestionAnswerer): - @pw.table_transformer - def pw_ai_query(self, pw_ai_queries: pw.Table) -> pw.Table: - """Main function for RAG applications that answer questions - based on available information.""" - - pw_ai_results = pw_ai_queries + self.indexer.retrieve_query( - pw_ai_queries.select( - metadata_filter=pw.this.filters, - filepath_globpattern=pw.cast(str | None, None), - query=pw.this.prompt, - k=6, - ) - ).select( - docs=pw.this.result, - ) - - pw_ai_results += pw_ai_results.select( - rag_prompt=self.long_prompt_template(pw.this.prompt, pw.this.docs), - ) - - pw_ai_results += pw_ai_results.select( - result=self.llm(llms.prompt_chat_single_qa(pw.this.rag_prompt)) - ) - return pw_ai_results - - if __name__ == "__main__": path = "./data/20230203_alphabet_10K.pdf" @@ -83,9 +56,11 @@ def pw_ai_query(self, pw_ai_queries: pw.Table) -> pw.Table: parser=parser, ) - app = RAGApp( + app = BaseRAGQuestionAnswerer( llm=chat, indexer=doc_store, + search_topk=6, + short_prompt_template=prompts.prompt_qa, ) app.build_server(host=app_host, port=app_port) diff --git a/examples/pipelines/gpt_4o_multimodal_rag/requirements.txt b/examples/pipelines/gpt_4o_multimodal_rag/requirements.txt index 0f64f9c..c832808 100644 --- a/examples/pipelines/gpt_4o_multimodal_rag/requirements.txt +++ b/examples/pipelines/gpt_4o_multimodal_rag/requirements.txt @@ -1,4 +1,4 @@ -pathway[xpack-llm] +pathway[xpack-llm]>=0.11.0 openparse==0.5.6 python-dotenv==1.0.1 unstructured[all-docs]==0.10.28