From 67856227f483213ac1029bd384e7c98c24bf6cb4 Mon Sep 17 00:00:00 2001
From: Dmitrii Cherkasov
Date: Thu, 20 Feb 2025 15:34:28 -0800
Subject: [PATCH 1/2] Adds documentation for the AQUA HTTP client

---
 .../large_language_model/aqua_client.rst      | 139 ++++++++++++++++++
 .../user_guide/large_language_model/index.rst |  30 +---
 .../large_language_model/training_llm.rst     |   9 +-
 3 files changed, 152 insertions(+), 26 deletions(-)
 create mode 100644 docs/source/user_guide/large_language_model/aqua_client.rst

diff --git a/docs/source/user_guide/large_language_model/aqua_client.rst b/docs/source/user_guide/large_language_model/aqua_client.rst
new file mode 100644
index 000000000..70a163e10
--- /dev/null
+++ b/docs/source/user_guide/large_language_model/aqua_client.rst
@@ -0,0 +1,139 @@
+AI Quick Actions HTTP Client
+****************************
+
+.. versionadded:: 2.13.0
+
+The AI Quick Actions client is a centralized, reusable component for interacting with the OCI Model Deployment service.
+
+**Implementation Highlights:**
+
+- Offers both synchronous (Client) and asynchronous (AsyncClient) implementations using the ``httpx`` library.
+- Implements a custom OCIAuth authentication mechanism that integrates with OCI Signer for secure and compliant communications.
+- Utilizes the ``tenacity`` library to enable robust retry behavior when handling transient errors.
+
+Authentication
+==============
+
+The AI Quick Actions client supports the same authentication methods as other OCI services, including API key, session token, instance principal, and resource principal. For additional details, please refer to the `authentication guide `_. Ensure you have the necessary `access policies `_ to connect to the OCI Data Science Model Deployment endpoint.
+
+Usage
+=====
+
+Sync Usage
+----------
+
+**Text Completion**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import Client
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = Client(endpoint="https://<your_model_deployment_endpoint>/predict")
+    response = client.generate(
+        prompt="Tell me a joke",
+        payload={"model": "odsc-llm"},
+        stream=False,
+    )
+    print(response)
+
+**Chat Completion**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import Client
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = Client(endpoint="https://<your_model_deployment_endpoint>/predict")
+    response = client.chat(
+        messages=[{"role": "user", "content": "Tell me a joke."}],
+        payload={"model": "odsc-llm"},
+        stream=False,
+    )
+    print(response)
+
+**Streaming**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import Client
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = Client(endpoint="https://<your_model_deployment_endpoint>/predict")
+    response = client.chat(
+        messages=[{"role": "user", "content": "Tell me a joke."}],
+        payload={"model": "odsc-llm"},
+        stream=True,
+    )
+
+    for chunk in response:
+        print(chunk)
+
+**Embedding**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import Client
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = Client(endpoint="https://<your_model_deployment_endpoint>/predict")
+    response = client.embeddings(
+        input=["one", "two"]
+    )
+    print(response)
+
+
+Async Usage
+-----------
+
+The following examples demonstrate how to perform the same operations using the asynchronous client with Python's async/await syntax.
+
+**Text Completion**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import AsyncClient
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = AsyncClient(endpoint="https://<your_model_deployment_endpoint>/predict")
+    response = await client.generate(
+        prompt="Tell me a joke",
+        payload={"model": "odsc-llm"},
+        stream=False,
+    )
+    print(response)
+
+**Streaming**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import AsyncClient
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = AsyncClient(endpoint="https://<your_model_deployment_endpoint>/predict")
+    async for chunk in await client.generate(
+        prompt="Tell me a joke",
+        payload={"model": "odsc-llm"},
+        stream=True,
+    ):
+        print(chunk)
+
+**Embedding**
+
+.. code-block:: python3
+
+    import ads
+    from ads.aqua import AsyncClient
+    ads.set_auth(auth="security_token", profile="<your_profile>")
+
+    client = AsyncClient(endpoint="https://<your_model_deployment_endpoint>/predict")
+    response = await client.embeddings(
+        input=["one", "two"]
+    )
+    print(response)

diff --git a/docs/source/user_guide/large_language_model/index.rst b/docs/source/user_guide/large_language_model/index.rst
index 2ba43655c..56d72b2ef 100644
--- a/docs/source/user_guide/large_language_model/index.rst
+++ b/docs/source/user_guide/large_language_model/index.rst
@@ -4,39 +4,19 @@
 Large Language Model
 ####################
 
-Oracle Cloud Infrastructure (OCI) provides fully managed infrastructure to work with Large Language Model (LLM).
+Oracle Cloud Infrastructure (OCI) `Data Science `_ is a fully managed, serverless platform that empowers data science teams to build, train, and manage machine learning models on Oracle Cloud Infrastructure.
 
-Train and Deploy LLM
-********************
-You can train LLM at scale with multi-node and multi-GPU using `Data Science Jobs (Jobs) `_, and deploy it with `Data Science Model Deployment (Model Deployments) `_. The following blog posts show examples training and deploying Llama2 models:
+The platform features `AI Quick Actions `_, which enable you to deploy, evaluate, and fine-tune foundation models directly within OCI Data Science. Designed for users eager to quickly harness AI capabilities, these actions provide a streamlined, code-free, and efficient environment for working with foundation models. You can access AI Quick Actions directly from the Data Science Notebook.
-* `Multi-GPU multinode fine-tuning Llama2 on OCI Data Science `_
-* `Deploy Llama 2 in OCI Data Science `_
-* `Quantize and deploy Llama 2 70B on cost-effective NVIDIA A10 Tensor Core GPUs in OCI Data Science `_
-
-
-Integration with LangChain
-**************************
-ADS is designed to work with LangChain, enabling developers to incorporate various LangChain components and models deployed on OCI seamlessly into their applications. Additionally, ADS can package LangChain applications and deploy them as a REST API endpoint using OCI Data Science Model Deployment.
-
-* `Bridging cloud and conversational AI: LangChain and OCI Data Science platform `_
-* `Deploy LangChain applications as OCI model deployments `_
-
-
-.. admonition:: Installation
-   :class: note
-
-   Install ADS and other dependencies for LLM integrations.
-
-   .. code-block:: bash
-
-      $ python3 -m pip install "oracle-ads[llm]"
+Detailed documentation on deploying LLM models in OCI Data Science using AI Quick Actions is available `here `_ and `here `_.
+
+This section provides comprehensive information on integrating OCI with **LangChain, Autogen, LlamaIndex**, and other third-party **LLM frameworks**.
 
 .. toctree::
    :maxdepth: 2
 
+   aqua_client
    training_llm
    langchain_models
    autogen_integration

diff --git a/docs/source/user_guide/large_language_model/training_llm.rst b/docs/source/user_guide/large_language_model/training_llm.rst
index 8b5ee3d71..7af9d26e1 100644
--- a/docs/source/user_guide/large_language_model/training_llm.rst
+++ b/docs/source/user_guide/large_language_model/training_llm.rst
@@ -1,6 +1,14 @@
 Training Large Language Model
 *****************************
 
+.. admonition:: Note
+   :class: note
+
+   The example provided below is obsolete. Instead, use `AI Quick Actions `_ to deploy, evaluate, and fine-tune foundation models in OCI Data Science.
+
+
+
+
 .. versionadded:: 2.8.8
 
 Oracle Cloud Infrastructure (OCI) `Data Science Jobs (Jobs) `_
@@ -55,4 +63,3 @@ The same training script also support Parameter-Efficient Fine-Tuning (PEFT). Yo
     torchrun llama_finetuning.py --enable_fsdp --use_peft --peft_method lora \
         --pure_bf16 --batch_size_training 1 \
         --model_name meta-llama/Llama-2-7b-hf --output_dir /home/datascience/outputs
-

From 489e4031fc7a705b7ad15208e08b373ded1f981b Mon Sep 17 00:00:00 2001
From: Dmitrii Cherkasov
Date: Fri, 21 Feb 2025 15:40:10 -0800
Subject: [PATCH 2/2] Update aqua_client.rst

---
 docs/source/user_guide/large_language_model/aqua_client.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/source/user_guide/large_language_model/aqua_client.rst b/docs/source/user_guide/large_language_model/aqua_client.rst
index 70a163e10..ad3d7038e 100644
--- a/docs/source/user_guide/large_language_model/aqua_client.rst
+++ b/docs/source/user_guide/large_language_model/aqua_client.rst
@@ -7,9 +7,8 @@ The AI Quick Actions client is a centralized, reusable component for interacting with the OCI Model Deployment service.
 
 **Implementation Highlights:**
 
-- Offers both synchronous (Client) and asynchronous (AsyncClient) implementations using the ``httpx`` library.
-- Implements a custom OCIAuth authentication mechanism that integrates with OCI Signer for secure and compliant communications.
-- Utilizes the ``tenacity`` library to enable robust retry behavior when handling transient errors.
+- Offers both synchronous (Client) and asynchronous (AsyncClient)
+- Integrates with OCI Authentication patterns
 
 Authentication
 ==============
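A note on the async examples added by this patch: bare top-level ``await`` only works where an event loop is already running (Jupyter/IPython notebooks). In a plain Python script, the documented calls must be wrapped in a coroutine and driven with ``asyncio.run()``. The sketch below shows that wrapper structure only; the actual client calls are shown as comments because they require a live OCI Model Deployment (``<your_profile>`` and the endpoint are placeholders, and ``oracle-ads`` >= 2.13.0 is assumed).

```python
import asyncio


async def main():
    # In a real environment the body would be the documented calls, e.g.:
    #
    #   import ads
    #   from ads.aqua import AsyncClient
    #   ads.set_auth(auth="security_token", profile="<your_profile>")
    #   client = AsyncClient(endpoint="https://<your_model_deployment_endpoint>/predict")
    #   return await client.generate(prompt="Tell me a joke",
    #                                payload={"model": "odsc-llm"},
    #                                stream=False)
    #
    # Stub return value so the wrapper itself runs without OCI access.
    return "stubbed-response"


# asyncio.run() creates an event loop, runs the coroutine to completion,
# and closes the loop -- this is what a notebook does for you implicitly.
result = asyncio.run(main())
print(result)
```

In a notebook the examples work as written, since IPython supplies the running event loop for top-level ``await``.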