From f648f63bee7d33ee61e5a5c48d397b4e61b25c03 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 8 Nov 2024 12:19:09 +0700 Subject: [PATCH 1/5] chore/embeddings-docs --- docs/docs/capabilities/embeddings.md | 7 +- docs/static/openapi/cortex.json | 849 ++++++++++++++++++++++----- 2 files changed, 698 insertions(+), 158 deletions(-) diff --git a/docs/docs/capabilities/embeddings.md b/docs/docs/capabilities/embeddings.md index 2c2fb4d54..12c6d05c6 100644 --- a/docs/docs/capabilities/embeddings.md +++ b/docs/docs/capabilities/embeddings.md @@ -1,7 +1,8 @@ --- title: Embeddings --- - :::info -🚧 Cortex is currently under development, and this page is a stub for future development. -::: \ No newline at end of file +🚧 Cortex is currently under development, and this page is a stub for future development. +::: + +cortex.cpp now support embeddings endpoint with fully OpenAI compatible. diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 763337b5c..302ea984f 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -22,7 +22,9 @@ "description": "The assistant has been successfully created." 
} }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "get": { "operationId": "AssistantsController_findAll", @@ -81,7 +83,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] } }, "/v1/assistants/{id}": { @@ -112,7 +116,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] }, "delete": { "operationId": "AssistantsController_remove", @@ -141,7 +147,9 @@ } } }, - "tags": ["Assistants"] + "tags": [ + "Assistants" + ] } }, "/healthz": { @@ -158,7 +166,9 @@ } } }, - "tags": ["Server"] + "tags": [ + "Server" + ] } }, "/processManager/destroy": { @@ -175,7 +185,112 @@ } } }, - "tags": ["Server"] + "tags": [ + "Server" + ] + } + }, + "/v1/embeddings": { + "post": { + "summary": "Create embeddings", + "description": "Creates an embedding vector representing the input text.", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "array", + "items": { + "type": "integer" + } + }, + { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + ], + "description": "Input text to embed, encoded as a string or array of tokens. Cannot be empty." 
+ }, + "model": { + "type": "string", + "description": "ID of the model to use.", + "example": "text-embedding-ada-002" + }, + "encoding_format": { + "type": "string", + "description": "The format to return the embeddings in.", + "enum": [ + "float", + "base64" + ], + "default": "float" + } + }, + "required": [ + "input", + "model" + ] + } + } + } + }, + "responses": { + "200": { + "description": "A list of embedding vectors", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The index of the embedding in the list of embeddings." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "The embedding vector, which is a list of floats." + }, + "object": { + "type": "string", + "description": "The object type, which is always 'embedding'.", + "example": "embedding" + } + } + } + } + } + } + } + } + } + } } }, "/v1/chat/completions": { @@ -215,7 +330,9 @@ } } }, - "tags": ["Chat"] + "tags": [ + "Chat" + ] } }, "/v1/models/pull": { @@ -314,10 +431,14 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] }, "delete": { - "tags": ["Pulling Models"], + "tags": [ + "Pulling Models" + ], "summary": "Stop model download", "description": "Stops the download of a model with the corresponding taskId provided in the request body", "operationId": "ModelsController_stopModelDownload", @@ -333,7 +454,9 @@ "description": "The unique identifier of the download task to be stopped" } }, - "required": ["taskId"] + "required": [ + "taskId" + ] } } } @@ -428,7 +551,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/start": { @@ -461,7 +586,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/stop": { @@ -494,7 +621,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running 
Models" + ] } }, "/v1/models/{id}": { @@ -525,7 +654,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] }, "delete": { "operationId": "ModelsController_remove", @@ -554,7 +685,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/{model}": { @@ -594,7 +727,9 @@ } } }, - "tags": ["Running Models"] + "tags": [ + "Running Models" + ] } }, "/v1/models/import": { @@ -635,14 +770,18 @@ } } }, - "tags": ["Pulling Models"] + "tags": [ + "Pulling Models" + ] } }, "/v1/threads": { "post": { "operationId": "ThreadsController_create", "summary": "Create thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Creates a new thread.", "parameters": [], "requestBody": { @@ -671,7 +810,9 @@ "get": { "operationId": "ThreadsController_findAll", "summary": "List threads", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Lists all the available threads along with its configurations.", "parameters": [], "responses": { @@ -695,7 +836,9 @@ "get": { "operationId": "ThreadsController_retrieveMessage", "summary": "Retrieve message", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Retrieves a message.", "parameters": [ { @@ -731,7 +874,9 @@ "post": { "operationId": "ThreadsController_updateMessage", "summary": "Modify message", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Modifies a message.", "responses": { "201": { @@ -778,7 +923,9 @@ "operationId": "ThreadsController_deleteMessage", "summary": "Delete message", "description": "Deletes a message.", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "parameters": [ { "name": "thread_id", @@ -815,7 +962,9 @@ "get": { "operationId": "ThreadsController_getMessagesOfThread", "summary": "List messages", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Returns a list of messages for a given thread.", "parameters": [ { @@ -883,7 +1032,9 @@ "post": { "operationId": 
"ThreadsController_createMessageInThread", "summary": "Create message", - "tags": ["Messages"], + "tags": [ + "Messages" + ], "description": "Create a message.", "responses": { "201": { @@ -924,7 +1075,9 @@ "operationId": "ThreadsController_cleanThread", "summary": "Clean thread", "description": "Deletes all messages in a thread.", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "parameters": [ { "name": "thread_id", @@ -946,7 +1099,9 @@ "get": { "operationId": "ThreadsController_retrieveThread", "summary": "Retrieve thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Retrieves a thread.", "parameters": [ { @@ -974,7 +1129,9 @@ "post": { "operationId": "ThreadsController_modifyThread", "summary": "Modify thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Modifies a thread.", "parameters": [ { @@ -1015,7 +1172,9 @@ "delete": { "operationId": "ThreadsController_remove", "summary": "Delete thread", - "tags": ["Threads"], + "tags": [ + "Threads" + ], "description": "Deletes a specific thread defined by a thread `id` .", "parameters": [ { @@ -1052,7 +1211,9 @@ "description": "" } }, - "tags": ["System"] + "tags": [ + "System" + ] }, "get": { "operationId": "SystemController_get", @@ -1064,7 +1225,9 @@ "description": "Ok" } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/download": { @@ -1085,7 +1248,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/model": { @@ -1106,7 +1271,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/system/events/resources": { @@ -1127,7 +1294,9 @@ } } }, - "tags": ["System"] + "tags": [ + "System" + ] } }, "/v1/engines/{name}": { @@ -1142,7 +1311,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1189,7 +1362,9 @@ } } }, - 
"tags": ["Engines"] + "tags": [ + "Engines" + ] }, "post": { "summary": "Install an engine", @@ -1201,7 +1376,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1235,7 +1414,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "delete": { "summary": "Uninstall an engine", @@ -1247,7 +1428,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1324,7 +1509,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/default": { @@ -1338,7 +1525,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1370,7 +1561,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "post": { "summary": "Set default engine variant", @@ -1382,7 +1575,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The type of engine" @@ -1424,7 +1621,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/load": { @@ -1438,7 +1637,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -1462,7 +1665,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] }, "delete": { "summary": "Unload engine", @@ 
-1474,7 +1679,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -1498,7 +1707,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/engines/{name}/update": { @@ -1512,7 +1723,11 @@ "required": true, "schema": { "type": "string", - "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"], + "enum": [ + "llama-cpp", + "onnxruntime", + "tensorrt-llm" + ], "default": "llama-cpp" }, "description": "The name of the engine to update" @@ -1536,7 +1751,9 @@ } } }, - "tags": ["Engines"] + "tags": [ + "Engines" + ] } }, "/v1/configs": { @@ -1556,7 +1773,10 @@ "items": { "type": "string" }, - "example": ["http://localhost:39281", "https://cortex.so"] + "example": [ + "http://localhost:39281", + "https://cortex.so" + ] }, "cors": { "type": "boolean", @@ -1575,10 +1795,14 @@ } } }, - "tags": ["Configurations"] + "tags": [ + "Configurations" + ] }, "patch": { - "tags": ["Configurations"], + "tags": [ + "Configurations" + ], "summary": "Update configuration settings", "requestBody": { "required": true, @@ -1598,7 +1822,10 @@ "type": "string" }, "description": "List of allowed origins.", - "example": ["http://localhost:39281", "https://cortex.so"] + "example": [ + "http://localhost:39281", + "https://cortex.so" + ] } } } @@ -1656,6 +1883,10 @@ "name": "Chat", "description": "This endpoint initiates interaction with a Large Language Models (LLM)." }, + { + "name": "Embeddings", + "description": "This endpoint create embeddings for a given input text or tokens." + }, { "name": "Assistants", "description": "These endpoints manage the lifecycle of an Assistant within a conversation thread." 
@@ -1698,6 +1929,7 @@ "name": "CORTEX", "tags": [ "Chat", + "Embeddings", "Engines", "Events", "Pulling Models", @@ -1880,7 +2112,11 @@ "description": "Indicates whether the assistant was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "Message": { "type": "object", @@ -1897,14 +2133,21 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "name": { "type": "string", "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["role"] + "required": [ + "role" + ] }, "SystemMessage": { "allOf": [ @@ -1933,7 +2176,10 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." } }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] } ] }, @@ -1984,7 +2230,10 @@ "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role." 
} }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] } ] }, @@ -2096,7 +2345,10 @@ "type": "string" } }, - "required": ["content", "tool_call_id"] + "required": [ + "content", + "tool_call_id" + ] } ] }, @@ -2113,26 +2365,36 @@ "properties": { "type": { "type": "string", - "enum": ["text"] + "enum": [ + "text" + ] }, "text": { "type": "string" } }, - "required": ["type", "text"] + "required": [ + "type", + "text" + ] }, "ImageContentPart": { "type": "object", "properties": { "type": { "type": "string", - "enum": ["image_url"] + "enum": [ + "image_url" + ] }, "image_url": { "$ref": "#/components/schemas/ImageUrl" } }, - "required": ["type", "image_url"] + "required": [ + "type", + "image_url" + ] }, "AudioContentPart": { "type": "object", @@ -2145,7 +2407,10 @@ "$ref": "#/components/schemas/InputAudio" } }, - "required": ["type", "input_audio"] + "required": [ + "type", + "input_audio" + ] }, "RefusalContentPart": { "type": "object", @@ -2157,7 +2422,10 @@ "type": "string" } }, - "required": ["type", "refusal"] + "required": [ + "type", + "refusal" + ] }, "ImageUrl": { "type": "object", @@ -2172,7 +2440,9 @@ "description": "Specifies the detail level of the image. Defaults to `auto`." } }, - "required": ["url"] + "required": [ + "url" + ] }, "InputAudio": { "type": "object", @@ -2183,11 +2453,17 @@ }, "format": { "type": "string", - "enum": ["wav", "mp3"], + "enum": [ + "wav", + "mp3" + ], "description": "The format of the encoded audio data. Currently supports `wav` and `mp3`." } }, - "required": ["data", "format"] + "required": [ + "data", + "format" + ] }, "Audio": { "type": "object", @@ -2198,7 +2474,9 @@ "description": "Unique identifier for a previous audio response from the model." 
} }, - "required": ["id"] + "required": [ + "id" + ] }, "ToolCall": { "type": "object", @@ -2213,7 +2491,11 @@ "$ref": "#/components/schemas/FunctionCall" } }, - "required": ["id", "type", "function"] + "required": [ + "id", + "type", + "function" + ] }, "FunctionCall": { "type": "object", @@ -2225,7 +2507,10 @@ "type": "string" } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] }, "CreateChatCompletionDto": { "type": "object", @@ -2279,7 +2564,9 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": ["End"], + "example": [ + "End" + ], "type": "array", "items": { "type": "string" @@ -2309,10 +2596,15 @@ "type": "array", "items": { "type": "string", - "enum": ["text", "audio"] + "enum": [ + "text", + "audio" + ] }, "description": "Specifies the modalities (types of input) supported by the model. Currently, cortex only support text modalities. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", - "example": ["text"] + "example": [ + "text" + ] }, "audio": { "description": "Parameters for audio output. Required when audio output is requested with `modalities: ['audio']`. We are actively working on this feature to bring cortex as fully OpenAI compatible platform. Planning and roadmap for this feature can be found [**here**](https://github.com/janhq/cortex.cpp/issues/1582).", @@ -2325,10 +2617,19 @@ "format": { "type": "string", "description": "Specifies the output audio format. 
Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.", - "enum": ["mp3", "wav", "flac", "opus", "pcm16"] + "enum": [ + "mp3", + "wav", + "flac", + "opus", + "pcm16" + ] } }, - "required": ["voice", "format"] + "required": [ + "voice", + "format" + ] }, "store": { "type": "boolean", @@ -2375,10 +2676,16 @@ "type": { "type": "string", "description": "The format of the generated output. Must be one of `text`, `json_schema` or `json_object`.", - "enum": ["text", "json_object", "json_schema"] + "enum": [ + "text", + "json_object", + "json_schema" + ] } }, - "required": ["type"] + "required": [ + "type" + ] }, "seed": { "type": "number", @@ -2408,26 +2715,37 @@ "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "$ref": "#/components/schemas/Function" } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] }, "tool_choice": { "anyOf": [ { "type": "string", - "enum": ["none", "auto", "required"] + "enum": [ + "none", + "auto", + "required" + ] }, { "type": "object", "properties": { "type": { "type": "string", - "enum": ["function"] + "enum": [ + "function" + ] }, "function": { "type": "object", @@ -2436,10 +2754,15 @@ "type": "string" } }, - "required": ["name"] + "required": [ + "name" + ] } }, - "required": ["type", "function"] + "required": [ + "type", + "function" + ] } ] }, @@ -2514,7 +2837,10 @@ "description": "Minimum number of tokens to keep. This parameter only supported by `llama-cpp` engine." } }, - "required": ["messages", "model"] + "required": [ + "messages", + "model" + ] }, "Function": { "type": "object", @@ -2534,7 +2860,9 @@ "default": false } }, - "required": ["name"] + "required": [ + "name" + ] }, "MessageDto": { "type": "object", @@ -2548,7 +2876,10 @@ "description": "The role of the participant in the chat, such as 'user' or 'system', indicating who is the sender of the message." 
} }, - "required": ["content", "role"] + "required": [ + "content", + "role" + ] }, "ChoiceDto": { "type": "object", @@ -2570,7 +2901,11 @@ ] } }, - "required": ["finish_reason", "index", "message"] + "required": [ + "finish_reason", + "index", + "message" + ] }, "UsageDto": { "type": "object", @@ -2588,7 +2923,11 @@ "description": "The total number of tokens used in both the prompt and the completion, summarizing the entire token count of the chat operation." } }, - "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] }, "ChatCompletionResponseDto": { "type": "object", @@ -2615,11 +2954,17 @@ "type": "object", "properties": { "content": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The contents of the message." }, "refusal": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The refusal message generated by the model." }, "tool_calls": { @@ -2648,10 +2993,17 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] } }, - "required": ["id", "type", "function"] + "required": [ + "id", + "type", + "function" + ] } }, "role": { @@ -2672,7 +3024,10 @@ "description": "The name of the function to call." } }, - "required": ["arguments", "name"] + "required": [ + "arguments", + "name" + ] }, "audio": { "type": "object", @@ -2695,17 +3050,27 @@ "description": "Transcript of the audio generated by the model." 
} }, - "required": ["id", "expires_at", "data", "transcript"] + "required": [ + "id", + "expires_at", + "data", + "transcript" + ] } }, - "required": ["role"] + "required": [ + "role" + ] }, "logprobs": { "type": "object", "description": "Log probability information for the choice.", "properties": { "content": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -2719,11 +3084,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "top_logprobs": { @@ -2741,15 +3112,24 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." 
} }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "refusal": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -2763,17 +3143,27 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } } } } }, - "required": ["finish_reason", "index", "message"] + "required": [ + "finish_reason", + "index", + "message" + ] } }, "created": { @@ -2785,7 +3175,10 @@ "description": "The model used for the chat completion." }, "service_tier": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -2825,7 +3218,10 @@ "description": "Tokens generated by the model for reasoning." } }, - "required": ["audio_tokens", "reasoning_tokens"] + "required": [ + "audio_tokens", + "reasoning_tokens" + ] }, "prompt_tokens_details": { "type": "object", @@ -2840,7 +3236,10 @@ "description": "Cached tokens present in the prompt." 
} }, - "required": ["audio_tokens", "cached_tokens"] + "required": [ + "audio_tokens", + "cached_tokens" + ] } }, "required": [ @@ -2880,7 +3279,10 @@ "description": "A chat completion delta generated by streamed model responses.", "properties": { "content": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The contents of the chunk message." }, "function_call": { @@ -2918,10 +3320,18 @@ "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function." } }, - "required": ["name", "arguments"] + "required": [ + "name", + "arguments" + ] } }, - "required": ["index", "id", "type", "function"] + "required": [ + "index", + "id", + "type", + "function" + ] } }, "role": { @@ -2929,7 +3339,10 @@ "description": "The role of the author of this message." }, "refusal": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The refusal message generated by the model." } } @@ -2939,7 +3352,10 @@ "description": "Log probability information for the choice.", "properties": { "content": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message content tokens with log probability information.", "items": { "type": "object", @@ -2953,11 +3369,17 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. 
Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "top_logprobs": { @@ -2975,15 +3397,24 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } }, "refusal": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of message refusal tokens with log probability information.", "items": { "type": "object", @@ -2997,17 +3428,26 @@ "description": "The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely." }, "bytes": { - "type": ["array", "null"], + "type": [ + "array", + "null" + ], "description": "A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token." } }, - "required": ["token", "logprob"] + "required": [ + "token", + "logprob" + ] } } } }, "finish_reason": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The reason the model stopped generating tokens. 
This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool, or function_call (deprecated) if the model called a function." }, "index": { @@ -3015,7 +3455,10 @@ "description": "The index of the choice in the list of choices." } }, - "required": ["delta", "index"] + "required": [ + "delta", + "index" + ] } }, "created": { @@ -3027,7 +3470,10 @@ "description": "The model used to generate the completion." }, "service_tier": { - "type": ["string", "null"], + "type": [ + "string", + "null" + ], "description": "The service tier used for processing the request. This field is only included if the service_tier parameter is specified in the request." }, "system_fingerprint": { @@ -3055,7 +3501,11 @@ "description": "Total number of tokens used in the request (prompt + completion)." } }, - "required": ["completion_tokens", "prompt_tokens", "total_tokens"] + "required": [ + "completion_tokens", + "prompt_tokens", + "total_tokens" + ] } }, "required": [ @@ -3076,7 +3526,9 @@ "description": "The name of the embedding model to be used." }, "input": { - "example": ["Hello World"], + "example": [ + "Hello World" + ], "description": "The text or token array(s) to be embedded. This can be a single string, an array of strings, or an array of token arrays to embed multiple inputs in one request.", "type": "array", "items": { @@ -3094,7 +3546,10 @@ "description": "Defines the number of dimensions for the output embeddings. This feature is supported by certain models only. This field is optional." 
} }, - "required": ["model", "input"] + "required": [ + "model", + "input" + ] }, "EmbeddingsResponseDto": { "type": "object", @@ -3123,11 +3578,18 @@ ] } }, - "required": ["object", "model", "embedding", "usage"] + "required": [ + "object", + "model", + "embedding", + "usage" + ] }, "PullModelRequest": { "type": "object", - "required": ["model"], + "required": [ + "model" + ], "properties": { "model": { "type": "string", @@ -3185,7 +3647,9 @@ }, "files": { "description": "The URL sources from which the model downloaded or accessed.", - "example": ["https://huggingface.co/cortexso/mistral/tree/gguf"], + "example": [ + "https://huggingface.co/cortexso/mistral/tree/gguf" + ], "oneOf": [ { "type": "array", @@ -3205,7 +3669,9 @@ }, "stop": { "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", - "example": ["End"], + "example": [ + "End" + ], "type": "array", "items": { "type": "string" @@ -3275,7 +3741,10 @@ "default": "" } }, - "required": ["model", "files"] + "required": [ + "model", + "files" + ] }, "StartModelSuccessDto": { "type": "object", @@ -3289,7 +3758,10 @@ "description": "The unique identifier of the model." } }, - "required": ["message", "modelId"] + "required": [ + "message", + "modelId" + ] }, "ModelStartDto": { "type": "object", @@ -3336,7 +3808,9 @@ "example": "/tmp/model.gguf" } }, - "required": ["model"] + "required": [ + "model" + ] }, "ModelStopDto": { "type": "object", @@ -3347,7 +3821,9 @@ "description": "A downloaded model name." 
} }, - "required": ["model"] + "required": [ + "model" + ] }, "ImportModelRequest": { "type": "object", @@ -3367,10 +3843,16 @@ "option": { "type": "string", "description": "Import options such as symlink or copy.", - "enum": ["symlink", "copy"] + "enum": [ + "symlink", + "copy" + ] } }, - "required": ["model", "modelPath"] + "required": [ + "model", + "modelPath" + ] }, "ImportModelResponse": { "type": "object", @@ -3389,7 +3871,11 @@ "example": "OK" } }, - "required": ["message", "modelHandle", "result"] + "required": [ + "message", + "modelHandle", + "result" + ] }, "CommonResponseDto": { "type": "object", @@ -3399,7 +3885,9 @@ "description": "The response success or error message." } }, - "required": ["message"] + "required": [ + "message" + ] }, "EngineUninstallationResponseDto": { "type": "object", @@ -3455,7 +3943,11 @@ "example": "OK" } }, - "required": ["data", "object", "result"] + "required": [ + "data", + "object", + "result" + ] }, "Engine": { "type": "object", @@ -3485,7 +3977,12 @@ "example": "0.1.34" } }, - "required": ["description", "name", "productName", "status"] + "required": [ + "description", + "name", + "productName", + "status" + ] }, "ModelDto": { "type": "object", @@ -3501,7 +3998,9 @@ "description": "A predefined text or framework that guides the AI model's response generation." 
}, "stop": { - "example": ["End"], + "example": [ + "End" + ], "description": "Defines specific tokens or phrases that signal the model to stop producing further output.", "type": "array", "items": { @@ -3613,7 +4112,9 @@ "example": "llamacpp" } }, - "required": ["id"] + "required": [ + "id" + ] }, "ListModelsResponseDto": { "type": "object", @@ -3621,7 +4122,9 @@ "object": { "type": "string", "example": "list", - "enum": ["list"] + "enum": [ + "list" + ] }, "data": { "description": "List of models", @@ -3631,7 +4134,10 @@ } } }, - "required": ["object", "data"] + "required": [ + "object", + "data" + ] }, "UpdateModelDto": { "type": "object", @@ -3650,7 +4156,9 @@ "items": { "type": "string" }, - "example": [""] + "example": [ + "" + ] }, "stream": { "type": "boolean", @@ -3809,7 +4317,11 @@ "description": "Indicates whether the model was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "CreateThreadAssistantDto": { "type": "object", @@ -3899,7 +4411,10 @@ "tool_resources": { "type": "object", "example": { - "resources": ["database1", "database2"] + "resources": [ + "database1", + "database2" + ] }, "description": "Tool resources for the assistant." } @@ -3927,7 +4442,9 @@ } } }, - "required": ["assistants"] + "required": [ + "assistants" + ] }, "ContentDto": { "type": "object", @@ -3946,7 +4463,10 @@ "description": "Text content of the message along with any annotations." } }, - "required": ["type", "text"] + "required": [ + "type", + "text" + ] }, "GetMessageResponseDto": { "type": "object", @@ -4120,7 +4640,13 @@ "description": "Indicates whether there are more messages to retrieve." } }, - "required": ["object", "data", "first_id", "last_id", "has_more"] + "required": [ + "object", + "data", + "first_id", + "last_id", + "has_more" + ] }, "CreateMessageDto": { "type": "object", @@ -4136,7 +4662,10 @@ "description": "The text contents of the message." 
} }, - "required": ["role", "content"] + "required": [ + "role", + "content" + ] }, "UpdateMessageDto": { "type": "object", @@ -4162,7 +4691,11 @@ "description": "Indicates whether the message was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] }, "GetThreadResponseDto": { "type": "object", @@ -4183,7 +4716,9 @@ "description": "Unix timestamp representing the creation time of the thread." }, "assistants": { - "example": ["assistant-001"], + "example": [ + "assistant-001" + ], "description": "List of assistants involved in the thread.", "type": "array", "items": { @@ -4237,8 +4772,12 @@ "description": "Indicates whether the thread was successfully deleted." } }, - "required": ["id", "object", "deleted"] + "required": [ + "id", + "object", + "deleted" + ] } } } -} +} \ No newline at end of file From c8eab8acf997f823896056b557603af5c9b3b563 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 8 Nov 2024 12:53:39 +0700 Subject: [PATCH 2/5] chore: add embedding capabilities --- docs/docs/capabilities/embeddings.md | 95 +++++++ docs/static/openapi/cortex.json | 14 +- .../extensions/remote-engine/remote_engine.cc | 231 ++++++++++++++++++ .../extensions/remote-engine/remote_engine.h | 65 +++++ 4 files changed, 402 insertions(+), 3 deletions(-) create mode 100644 engine/extensions/remote-engine/remote_engine.cc create mode 100644 engine/extensions/remote-engine/remote_engine.h diff --git a/docs/docs/capabilities/embeddings.md b/docs/docs/capabilities/embeddings.md index 12c6d05c6..44f153556 100644 --- a/docs/docs/capabilities/embeddings.md +++ b/docs/docs/capabilities/embeddings.md @@ -6,3 +6,98 @@ title: Embeddings ::: cortex.cpp now support embeddings endpoint with fully OpenAI compatible. + +For embeddings API usage please refer to [API references](/api-reference#tag/chat/POST/v1/embeddings). This tutorial show you how to use embeddings in cortex with openai python SDK. 
+
+## Embeddings with OpenAI compatibility
+
+### 1. Start the server and run a model
+
+```
+cortex run llama3.1:8b-gguf-q4-km
+```
+
+### 2. Create a script `embeddings.py` with this content
+
+```
+from datetime import datetime
+from openai import OpenAI
+from pydantic import BaseModel
+ENDPOINT = "http://localhost:39281/v1"
+MODEL = "llama3.1:8b-gguf-q4-km"
+client = OpenAI(
+    base_url=ENDPOINT,
+    api_key="not-needed"
+)
+```
+
+### 3. Create embeddings
+
+```
+response = client.embeddings.create(input = "embedding", model=MODEL, encoding_format="base64")
+print(response)
+```
+
+The response will look like this
+
+```
+CreateEmbeddingResponse(
+    data=[
+        Embedding(
+            embedding='hjuAPOD8TryuPU8...',
+            index=0,
+            object='embedding'
+        )
+    ],
+    model='meta-llama3.1-8b-instruct',
+    object='list',
+    usage=Usage(
+        prompt_tokens=2,
+        total_tokens=2
+    )
+)
+```
+
+
+The output embeddings are encoded as a base64 string. By default, the model outputs the embeddings in float format.
+
+```
+response = client.embeddings.create(input = "embedding", model=MODEL)
+print(response)
+```
+
+The result will be
+
+```
+CreateEmbeddingResponse(
+    data=[
+        Embedding(
+            embedding=[0.1, 0.3, 0.4 ....],
+            index=0,
+            object='embedding'
+        )
+    ],
+    model='meta-llama3.1-8b-instruct',
+    object='list',
+    usage=Usage(
+        prompt_tokens=2,
+        total_tokens=2
+    )
+)
+```
+
+Cortex also supports all the input types that [OpenAI](https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-input) does.
+ +```sh +# input as string +response = client.embeddings.create(input = "embedding", model=MODEL) + +# input as array of string +response = client.embeddings.create(input = ["embedding"], model=MODEL) + +# input as array of tokens +response = client.embeddings.create(input = [12,44,123], model=MODEL) + +# input as array of arrays contain tokens +response = client.embeddings.create(input = [[912,312,54],[12,433,1241]], model=MODEL) +``` diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 302ea984f..c1ef736db 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -190,7 +190,7 @@ ] } }, - "v1/embeddings": { + "/v1/embeddings": { "post": { "summary": "Create embeddings", "description": "Creates an embedding vector representing the input text.", @@ -204,22 +204,27 @@ "input": { "oneOf": [ { - "type": "string" + "type": "string", + "description":"The string that will be turned into an embedding." }, { "type": "array", + "description" : "The array of strings that will be turned into an embedding.", "items": { "type": "string" } }, { "type": "array", + "description": "The array of integers that will be turned into an embedding.", "items": { "type": "integer" + } }, { "type": "array", + "description" : "The array of arrays containing integers that will be turned into an embedding.", "items": { "type": "array", "items": { @@ -290,7 +295,10 @@ } } } - } + }, + "tags": [ + "Embeddings" + ] } }, "/v1/chat/completions": { diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc new file mode 100644 index 000000000..e3d2b5679 --- /dev/null +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -0,0 +1,231 @@ +#include "remote_engine.h" +#include + +// Static callback function for CURL +static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, std::string* data) { + data->append(ptr, size * nmemb); + return size * nmemb; +} + 
+RemoteEngine::RemoteEngine() { + curl_global_init(CURL_GLOBAL_ALL); +} + +RemoteEngine::~RemoteEngine() { + curl_global_cleanup(); +} + +CurlResponse RemoteEngine::makeRequest(const std::string& url, + const std::string& api_key, + const std::string& body, + const std::string& method) { + CURL* curl = curl_easy_init(); + CurlResponse response; + + if (!curl) { + response.error = true; + response.error_message = "Failed to initialize CURL"; + return response; + } + + // Set up headers + struct curl_slist* headers = nullptr; + if (!api_key.empty()) { + std::string auth_header = renderTemplate(config_.api_key_template, {{"api_key", api_key}}); + headers = curl_slist_append(headers, auth_header.c_str()); + } + headers = curl_slist_append(headers, "Content-Type: application/json"); + + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + if (method == "POST") { + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); + } + + std::string response_string; + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); + + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + response.error = true; + response.error_message = curl_easy_strerror(res); + } else { + response.body = response_string; + } + + curl_slist_free_all(headers); + curl_easy_cleanup(curl); + return response; +} + +std::string RemoteEngine::renderTemplate(const std::string& templ, + const std::unordered_map& values) { + std::string result = templ; + for (const auto& [key, value] : values) { + std::string placeholder = "{{" + key + "}}"; + size_t pos = result.find(placeholder); + if (pos != std::string::npos) { + result.replace(pos, placeholder.length(), value); + } + } + return result; +} + +Json::Value RemoteEngine::transformRequest(const Json::Value& input, const std::string& type) { + if (!config_.transform_req.isMember(type)) { + return input; + } + + Json::Value 
output = input; + const Json::Value& transforms = config_.transform_req[type]; + + for (const auto& transform : transforms) { + if (transform.isString()) { + // Handle template-based transformation + if (transform.asString().find("template") != std::string::npos) { + // Implement template rendering logic here + continue; + } + } else if (transform.isObject()) { + // Handle key mapping transformations + for (const auto& key : transform.getMemberNames()) { + if (input.isMember(key)) { + output[transform[key].asString()] = input[key]; + output.removeMember(key); + } + } + } + } + return output; +} + +void RemoteEngine::GetModels(std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("url") || !json_body->isMember("api_key")) { + Json::Value error; + error["error"] = "Missing required fields: url or api_key"; + callback(Json::Value(), std::move(error)); + return; + } + + const std::string& url = (*json_body)["url"].asString(); + const std::string& api_key = (*json_body)["api_key"].asString(); + + auto response = makeRequest(url, api_key, "", "GET"); + + if (response.error) { + Json::Value error; + error["error"] = response.error_message; + callback(Json::Value(), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value error; + error["error"] = "Failed to parse response"; + callback(Json::Value(), std::move(error)); + return; + } + + callback(std::move(response_json), Json::Value()); +} + +void RemoteEngine::HandleChatCompletion(std::shared_ptr json_body, + std::function&& callback) { + if (!json_body->isMember("url") || !json_body->isMember("api_key") || + !json_body->isMember("request_body")) { + Json::Value error; + error["error"] = "Missing required fields: url, api_key, or request_body"; + callback(Json::Value(), std::move(error)); + return; + } + + const std::string& url = (*json_body)["url"].asString(); + const std::string& api_key = 
(*json_body)["api_key"].asString(); + + Json::Value transformed_request = transformRequest((*json_body)["request_body"], "chat_completion"); + + Json::FastWriter writer; + std::string request_body = writer.write(transformed_request); + + auto response = makeRequest(url, api_key, request_body); + + if (response.error) { + Json::Value error; + error["error"] = response.error_message; + callback(Json::Value(), std::move(error)); + return; + } + + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value error; + error["error"] = "Failed to parse response"; + callback(Json::Value(), std::move(error)); + return; + } + + callback(std::move(response_json), Json::Value()); +} + +bool RemoteEngine::LoadConfig(const std::string& yaml_path) { + try { + YAML::Node config = YAML::LoadFile(yaml_path); + + if (config["api_key_template"]) { + config_.api_key_template = config["api_key_template"].as(); + } + + if (config["TransformReq"]) { + Json::Reader reader; + reader.parse(config["TransformReq"].as(), config_.transform_req); + } + + if (config["TransformResp"]) { + Json::Reader reader; + reader.parse(config["TransformResp"].as(), config_.transform_resp); + } + + return true; + } catch (const YAML::Exception& e) { + LOG_ERROR << "Failed to load config: " << e.what(); + return false; + } +} + +// Implement other virtual functions with minimal functionality +void RemoteEngine::HandleEmbedding(std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +void RemoteEngine::LoadModel(std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +void RemoteEngine::UnloadModel(std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +void RemoteEngine::GetModelStatus(std::shared_ptr, + std::function&& callback) { + callback(Json::Value(), Json::Value()); +} + +bool RemoteEngine::IsSupported(const std::string&) { + return 
true; +} + +bool RemoteEngine::SetFileLogger(int, const std::string&) { + return true; +} + +void RemoteEngine::SetLogLevel(trantor::Logger::LogLevel) { +} \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h new file mode 100644 index 000000000..a2f58874c --- /dev/null +++ b/engine/extensions/remote-engine/remote_engine.h @@ -0,0 +1,65 @@ +#pragma once + +#include "cortex-common/EngineI.h" +#include +#include +#include +#include +#include + +// Helper for CURL response +struct CurlResponse { + std::string body; + bool error{false}; + std::string error_message; +}; + +class RemoteEngine : public EngineI { +private: + // Store config from YAML + struct Config { + std::string api_key_template; + Json::Value transform_req; + Json::Value transform_resp; + }; + + Config config_; + + // Helper functions + CurlResponse makeRequest(const std::string& url, + const std::string& api_key, + const std::string& body, + const std::string& method = "POST"); + + std::string renderTemplate(const std::string& templ, + const std::unordered_map& values); + + Json::Value transformRequest(const Json::Value& input, const std::string& type); + +public: + RemoteEngine(); + ~RemoteEngine(); + + // Main interface implementations + void GetModels(std::shared_ptr json_body, + std::function&& callback) override; + + void HandleChatCompletion(std::shared_ptr json_body, + std::function&& callback) override; + + // Config loading + bool LoadConfig(const std::string& yaml_path); + + // Other required virtual functions + void HandleEmbedding(std::shared_ptr json_body, + std::function&& callback) override; + void LoadModel(std::shared_ptr json_body, + std::function&& callback) override; + void UnloadModel(std::shared_ptr json_body, + std::function&& callback) override; + void GetModelStatus(std::shared_ptr json_body, + std::function&& callback) override; + bool IsSupported(const std::string& feature) override; + bool 
SetFileLogger(int max_log_lines, const std::string& log_path) override; + void SetLogLevel(trantor::Logger::LogLevel logLevel) override; +}; \ No newline at end of file From 71da65fd330744b3fa4a7efc9c625bdbd1bb97a3 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 8 Nov 2024 12:55:50 +0700 Subject: [PATCH 3/5] chore: remove un developed file --- .../extensions/remote-engine/remote_engine.cc | 231 ------------------ .../extensions/remote-engine/remote_engine.h | 65 ----- 2 files changed, 296 deletions(-) delete mode 100644 engine/extensions/remote-engine/remote_engine.cc delete mode 100644 engine/extensions/remote-engine/remote_engine.h diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc deleted file mode 100644 index e3d2b5679..000000000 --- a/engine/extensions/remote-engine/remote_engine.cc +++ /dev/null @@ -1,231 +0,0 @@ -#include "remote_engine.h" -#include - -// Static callback function for CURL -static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, std::string* data) { - data->append(ptr, size * nmemb); - return size * nmemb; -} - -RemoteEngine::RemoteEngine() { - curl_global_init(CURL_GLOBAL_ALL); -} - -RemoteEngine::~RemoteEngine() { - curl_global_cleanup(); -} - -CurlResponse RemoteEngine::makeRequest(const std::string& url, - const std::string& api_key, - const std::string& body, - const std::string& method) { - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } - - // Set up headers - struct curl_slist* headers = nullptr; - if (!api_key.empty()) { - std::string auth_header = renderTemplate(config_.api_key_template, {{"api_key", api_key}}); - headers = curl_slist_append(headers, auth_header.c_str()); - } - headers = curl_slist_append(headers, "Content-Type: application/json"); - - curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); - 
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - - if (method == "POST") { - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); - } - - std::string response_string; - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string); - - CURLcode res = curl_easy_perform(curl); - if (res != CURLE_OK) { - response.error = true; - response.error_message = curl_easy_strerror(res); - } else { - response.body = response_string; - } - - curl_slist_free_all(headers); - curl_easy_cleanup(curl); - return response; -} - -std::string RemoteEngine::renderTemplate(const std::string& templ, - const std::unordered_map& values) { - std::string result = templ; - for (const auto& [key, value] : values) { - std::string placeholder = "{{" + key + "}}"; - size_t pos = result.find(placeholder); - if (pos != std::string::npos) { - result.replace(pos, placeholder.length(), value); - } - } - return result; -} - -Json::Value RemoteEngine::transformRequest(const Json::Value& input, const std::string& type) { - if (!config_.transform_req.isMember(type)) { - return input; - } - - Json::Value output = input; - const Json::Value& transforms = config_.transform_req[type]; - - for (const auto& transform : transforms) { - if (transform.isString()) { - // Handle template-based transformation - if (transform.asString().find("template") != std::string::npos) { - // Implement template rendering logic here - continue; - } - } else if (transform.isObject()) { - // Handle key mapping transformations - for (const auto& key : transform.getMemberNames()) { - if (input.isMember(key)) { - output[transform[key].asString()] = input[key]; - output.removeMember(key); - } - } - } - } - return output; -} - -void RemoteEngine::GetModels(std::shared_ptr json_body, - std::function&& callback) { - if (!json_body->isMember("url") || !json_body->isMember("api_key")) { - Json::Value error; - error["error"] = "Missing required fields: url or api_key"; - 
callback(Json::Value(), std::move(error)); - return; - } - - const std::string& url = (*json_body)["url"].asString(); - const std::string& api_key = (*json_body)["api_key"].asString(); - - auto response = makeRequest(url, api_key, "", "GET"); - - if (response.error) { - Json::Value error; - error["error"] = response.error_message; - callback(Json::Value(), std::move(error)); - return; - } - - Json::Value response_json; - Json::Reader reader; - if (!reader.parse(response.body, response_json)) { - Json::Value error; - error["error"] = "Failed to parse response"; - callback(Json::Value(), std::move(error)); - return; - } - - callback(std::move(response_json), Json::Value()); -} - -void RemoteEngine::HandleChatCompletion(std::shared_ptr json_body, - std::function&& callback) { - if (!json_body->isMember("url") || !json_body->isMember("api_key") || - !json_body->isMember("request_body")) { - Json::Value error; - error["error"] = "Missing required fields: url, api_key, or request_body"; - callback(Json::Value(), std::move(error)); - return; - } - - const std::string& url = (*json_body)["url"].asString(); - const std::string& api_key = (*json_body)["api_key"].asString(); - - Json::Value transformed_request = transformRequest((*json_body)["request_body"], "chat_completion"); - - Json::FastWriter writer; - std::string request_body = writer.write(transformed_request); - - auto response = makeRequest(url, api_key, request_body); - - if (response.error) { - Json::Value error; - error["error"] = response.error_message; - callback(Json::Value(), std::move(error)); - return; - } - - Json::Value response_json; - Json::Reader reader; - if (!reader.parse(response.body, response_json)) { - Json::Value error; - error["error"] = "Failed to parse response"; - callback(Json::Value(), std::move(error)); - return; - } - - callback(std::move(response_json), Json::Value()); -} - -bool RemoteEngine::LoadConfig(const std::string& yaml_path) { - try { - YAML::Node config = 
YAML::LoadFile(yaml_path); - - if (config["api_key_template"]) { - config_.api_key_template = config["api_key_template"].as(); - } - - if (config["TransformReq"]) { - Json::Reader reader; - reader.parse(config["TransformReq"].as(), config_.transform_req); - } - - if (config["TransformResp"]) { - Json::Reader reader; - reader.parse(config["TransformResp"].as(), config_.transform_resp); - } - - return true; - } catch (const YAML::Exception& e) { - LOG_ERROR << "Failed to load config: " << e.what(); - return false; - } -} - -// Implement other virtual functions with minimal functionality -void RemoteEngine::HandleEmbedding(std::shared_ptr, - std::function&& callback) { - callback(Json::Value(), Json::Value()); -} - -void RemoteEngine::LoadModel(std::shared_ptr, - std::function&& callback) { - callback(Json::Value(), Json::Value()); -} - -void RemoteEngine::UnloadModel(std::shared_ptr, - std::function&& callback) { - callback(Json::Value(), Json::Value()); -} - -void RemoteEngine::GetModelStatus(std::shared_ptr, - std::function&& callback) { - callback(Json::Value(), Json::Value()); -} - -bool RemoteEngine::IsSupported(const std::string&) { - return true; -} - -bool RemoteEngine::SetFileLogger(int, const std::string&) { - return true; -} - -void RemoteEngine::SetLogLevel(trantor::Logger::LogLevel) { -} \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h deleted file mode 100644 index a2f58874c..000000000 --- a/engine/extensions/remote-engine/remote_engine.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include "cortex-common/EngineI.h" -#include -#include -#include -#include -#include - -// Helper for CURL response -struct CurlResponse { - std::string body; - bool error{false}; - std::string error_message; -}; - -class RemoteEngine : public EngineI { -private: - // Store config from YAML - struct Config { - std::string api_key_template; - Json::Value transform_req; - Json::Value 
transform_resp; - }; - - Config config_; - - // Helper functions - CurlResponse makeRequest(const std::string& url, - const std::string& api_key, - const std::string& body, - const std::string& method = "POST"); - - std::string renderTemplate(const std::string& templ, - const std::unordered_map& values); - - Json::Value transformRequest(const Json::Value& input, const std::string& type); - -public: - RemoteEngine(); - ~RemoteEngine(); - - // Main interface implementations - void GetModels(std::shared_ptr json_body, - std::function&& callback) override; - - void HandleChatCompletion(std::shared_ptr json_body, - std::function&& callback) override; - - // Config loading - bool LoadConfig(const std::string& yaml_path); - - // Other required virtual functions - void HandleEmbedding(std::shared_ptr json_body, - std::function&& callback) override; - void LoadModel(std::shared_ptr json_body, - std::function&& callback) override; - void UnloadModel(std::shared_ptr json_body, - std::function&& callback) override; - void GetModelStatus(std::shared_ptr json_body, - std::function&& callback) override; - bool IsSupported(const std::string& feature) override; - bool SetFileLogger(int max_log_lines, const std::string& log_path) override; - void SetLogLevel(trantor::Logger::LogLevel logLevel) override; -}; \ No newline at end of file From df3648a239aced02dc70e177c0e574f6a526661c Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 8 Nov 2024 13:54:45 +0700 Subject: [PATCH 4/5] fix: gemma2 chat template renderer --- engine/config/chat_template_renderer.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/engine/config/chat_template_renderer.h b/engine/config/chat_template_renderer.h index 63a47ecf3..e2719710e 100644 --- a/engine/config/chat_template_renderer.h +++ b/engine/config/chat_template_renderer.h @@ -48,10 +48,11 @@ #include namespace config { -#if (defined(_MSC_VER) && _MSC_VER >= 1900 && defined(__cpp_char8_t)) || __cplusplus >= 
202002L - #define LU8(x) reinterpret_cast(u8##x) +#if (defined(_MSC_VER) && _MSC_VER >= 1900 && defined(__cpp_char8_t)) || \ + __cplusplus >= 202002L +#define LU8(x) reinterpret_cast(u8##x) #else - #define LU8(x) u8##x +#define LU8(x) u8##x #endif typedef struct llama_chat_message { @@ -167,13 +168,10 @@ static int32_t llama_chat_apply_template_internal( std::string system_prompt = ""; for (auto message : chat) { std::string role(message->role); - if (role == "system") { - // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken - system_prompt = trim(message->content); - continue; - } // in gemma, "assistant" is "model" role = role == "assistant" ? "model" : message->role; + // in gemma2, "system" is "user" + role = role =="system"? "user" : role; ss << "" << role << "\n"; if (!system_prompt.empty() && role != "model") { ss << system_prompt << "\n\n"; From 2718c9b0c2f1e4d3ece0b7085241dcee177b53a9 Mon Sep 17 00:00:00 2001 From: nguyenhoangthuan99 Date: Fri, 8 Nov 2024 14:28:34 +0700 Subject: [PATCH 5/5] format code --- engine/config/chat_template_renderer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/config/chat_template_renderer.h b/engine/config/chat_template_renderer.h index e2719710e..881186a9d 100644 --- a/engine/config/chat_template_renderer.h +++ b/engine/config/chat_template_renderer.h @@ -171,7 +171,7 @@ static int32_t llama_chat_apply_template_internal( // in gemma, "assistant" is "model" role = role == "assistant" ? "model" : message->role; // in gemma2, "system" is "user" - role = role =="system"? "user" : role; + role = role == "system" ? "user" : role; ss << "" << role << "\n"; if (!system_prompt.empty() && role != "model") { ss << system_prompt << "\n\n";