diff --git a/README.md b/README.md index 546622bbc..335bbc5b6 100644 --- a/README.md +++ b/README.md @@ -285,7 +285,7 @@ Cortex releases 2 preview versions for advanced users to try new features early #### Windows 1. Clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp). -2. Navigate to the `engine > vcpkg` folder. +2. Navigate to the `engine` folder. 3. Configure the vpkg: ```bash @@ -294,16 +294,16 @@ cd vcpkg vcpkg install ``` -4. Build the Cortex.cpp inside the `build` folder: +4. Build the Cortex.cpp inside the `engine/build` folder: ```bash mkdir build cd build -cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static +cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder_in_cortex_repo/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static +cmake --build . --config Release ``` -5. Use Visual Studio with the C++ development kit to build the project using the files generated in the `build` folder. -6. Verify that Cortex.cpp is installed correctly by getting help information. +5. Verify that Cortex.cpp is installed correctly by getting help information. ```sh cortex -h @@ -312,7 +312,7 @@ cortex -h #### MacOS 1. Clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp). -2. Navigate to the `engine > vcpkg` folder. +2. Navigate to the `engine` folder. 3. Configure the vpkg: ```bash @@ -321,17 +321,16 @@ cd vcpkg vcpkg install ``` -4. Build the Cortex.cpp inside the `build` folder: +4. Build the Cortex.cpp inside the `engine/build` folder: ```bash mkdir build cd build -cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake +cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder_in_cortex_repo/vcpkg/scripts/buildsystems/vcpkg.cmake make -j4 ``` -5. 
Use Visual Studio with the C++ development kit to build the project using the files generated in the `build` folder. -6. Verify that Cortex.cpp is installed correctly by getting help information. +5. Verify that Cortex.cpp is installed correctly by getting help information. ```sh cortex -h @@ -340,7 +339,7 @@ cortex -h #### Linux 1. Clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp). -2. Navigate to the `engine > vcpkg` folder. +2. Navigate to the `engine` folder. 3. Configure the vpkg: ```bash @@ -349,17 +348,16 @@ cd vcpkg vcpkg install ``` -4. Build the Cortex.cpp inside the `build` folder: +4. Build the Cortex.cpp inside the `engine/build` folder: ```bash mkdir build cd build -cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake +cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder_in_cortex_repo/vcpkg/scripts/buildsystems/vcpkg.cmake make -j4 ``` -5. Use Visual Studio with the C++ development kit to build the project using the files generated in the `build` folder. -6. Verify that Cortex.cpp is installed correctly by getting help information. +5. Verify that Cortex.cpp is installed correctly by getting help information. ```sh cortex -h diff --git a/docs/docs/architecture/cortex-db.md b/docs/docs/architecture/cortex-db.md deleted file mode 100644 index 09de74ab4..000000000 --- a/docs/docs/architecture/cortex-db.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: cortex.db ---- \ No newline at end of file diff --git a/docs/docs/architecture/cortex-db.mdx b/docs/docs/architecture/cortex-db.mdx new file mode 100644 index 000000000..908897253 --- /dev/null +++ b/docs/docs/architecture/cortex-db.mdx @@ -0,0 +1,25 @@ +--- +title: cortex.db +description: cortex.db Overview. +slug: "cortex-db" +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; + +:::warning +🚧 Cortex.cpp is currently under development. 
Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +::: + +This document outlines the architecture of the database designed to store and manage various types of entities and their associated metadata. + +## Table Structure +### models Table +The `models` table is designed to hold metadata about various AI models. Below is the structure of the table: + +| Column Name | Data Type | Description | +|--------------------|-----------|---------------------------------------------------------| +| model_id | TEXT | A unique identifier for each model (Primary Key). | +| author_repo_id | TEXT | The identifier for the repository where the model is stored. | +| branch_name | TEXT | The branch name in the repository that contains the model. | +| path_to_model_yaml | TEXT | The file path to the YAML configuration file for the model. | \ No newline at end of file diff --git a/docs/docs/architecture/cortexrc.mdx b/docs/docs/architecture/cortexrc.mdx index 312d77986..24dc63a2d 100644 --- a/docs/docs/architecture/cortexrc.mdx +++ b/docs/docs/architecture/cortexrc.mdx @@ -16,30 +16,37 @@ Cortex.cpp supports reading its configuration from a file called `.cortexrc`. 
Us ## File Location The configuration file is stored in the following locations: -- **Windows**: - - Stable: `C:\Users\\.cortexrc` - - Beta: `C:\Users\\.cortexrc-beta` - - Nighty: `C:\Users\\.cortexrc-nightly` -- **Linux**: - - Stable: `/home//.cortexrc` - - Beta: `/home//.cortexrc-beta` - - Nighty: `/home//.cortexrc-nightly` -- **macOS**: - - Stable: `/Users//.cortexrc` - - Beta: `/Users//.cortexrc-beta` - - Nighty: `/Users//.cortexrc-nightly` +- **Windows**: `C:\Users\\.cortexrc` +- **Linux**: `/home//.cortexrc` +- **macOS**: `/Users//.cortexrc` ## Configuration Parameters You can configure the following parameters in the `.cortexrc` file: | Parameter | Description | Default Value | |------------------|--------------------------------------------------|--------------------------------| | `dataFolderPath` | Path to the folder where `.cortexrc` located. | User's home folder. | -| `apiServerHost` | Host address for the Cortex.cpp API server. | `127.0.0.1` | -| `apiServerPort` | Port number for the Cortex.cpp API server. | `39281` | +| `apiServerHost` | Host address for the Cortex.cpp API server. | `127.0.0.1` | +| `apiServerPort` | Port number for the Cortex.cpp API server. | `39281` | +| `logFolderPath` | Path to the folder where logs are located. | User's home folder. | +| `logLlamaCppPath` | The llama-cpp engine log file path. | `./logs/cortex.log` | +| `logTensorrtLLMPath` | The tensorrt-llm engine log file path. | `./logs/cortex.log` | +| `logOnnxPath` | The onnxruntime engine log file path. | `./logs/cortex.log` | +| `maxLogLines` | The maximum number of log lines written to the log file. | `100000` | +| `checkedForUpdateAt` | The last time updates were checked. | `0` | +| `latestRelease` | The latest release version. | Empty string | +| `huggingFaceToken` | HuggingFace token. 
| Empty string | Example of the `.cortexrc` file: ``` +logFolderPath: /Users//cortexcpp +logLlamaCppPath: ./logs/cortex.log +logTensorrtLLMPath: ./logs/cortex.log +logOnnxPath: ./logs/cortex.log dataFolderPath: /Users//cortexcpp +maxLogLines: 100000 apiServerHost: 127.0.0.1 apiServerPort: 39281 +checkedForUpdateAt: 1730501224 +latestRelease: v1.0.1 +huggingFaceToken: "" ``` \ No newline at end of file diff --git a/docs/docs/architecture/updater.mdx b/docs/docs/architecture/updater.mdx new file mode 100644 index 000000000..d9dc7d4f8 --- /dev/null +++ b/docs/docs/architecture/updater.mdx @@ -0,0 +1,76 @@ +--- +title: Updater +description: Updater Overview. +slug: "updater" +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; + +:::warning +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +::: + +This document outlines the architectural design for a C++ updater responsible for downloading and executing installers for two binaries: CLI and Server. + +## Overview + +The updater is designed to check for available updates, download the necessary installer files, and execute them to update the CLI and Server binaries. The architecture consists of several key components that work together to achieve this functionality. + +## Components + +### 1. **Version Management Module** + +- **Purpose**: Responsible for checking the current version of the installed binaries and determining if updates are available. +- **Responsibilities**: + - Retrieve the current version from local installations. + - Fetch the latest version information from a remote source, latest version information is saved to `.cortexrc`. + - Determine if an update is necessary based on version comparison. + +### 2. **Installer Download Manager** + +- **Purpose**: Handles downloading the installer files from specified URLs. 
+- **Responsibilities**: + - Manage HTTP requests to fetch installer files. + - Handle errors during download. + - Save downloaded installers in a temporary directory. + +### 3. **Installer Execution Module** + +- **Purpose**: Executes the downloaded installer files to perform updates. +- **Responsibilities**: + - Launch the installer with appropriate command-line arguments. + - Monitor the installation process and capture any output or errors. + - Ensure that installation completes successfully before proceeding. + +### 4. **User Interface (CLI)** + +- **Purpose**: Provides a command-line interface for users to initiate updates and view status messages. +- **Responsibilities**: + - Display current version information for CLI and Server: `cortex -v` + - Inform users about available updates and progress during downloading and installation. + - Handle user input for initiating updates: `(sudo) cortex update` + +## Workflow + +1. **Initialization**: + - The updater starts and initializes all necessary modules. + +2. **Version Check**: + - The Version Management Module retrieves the current versions of both CLI and Server from local installations. + - It fetches the latest version information from a remote source. + +3. **Update Decision**: + - If newer versions are available, the updater proceeds to download the installers; otherwise, it informs the user that no updates are necessary. + +4. **Download Process**: + - The Installer Download Manager downloads the latest installer files using HTTP requests. + - Progress is reported back to the User Interface. + +5. **Installer Execution**: + - Once downloaded, the Installer Execution Module runs the installer with silent mode. + - It monitors the installation process, capturing any output or errors. + +6. **Completion Notification**: + - After successful installation, the User Interface notifies users of completion or any issues encountered during installation. 
+ - Logs are updated with relevant information about the update process. \ No newline at end of file diff --git a/docs/docs/hub/index.mdx b/docs/docs/hub/index.mdx index 95739ef0a..66a59918d 100644 --- a/docs/docs/hub/index.mdx +++ b/docs/docs/hub/index.mdx @@ -2,13 +2,353 @@ slug: /model-sources title: Model Sources --- -import DocCardList from '@theme/DocCardList'; +import DocCardList from "@theme/DocCardList"; :::warning 🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: -Cortex.cpp allows users to pull models from multiple repositories, offering flexibility and extensive model access. Here are the supported repositories: +# Pulling Models in Cortex + +Cortex provides a streamlined way to pull (download) machine learning models from Hugging Face and other third-party sources, as well as import models from local storage. This functionality allows users to easily access a variety of pre-trained models to enhance their applications. + +## Features + +- **Model Retrieval**: Download models directly from Hugging Face or third-party repositories. +- **Local Import**: Import models stored on your local machine. +- **User-Friendly Interface**: Access models through a Command Line Interface (CLI) or an HTTP API. +- **Model Selection**: Choose your desired model from a provided selection menu in the CLI. + +## Usage + +### Pulling Models via CLI + +1. **Open the CLI**: Launch the Cortex CLI on your terminal. +2. **Select Model**: Use the selection menu to browse available models. + - Enter the corresponding number for your desired model quant. +3. **Provide Repository Handle**: Input the repository handle (e.g., `username/repo_name` for Hugging Face) when prompted. +4. **Download Model**: Cortex will handle the download process automatically. 
+ +For pulling models from [Cortex model registry](https://huggingface.co/cortexso), simply type `cortex pull ` to your terminal. + +```sh +cortex pull tinyllama +Downloaded models: + tinyllama:1b-gguf + +Available to download: + 1. tinyllama:1b-gguf-q2-k + 2. tinyllama:1b-gguf-q3-kl + 3. tinyllama:1b-gguf-q3-km + 4. tinyllama:1b-gguf-q3-ks + 5. tinyllama:1b-gguf-q4-km + 6. tinyllama:1b-gguf-q4-ks + 7. tinyllama:1b-gguf-q5-km + 8. tinyllama:1b-gguf-q5-ks + 9. tinyllama:1b-gguf-q6-k + 10. tinyllama:1b-gguf-q8-0 + 11. tinyllama:gguf + +Select a model (1-11): +``` + +#### Pulling models with repository handle + +When a user wants to pull a model that is not available in [Cortex model registry](https://huggingface.co/cortexso), they can provide the repository handle to Cortex. + +For example, we can pull a model from [QuantFactory-FinanceLlama3](https://huggingface.co/QuantFactory/finance-Llama3-8B-GGUF) by entering `cortex pull QuantFactory/finance-Llama3-8B-GGUF` in the terminal. + +```sh +cortex pull QuantFactory/finance-Llama3-8B-GGUF +Select an option + 1. finance-Llama3-8B.Q2_K.gguf + 2. finance-Llama3-8B.Q3_K_L.gguf + 3. finance-Llama3-8B.Q3_K_M.gguf + 4. finance-Llama3-8B.Q3_K_S.gguf + 5. finance-Llama3-8B.Q4_0.gguf + 6. finance-Llama3-8B.Q4_1.gguf + 7. finance-Llama3-8B.Q4_K_M.gguf + 8. finance-Llama3-8B.Q4_K_S.gguf + 9. finance-Llama3-8B.Q5_0.gguf + 10. finance-Llama3-8B.Q5_1.gguf + 11. finance-Llama3-8B.Q5_K_M.gguf + 12. finance-Llama3-8B.Q5_K_S.gguf + 13. finance-Llama3-8B.Q6_K.gguf + 14. finance-Llama3-8B.Q8_0.gguf + +Select an option (1-14): +``` + +#### Pulling models with direct url + +Clients can pull models directly using a URL. This allows for the direct download of models from a specified location without additional configuration. + +```sh +cortex pull https://huggingface.co/QuantFactory/OpenMath2-Llama3.1-8B-GGUF/blob/main/OpenMath2-Llama3.1-8B.Q4_0.gguf +Validating download items, please wait.. +Start downloading.. 
+QuantFactory:OpenMat 0%[==================================================] [00m:00s] 3.98 MB/0.00 B +``` + +### Pulling Models via HTTP API + +To pull a model using the HTTP API, make a `POST` request to the following endpoint: + +```sh +curl --request POST \ + --url http://localhost:39281/v1/models/pull \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "tinyllama:gguf" +}' +``` + +#### Notes + +- Ensure that you have an active internet connection for pulling models from external repositories. +- For local model imports, specify the path to the model in your CLI command or API request. + +### Observing download progress + +Unlike the CLI, where users can observe the download progress directly in the terminal, the HTTP API must be asynchronous. Therefore, clients can monitor the download progress by listening to the Event WebSocket API at `ws://127.0.0.1:39281/events`. + +#### Download started event + +- `DownloadStarted` event will be emitted when the download starts. It will contain the `DownloadTask` object. Each `DownloadTask` will have an unique `id`, along with a type of downloading (e.g. Model, Engine, etc.). +- `DownloadTask`'s `id` will be required when client wants to stop a downloading task. 
+ +```json +{ + "task": { + "id": "tinyllama:1b-gguf-q2-k", + "items": [ + { + "bytes": 0, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/metadata.yml", + "downloadedBytes": 0, + "id": "metadata.yml", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/metadata.yml" + }, + { + "bytes": 0, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/model.gguf", + "downloadedBytes": 0, + "id": "model.gguf", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/model.gguf" + }, + { + "bytes": 0, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/model.yml", + "downloadedBytes": 0, + "id": "model.yml", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/model.yml" + } + ], + "type": "Model" + }, + "type": "DownloadStarted" +} +``` + +#### Download updated event + +- `DownloadUpdated` event will be emitted when the download is in progress. It will contain the `DownloadTask` object. Each `DownloadTask` will have an unique `id`, along with a type of downloading (e.g. Model, Engine, etc.). +- A `DownloadTask` will have a list of `DownloadItem`s. Each `DownloadItem` will have the following properties: + - `id`: the id of the download item. + - `bytes`: the total size of the download item. + - `downloadedBytes`: the number of bytes that have been downloaded so far. + - `checksum`: the checksum of the download item. +- Client can use the `downloadedBytes` and `bytes` properties to calculate the download progress. 
+ +```json +{ + "task": { + "id": "tinyllama:1b-gguf-q2-k", + "items": [ + { + "bytes": 58, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/metadata.yml", + "downloadedBytes": 58, + "id": "metadata.yml", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/metadata.yml" + }, + { + "bytes": 432131456, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/model.gguf", + "downloadedBytes": 235619714, + "id": "model.gguf", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/model.gguf" + }, + { + "bytes": 562, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/model.yml", + "downloadedBytes": 562, + "id": "model.yml", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/model.yml" + } + ], + "type": "Model" + }, + "type": "DownloadUpdated" +} +``` + +#### Download success event + +The DownloadSuccess event indicates that all items in the download task have been successfully downloaded. This event provides details about the download task and its items, including their IDs, download URLs, local paths, and other properties. In this event, the bytes and downloadedBytes properties for each item are set to 0, signifying that the download is complete and no further bytes are pending. 
+ +```json +{ + "task": { + "id": "tinyllama:1b-gguf-q2-k", + "items": [ + { + "bytes": 0, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/metadata.yml", + "downloadedBytes": 0, + "id": "metadata.yml", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/metadata.yml" + }, + { + "bytes": 0, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/model.gguf", + "downloadedBytes": 0, + "id": "model.gguf", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/model.gguf" + }, + { + "bytes": 0, + "checksum": "N/A", + "downloadUrl": "https://huggingface.co/cortexso/tinyllama/resolve/1b-gguf-q2-k/model.yml", + "downloadedBytes": 0, + "id": "model.yml", + "localPath": "/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf-q2-k/model.yml" + } + ], + "type": "Model" + }, + "type": "DownloadSuccess" +} +``` + +### Importing local-models + +When clients have models that are not inside the Cortex data folder and wish to run them inside Cortex, they can import local models using either the CLI or the HTTP API. + +#### via CLI + +Use the following command to import a local model using the CLI: + +```sh +cortex models import --model_id my-tinyllama --model_path /Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama +/1b-gguf/model.gguf +``` + +Response: + +```sh +Successfully import model from '/Users/jamesnguyen/cortexcpp/models/cortex.so/tinyllama/1b-gguf/model.gguf' for modeID 'my-tinyllama'. 
+``` + +#### via HTTP API + +Use the following curl command to import a local model using the HTTP API: + +```sh +curl --request POST \ + --url http://127.0.0.1:39281/v1/models/import \ + --header 'Content-Type: application/json' \ + --data '{ + "model": "model-id", + "modelPath": "absolute/path/to/gguf", + "name": "model display name" +}' +``` + +### Aborting Download Task + +Clients can abort a downloading task using the task ID. Below is a sample `curl` command to abort a download task: + +```sh +curl --location --request DELETE 'http://127.0.0.1:3928/models/pull' \ +--header 'Content-Type: application/json' \ +--data '{ + "taskId": "tinyllama:1b-gguf-q2-k" +}' +``` + +An event with type `DownloadStopped` will be emitted when the task is successfully aborted. + +### Listing local-available models via CLI + +You can list your ready-to-use models via CLI using `cortex models list` command. + +```sh +cortex models list ++---------+-------------------+ +| (Index) | ID | ++---------+-------------------+ +| 1 | tinyllama:1b-gguf | ++---------+-------------------+ +``` + +For more options, use `cortex models list --help` command. + +```sh +cortex models list -h +List all local models +Usage: +cortex models [options] [subcommand] + +Positionals: + filter TEXT Filter model id + +Options: + -h,--help Print this help message and exit + -e,--engine Display engine + -v,--version Display version +``` + +### Listing local-available models via HTTP API + +This section describes how to list all models that are available locally on your system using the HTTP API. By making a GET request to the specified endpoint, you can retrieve a list of models along with their details, such as model ID, name, file paths, engine type, and version. This is useful for managing and verifying the models you have downloaded and are ready to use in your local environment. 
+ +```sh +curl --request GET \ + --url http://127.0.0.1:39281/v1/models + +``` + +Response: + +```json +{ + "data": [ + { + "model": "tinyllama:1b-gguf", + "name": "tinyllama", + "files": [ + "models/cortex.so/tinyllama/1b-gguf/model.gguf" + ], + "engine": "llama-cpp", + "version": "1", + # Omit some configuration parameters + } + ], + "object": "list", + "result": "OK" +} +``` + +With Cortex, pulling and managing models is simplified, allowing you to focus more on building your applications! diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 36a3e7420..09ad3e504 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -75,6 +75,7 @@ const sidebars: SidebarsConfig = { { type: "doc", id: "architecture/data-folder", label: "Cortex Data Folder" }, { type: "doc", id: "architecture/cortex-db", label: "cortex.db" }, { type: "doc", id: "architecture/cortexrc", label: ".cortexrc" }, + { type: "doc", id: "architecture/updater", label: "Updater" }, ] }, { diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 78e15f820..599c2729e 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -3502,6 +3502,11 @@ "description": "To enable mmap, default is true", "example": true }, + "size": { + "type": "number", + "description": "The model file size in bytes", + "example": 1073741824 + }, "engine": { "type": "string", "description": "The engine to use.", diff --git a/engine/cli/commands/model_get_cmd.cc b/engine/cli/commands/model_get_cmd.cc index b5b230c9c..0eda66105 100644 --- a/engine/cli/commands/model_get_cmd.cc +++ b/engine/cli/commands/model_get_cmd.cc @@ -8,6 +8,7 @@ #include "httplib.h" #include "server_start_cmd.h" #include "utils/file_manager_utils.h" +#include "utils/json_helper.h" #include "utils/logging_utils.h" namespace commands { @@ -30,7 +31,8 @@ void ModelGetCmd::Exec(const std::string& host, int port, if (res->status == httplib::StatusCode::OK_200) { CLI_LOG(res->body); } else { - CTL_ERR("Model failed 
to get with status code: " << res->status); + auto root = json_helper::ParseJsonString(res->body); + CLI_LOG(root["message"].asString()); } } else { auto err = res.error(); diff --git a/engine/config/model_config.h b/engine/config/model_config.h index bc3a7ec25..044fd8dd3 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -58,6 +58,7 @@ struct ModelConfig { bool ignore_eos = false; int n_probs = 0; int min_keep = 0; + uint64_t size = 0; std::string grammar; void FromJson(const Json::Value& json) { @@ -70,6 +71,8 @@ struct ModelConfig { // model = json["model"].asString(); if (json.isMember("version")) version = json["version"].asString(); + if (json.isMember("size")) + size = json["size"].asUInt64(); if (json.isMember("stop") && json["stop"].isArray()) { stop.clear(); @@ -176,6 +179,7 @@ struct ModelConfig { obj["name"] = name; obj["model"] = model; obj["version"] = version; + obj["size"] = size; Json::Value stop_array(Json::arrayValue); for (const auto& s : stop) { @@ -269,6 +273,7 @@ struct ModelConfig { oss << format_utils::print_comment("END REQUIRED"); oss << format_utils::print_comment("BEGIN OPTIONAL"); + oss << format_utils::print_float("size", size); oss << format_utils::print_bool("stream", stream); oss << format_utils::print_float("top_p", top_p); oss << format_utils::print_float("temperature", temperature); diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc index 99f8103d8..e4932c9c3 100644 --- a/engine/config/yaml_config.cc +++ b/engine/config/yaml_config.cc @@ -75,6 +75,8 @@ void YamlHandler::ModelConfigFromYaml() { tmp.model = yaml_node_["model"].as(); if (yaml_node_["version"]) tmp.version = yaml_node_["version"].as(); + if (yaml_node_["size"]) + tmp.size = yaml_node_["size"].as(); if (yaml_node_["engine"]) tmp.engine = yaml_node_["engine"].as(); if (yaml_node_["prompt_template"]) { @@ -266,6 +268,8 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) { if 
(!model_config_.grammar.empty()) yaml_node_["grammar"] = model_config_.grammar; + yaml_node_["size"] = model_config_.size; + yaml_node_["created"] = std::time(nullptr); } catch (const std::exception& e) { std::cerr << "Error when update model config : " << e.what() << std::endl; @@ -318,6 +322,7 @@ void YamlHandler::WriteYamlFile(const std::string& file_path) const { outFile << "# END REQUIRED\n"; outFile << "\n"; outFile << "# BEGIN OPTIONAL\n"; + outFile << format_utils::writeKeyValue("size", yaml_node_["size"]); outFile << format_utils::writeKeyValue("stream", yaml_node_["stream"], "Default true?"); outFile << format_utils::writeKeyValue("top_p", yaml_node_["top_p"], diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 4967b1dd9..d9656073e 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -18,7 +18,8 @@ namespace { void ParseGguf(const DownloadItem& ggufDownloadItem, std::optional author, - std::optional name) { + std::optional name, + std::optional size) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; config::GGUFHandler gguf_handler; @@ -35,6 +36,7 @@ void ParseGguf(const DownloadItem& ggufDownloadItem, model_config.model = ggufDownloadItem.id; model_config.name = name.has_value() ? 
name.value() : gguf_handler.GetModelConfig().name; + model_config.size = size.value_or(0); yaml_handler.UpdateModelConfig(model_config); auto yaml_path{ggufDownloadItem.localPath}; @@ -284,8 +286,13 @@ cpp::result ModelService::HandleDownloadUrlAsync( }}}}; auto on_finished = [author, temp_name](const DownloadTask& finishedTask) { + // Sum downloadedBytes from all items + uint64_t model_size = 0; + for (const auto& item : finishedTask.items) { + model_size = model_size + item.bytes.value_or(0); + } auto gguf_download_item = finishedTask.items[0]; - ParseGguf(gguf_download_item, author, temp_name); + ParseGguf(gguf_download_item, author, temp_name, model_size); }; downloadTask.id = unique_model_id; @@ -349,8 +356,13 @@ cpp::result ModelService::HandleUrl( }}}}; auto on_finished = [author](const DownloadTask& finishedTask) { + // Sum downloadedBytes from all items + uint64_t model_size = 0; + for (const auto& item : finishedTask.items) { + model_size = model_size + item.bytes.value_or(0); + } auto gguf_download_item = finishedTask.items[0]; - ParseGguf(gguf_download_item, author, std::nullopt); + ParseGguf(gguf_download_item, author, std::nullopt, model_size); }; auto result = download_service_->AddDownloadTask(downloadTask, on_finished);