Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ML Stats APIs #794

Merged
merged 3 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Added `GET /_plugins/_ml/models/{model_id}`, `POST /_plugins/_ml/models/_search`, `POST /_plugins/_ml/models/_unload`, `_undeploy`, `_upload`, `meta`, `_register_meta`, `POST /_plugins/_ml/models/{model_id}/_load`, `_predict`, `_unload`, `chunk/{chunk_number}`, `upload_chunk/{chunk_number}`, and `PUT /_plugins/_ml/models/{model_id}` ([#733](https://github.com/opensearch-project/opensearch-api-specification/pull/733))
- Added `GET`, `POST`, `PUT`, `DELETE /_plugins/_ml/controllers/{model_id}` ([#779](https://github.com/opensearch-project/opensearch-api-specification/pull/779))
- Added `GET /_plugins/_ml/profile`, `GET /_plugins/_ml/profile/models`, `models/{model_id}`, `tasks`, `tasks/{task_id}` ([#787](https://github.com/opensearch-project/opensearch-api-specification/pull/787))
- Added `GET /_plugins/_ml/stats`, `stats/{stat}`, `{node_id}/stats/`, `{node_id}/stats/{stat}` ([#794](https://github.com/opensearch-project/opensearch-api-specification/pull/794))

### Removed
- Removed unsupported `_common.mapping:SourceField`'s `mode` field and associated `_common.mapping:SourceFieldMode` enum ([#652](https://github.com/opensearch-project/opensearch-api-specification/pull/652))
Expand Down
68 changes: 68 additions & 0 deletions spec/namespaces/ml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,49 @@ paths:
responses:
'200':
$ref: '#/components/responses/ml.get_profile_tasks@200'
# Variant 0 of the `ml.get_stats` operation group: takes no path parameters
# and answers with the shared `ml.get_stats@200` response.
/_plugins/_ml/stats:
  get:
    operationId: ml.get_stats.0
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
# Variant 1 of the `ml.get_stats` operation group: narrows the request to the
# statistic named by the `stat` path parameter.
/_plugins/_ml/stats/{stat}:
  get:
    operationId: ml.get_stats.1
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    parameters:
      - $ref: '#/components/parameters/ml.get_stats::path.stat'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
# Variant 2 of the `ml.get_stats` operation group: scopes the request to the
# node named by the `node_id` path parameter.
# NOTE(review): this path ends with a trailing slash while `/_plugins/_ml/stats`
# does not; the test story uses the same literal path, so change both together
# if the slash is not intended.
/_plugins/_ml/{node_id}/stats/:
  get:
    operationId: ml.get_stats.2
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    parameters:
      - $ref: '#/components/parameters/ml.get_stats::path.node_id'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
# Variant 3 of the `ml.get_stats` operation group: takes both the `node_id`
# and the `stat` path parameters.
/_plugins/_ml/{node_id}/stats/{stat}:
  get:
    operationId: ml.get_stats.3
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    parameters:
      - $ref: '#/components/parameters/ml.get_stats::path.node_id'
      - $ref: '#/components/parameters/ml.get_stats::path.stat'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
components:
requestBodies:
ml.register_model_group:
Expand Down Expand Up @@ -1660,6 +1703,11 @@ components:
application/json:
schema:
$ref: '../schemas/ml._common.yaml#/components/schemas/GetProfileResponse'
# Successful response shared by all four `ml.get_stats` path variants; the
# JSON body is described by `GetStatsResponse` in the ML common schemas file.
ml.get_stats@200:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/GetStatsResponse'
parameters:
ml.get_model_group::path.model_group_id:
name: model_group_id
Expand Down Expand Up @@ -1901,5 +1949,25 @@ components:
name: task_id
in: path
required: true
schema:
type: string
# Required `stat` path parameter: a string limited to the statistic names
# enumerated below (kept in alphabetical order).
ml.get_stats::path.stat:
  name: stat
  in: path
  required: true
  schema:
    type: string
    enum:
      - ml_config_index_status
      - ml_connector_count
      - ml_connector_index_status
      - ml_controller_index_status
      - ml_model_count
      - ml_model_index_status
      - ml_task_index_status
# Required `node_id` path parameter: an unconstrained string.
ml.get_stats::path.node_id:
  name: node_id
  in: path
  required: true
  schema:
    type: string
142 changes: 141 additions & 1 deletion spec/schemas/ml._common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,16 @@ components:
type: integer
format: int64
description: The estimated memory size in GPU.
deploy:
$ref: '#/components/schemas/Deploy'
register:
$ref: '#/components/schemas/Register'
undeploy:
$ref: '#/components/schemas/Undeploy'
predict:
$ref: '#/components/schemas/Predict'
train:
$ref: '#/components/schemas/Train'
PredictRequestStats:
type: object
properties:
Expand Down Expand Up @@ -1367,4 +1377,134 @@ components:
# Object with arbitrary property names; every value must conform to the
# `Task` schema. Presumably keyed by task ID — confirm against the API output.
Tasks:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/Task'
# Body returned by the `ml.get_stats` operations: two integer counters, five
# index-status strings that share one four-value enum, and a `nodes` object
# described by `NodeStats`. No property is marked as required.
GetStatsResponse:
  type: object
  properties:
    ml_config_index_status:
      type: string
      description: The config index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_connector_count:
      type: integer
      format: int64
      description: The connector count.
    ml_connector_index_status:
      type: string
      description: The connector index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_controller_index_status:
      type: string
      description: The controller index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_model_count:
      type: integer
      format: int64
      description: The model count.
    ml_model_index_status:
      type: string
      description: The model index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_task_index_status:
      type: string
      description: The task index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    nodes:
      $ref: '#/components/schemas/NodeStats'
# Object with arbitrary property names whose values are `NodeStatsDetails`.
# Presumably keyed by node ID — confirm against the API output.
NodeStats:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/NodeStatsDetails'
# Statistics reported for one entry of `NodeStats`: six int64 counters plus
# nested `algorithms` and `models` objects. No property is marked as required.
NodeStatsDetails:
  type: object
  properties:
    algorithms:
      $ref: '#/components/schemas/Algorithms'
    ml_circuit_breaker_trigger_count:
      type: integer
      format: int64
      description: The circuit breaker trigger count.
    ml_deployed_model_count:
      type: integer
      format: int64
      description: The deployed model count.
    ml_executing_task_count:
      type: integer
      format: int64
      description: The executing task count.
    ml_failure_count:
      type: integer
      format: int64
      description: The failure count.
    ml_jvm_heap_usage:
      type: integer
      format: int64
      description: The JVM heap usage.
    ml_request_count:
      type: integer
      format: int64
      description: The request count.
    models:
      $ref: '#/components/schemas/Models'
# Object with arbitrary property names whose values are `AlgorithmOperations`.
# Presumably keyed by algorithm name — confirm against the API output.
Algorithms:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/AlgorithmOperations'
# Optional per-action statistics for one `Algorithms` entry; each property
# points at the schema named after its action.
AlgorithmOperations:
  type: object
  properties:
    deploy:
      $ref: '#/components/schemas/Deploy'
    predict:
      $ref: '#/components/schemas/Predict'
    register:
      $ref: '#/components/schemas/Register'
    train:
      $ref: '#/components/schemas/Train'
    undeploy:
      $ref: '#/components/schemas/Undeploy'
# Three int64 counters (requests, failures, executing tasks) shared by every
# per-action schema below.
ModelStats:
  type: object
  properties:
    ml_action_request_count:
      type: integer
      format: int64
      description: The request count.
    ml_action_failure_count:
      type: integer
      format: int64
      description: The failure count.
    ml_executing_task_count:
      type: integer
      format: int64
      description: The executing task count.
# Per-action schemas: each is a pure reference to `ModelStats`, so all five
# actions currently share exactly the same shape.
Deploy:
  $ref: '#/components/schemas/ModelStats'
Register:
  $ref: '#/components/schemas/ModelStats'
Undeploy:
  $ref: '#/components/schemas/ModelStats'
Predict:
  $ref: '#/components/schemas/ModelStats'
Train:
  $ref: '#/components/schemas/ModelStats'
77 changes: 77 additions & 0 deletions tests/plugins/ml/ml/stats.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
$schema: ../../../../json_schemas/test_story.schema.yaml

description: Test the retrieval of the statistics.
version: '>= 2.7'
warnings:
  multiple-paths-detected: false
prologues:
  # Set the persistent ML Commons JVM heap threshold to 100 before registering
  # a model (presumably so the memory check cannot reject the request — confirm).
  - path: /_cluster/settings
    method: PUT
    request:
      payload:
        persistent:
          plugins.ml_commons.jvm_heap_memory_threshold: 100
  # Register a pretrained model and remember the task ID from the response.
  - id: register_model
    path: /_plugins/_ml/models/_register
    method: POST
    request:
      payload:
        name: huggingface/sentence-transformers/msmarco-distilbert-base-tas-b
        version: '1.0.1'
        model_format: TORCH_SCRIPT
    output:
      task_id: payload.task_id
  # Fetch the registration task, retrying up to 3 times, until it reports
  # COMPLETED; capture the model ID and the first worker node for later steps.
  - id: get_completed_task
    path: /_plugins/_ml/tasks/{task_id}
    method: GET
    parameters:
      task_id: ${register_model.task_id}
    retry:
      count: 3
      wait: 10000
    response:
      status: 200
      payload:
        state: COMPLETED
    output:
      model_id: payload.model_id
      node_id: payload.worker_node[0]
epilogues:
  # Clean-up: delete the model and the task; 404 is accepted alongside 200.
  - path: /_plugins/_ml/models/{model_id}
    method: DELETE
    parameters:
      model_id: ${get_completed_task.model_id}
    status: [200, 404]
  - path: /_plugins/_ml/tasks/{task_id}
    method: DELETE
    parameters:
      task_id: ${register_model.task_id}
    status: [200, 404]
chapters:
  # One chapter per path variant of the stats API; each only asserts HTTP 200.
  - synopsis: Get all stats for all nodes.
    path: /_plugins/_ml/stats
    method: GET
    response:
      status: 200
  - synopsis: Get a specified stat for all nodes.
    path: /_plugins/_ml/stats/{stat}
    method: GET
    parameters:
      stat: ml_model_index_status
    response:
      status: 200
  - synopsis: Get all stats for a specific node.
    path: /_plugins/_ml/{node_id}/stats/
    method: GET
    parameters:
      node_id: ${get_completed_task.node_id}
    response:
      status: 200
  - synopsis: Get a specified stat for a specific node.
    path: /_plugins/_ml/{node_id}/stats/{stat}
    method: GET
    parameters:
      node_id: ${get_completed_task.node_id}
      stat: ml_task_index_status
    response:
      status: 200
Loading