From ffac2418186029ecf0dbc300fa208acb06ad001c Mon Sep 17 00:00:00 2001 From: Thomas Farr Date: Mon, 6 Jan 2025 16:21:33 +1300 Subject: [PATCH] Fix ingest schemas Signed-off-by: Thomas Farr --- CHANGELOG.md | 1 + spec/schemas/ingest._common.yaml | 108 +++--------------------------- spec/schemas/ingest.simulate.yaml | 1 - spec/schemas/nodes.info.yaml | 30 +++++++-- 4 files changed, 33 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42c3ff268..941a60252 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fixed cluster API schemas ([#754](https://github.com/opensearch-project/opensearch-api-specification/pull/754)) - Fixed schema validation for invalid properties in `retry` configuration ([#758](https://github.com/opensearch-project/opensearch-api-specification/pull/758)) - Fixed nodes API schemas ([#761](https://github.com/opensearch-project/opensearch-api-specification/pull/761)) +- Fixed ingest API schemas ([#766](https://github.com/opensearch-project/opensearch-api-specification/pull/766)) ### Changed - Changed `tasks._common:TaskInfo` and `tasks._common:TaskGroup` to be composed of a `tasks._common:TaskInfoBase` ([#683](https://github.com/opensearch-project/opensearch-api-specification/pull/683)) diff --git a/spec/schemas/ingest._common.yaml b/spec/schemas/ingest._common.yaml index 735ad690a..eaaded4c2 100644 --- a/spec/schemas/ingest._common.yaml +++ b/spec/schemas/ingest._common.yaml @@ -45,8 +45,6 @@ components: $ref: '#/components/schemas/DateIndexNameProcessor' dot_expander: $ref: '#/components/schemas/DotExpanderProcessor' - enrich: - $ref: '#/components/schemas/EnrichProcessor' fail: $ref: '#/components/schemas/FailProcessor' foreach: @@ -97,8 +95,6 @@ components: $ref: '#/components/schemas/DropProcessor' circle: $ref: '#/components/schemas/CircleProcessor' - inference: - $ref: '#/components/schemas/InferenceProcessor' text_embedding: $ref: '#/components/schemas/TextEmbeddingProcessor' minProperties: 1 @@ -117,7 +113,8 @@ components: description: |- The number of chars being used for extraction to prevent huge fields. Use `-1` for no limit. - type: number + type: integer + format: int64 indexed_chars_field: $ref: '_common.yaml#/components/schemas/Field' properties: @@ -170,8 +167,7 @@ components: value: description: The value to be appended. Supports template snippets. type: array - items: - type: object + items: {} allow_duplicates: description: If `false`, the processor does not append values already present in the field. type: boolean @@ -188,7 +184,6 @@ components: Value used to fill empty fields. Empty fields are skipped if this is not provided. An empty field is one with no value (2 consecutive separators) or empty quotes (`""`). - type: object field: $ref: '_common.yaml#/components/schemas/Field' ignore_missing: @@ -319,38 +314,6 @@ components: type: string required: - field - EnrichProcessor: - allOf: - - $ref: '#/components/schemas/ProcessorBase' - - type: object - properties: - field: - $ref: '_common.yaml#/components/schemas/Field' - ignore_missing: - description: If `true` and `field` does not exist, the processor quietly exits without modifying the document. - type: boolean - max_matches: - description: |- - The maximum number of matched documents to include under the configured target field. - The `target_field` will be turned into a JSON array if `max_matches` is higher than 1, otherwise `target_field` will become a JSON object. - In order to avoid documents getting too large, the maximum allowed value is 128. - type: number - override: - description: |- - If processor will update fields with pre-existing non-null-valued field. - When set to `false`, such fields will not be touched. - type: boolean - policy_name: - description: The name of the enrich policy to use. - type: string - shape_relation: - $ref: '_common.yaml#/components/schemas/GeoShapeRelation' - target_field: - $ref: '_common.yaml#/components/schemas/Field' - required: - - field - - policy_name - - target_field FailProcessor: allOf: - $ref: '#/components/schemas/ProcessorBase' @@ -647,7 +610,6 @@ components: The value to be set for the field. Supports template snippets. May specify only one of `value` or `copy_from`. - type: object required: - field SortProcessor: @@ -797,6 +759,7 @@ components: error_distance: description: The difference between the resulting inscribed distance from center to side and the circle's radius (measured in meters for `geo_shape`, unit-less for `shape`). type: number + format: double field: $ref: '_common.yaml#/components/schemas/Field' ignore_missing: @@ -814,62 +777,7 @@ components: type: string enum: - geo_shape - - shape - InferenceProcessor: - allOf: - - $ref: '#/components/schemas/ProcessorBase' - - type: object - properties: - model_id: - $ref: '_common.yaml#/components/schemas/Id' - target_field: - $ref: '_common.yaml#/components/schemas/Field' - field_map: - description: |- - Maps the document field names to the known field names of the model. - This mapping takes precedence over any default mappings provided in the model configuration. - type: object - additionalProperties: - type: object - inference_config: - $ref: '#/components/schemas/InferenceConfig' - required: - - model_id - InferenceConfig: - type: object - properties: - regression: - $ref: '#/components/schemas/InferenceConfigRegression' - classification: - $ref: '#/components/schemas/InferenceConfigClassification' - minProperties: 1 - maxProperties: 1 - InferenceConfigRegression: - type: object - properties: - results_field: - $ref: '_common.yaml#/components/schemas/Field' - num_top_feature_importance_values: - description: Specifies the maximum number of feature importance values per document. - type: number - InferenceConfigClassification: - type: object - properties: - num_top_classes: - description: Specifies the number of top class predictions to return. - type: number - num_top_feature_importance_values: - description: Specifies the maximum number of feature importance values per document. - type: number - results_field: - $ref: '_common.yaml#/components/schemas/Field' - top_classes_results_field: - $ref: '_common.yaml#/components/schemas/Field' - prediction_field_type: - description: |- - Specifies the type of the predicted field to write. - Valid values are: `string`, `number`, `boolean`. - type: string + - xy_shape TextEmbeddingProcessor: allOf: - $ref: '#/components/schemas/ProcessorBase' @@ -883,9 +791,9 @@ components: type: object additionalProperties: type: string - description: - type: string - description: A brief description of the processor. + batch_size: + type: integer + format: int32 required: - field_map - model_id diff --git a/spec/schemas/ingest.simulate.yaml b/spec/schemas/ingest.simulate.yaml index 3a23f027b..62813a0d4 100644 --- a/spec/schemas/ingest.simulate.yaml +++ b/spec/schemas/ingest.simulate.yaml @@ -15,7 +15,6 @@ components: $ref: '_common.yaml#/components/schemas/IndexName' _source: description: JSON body for the document. - type: object required: - _source PipelineSimulation: diff --git a/spec/schemas/nodes.info.yaml b/spec/schemas/nodes.info.yaml index df6eaa8cb..6af21c61c 100644 --- a/spec/schemas/nodes.info.yaml +++ b/spec/schemas/nodes.info.yaml @@ -384,7 +384,6 @@ components: max_local_storage_nodes: type: string required: - - attr - name NodeInfoPath: type: object @@ -443,7 +442,7 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/NodeInfoSettingsHttpType' type.default: type: string compression: @@ -452,6 +451,17 @@ components: $ref: '_common.yaml#/components/schemas/StringifiedInteger' required: - type + NodeInfoSettingsHttpType: + oneOf: + - title: type + type: string + - title: config + $ref: '#/components/schemas/NodeInfoSettingsHttpTypeConfig' + NodeInfoSettingsHttpTypeConfig: + type: object + properties: + default: + type: string NodeInfoBootstrap: type: object properties: @@ -463,11 +473,22 @@ components: type: object properties: type: - type: string + $ref: '#/components/schemas/NodeInfoSettingsTransportType' type.default: type: string required: - type + NodeInfoSettingsTransportType: + oneOf: + - title: type + type: string + - title: config + $ref: '#/components/schemas/NodeInfoSettingsTransportTypeConfig' + NodeInfoSettingsTransportTypeConfig: + type: object + properties: + default: + type: string NodeInfoSettingsNetwork: type: object properties: @@ -684,6 +705,3 @@ components: type: array items: $ref: '#/components/schemas/NodeInfoIngestProcessor' - required: - - request_processors - - response_processors