diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py index 25f832f5d..d1ff463f1 100644 --- a/elasticsearch/_async/client/__init__.py +++ b/elasticsearch/_async/client/__init__.py @@ -628,6 +628,7 @@ async def bulk( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, + include_source_on_error: t.Optional[bool] = None, list_executed_pipelines: t.Optional[bool] = None, pipeline: t.Optional[str] = None, pretty: t.Optional[bool] = None, @@ -735,6 +736,8 @@ async def bulk( :param operations: :param index: The name of the data stream, index, or index alias to perform bulk actions on. + :param include_source_on_error: True or false if to include the document source + in the error message in case of parsing errors. :param list_executed_pipelines: If `true`, the response will include the ingest pipelines that were run for each index or create. :param pipeline: The pipeline identifier to use to preprocess incoming documents. @@ -792,6 +795,8 @@ async def bulk( __query["filter_path"] = filter_path if human is not None: __query["human"] = human + if include_source_on_error is not None: + __query["include_source_on_error"] = include_source_on_error if list_executed_pipelines is not None: __query["list_executed_pipelines"] = list_executed_pipelines if pipeline is not None: @@ -984,8 +989,8 @@ async def count(
Count search results. Get the number of documents matching a query.
-The query can either be provided using a simple query string as a parameter or using the Query DSL defined within the request body.
- The latter must be nested in a query
key, which is the same as the search API.
The query can be provided either by using a simple query string as a parameter, or by defining Query DSL within the request body.
+ The query is optional. When no query is provided, the API uses match_all
to count all the documents.
The count API supports multi-target syntax. You can run a single count API search across multiple data streams and indices.
The operation is broadcast across all shards.
For each shard ID group, a replica is chosen and the search is run against it.
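For example, a minimal sketch of both styles with the async Python client (the index name and field are hypothetical; the `query` body and the `q` parameter cannot be combined):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def count_examples():
        # Query DSL in the request body (nested under `query`).
        resp = await client.count(index="my-index", query={"term": {"user.id": "kimchy"}})
        print(resp["count"])
        # Lucene query string as a parameter instead of a request body.
        resp = await client.count(index="my-index", q="user.id:kimchy")
        print(resp["count"])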
@@ -1027,10 +1032,10 @@ async def count(
in the result.
:param preference: The node or shard the operation should be performed on. By
default, it is random.
- :param q: The query in Lucene query string syntax.
- :param query: Defines the search definition using the Query DSL. The query is
- optional, and when not provided, it will use `match_all` to count all the
- docs.
+ :param q: The query in Lucene query string syntax. This parameter cannot be used
+ with a request body.
+ :param query: Defines the search query using Query DSL. A request body query
+ cannot be used with the `q` query string parameter.
:param routing: A custom value used to route operations to a specific shard.
:param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
@@ -1116,6 +1121,7 @@ async def create(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
+ include_source_on_error: t.Optional[bool] = None,
pipeline: t.Optional[str] = None,
pretty: t.Optional[bool] = None,
refresh: t.Optional[
@@ -1198,6 +1204,8 @@ async def create(
:param id: A unique identifier for the document. To automatically generate a
document ID, use the `POST /
Explain a document match result. - Returns information about why a specific document matches, or doesn’t match, a query.
+ Get information about why a specific document matches, or doesn't match, a query. + It computes a score explanation for a query and a specific document. `Get a list of supported script contexts and their methods.
- `Get a list of available script types, languages, and contexts.
- `The kNN search API supports restricting the search using a filter. The search will return the top k documents that also match the filter query.
+A kNN search response has the exact same structure as a search API response. + However, certain sections have a meaning specific to kNN search:
* The document _score is determined by the similarity between the query and document vector.
* The hits.total object contains the total number of nearest neighbor candidates considered, which is num_candidates * num_shards. The hits.total.relation will always be eq, indicating an exact value.
Get multiple JSON documents by ID from one or more indices. If you specify an index in the request URI, you only need to specify the document IDs in the request body. To ensure fast responses, this multi get (mget) API responds with partial results if one or more shards fail.
+Filter source fields
+By default, the _source
field is returned for every document (if stored).
+ Use the _source
and _source_include
or source_exclude
attributes to filter what fields are returned for a particular document.
+ You can include the _source
, _source_includes
, and _source_excludes
query parameters in the request URI to specify the defaults to use when there are no per-document instructions.
Get stored fields
+Use the stored_fields
attribute to specify the set of stored fields you want to retrieve.
+ Any requested fields that are not stored are ignored.
+ You can include the stored_fields
query parameter in the request URI to specify the defaults to use when there are no per-document instructions.
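A hedged sketch of an mget call with the async Python client, applying source filtering to the listed documents (index, document IDs, and field names are hypothetical):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def mget_example():
        resp = await client.mget(
            index="my-index",
            ids=["1", "2"],
            source_includes=["user.id", "message"],  # default _source filtering for the listed docs
        )
        for doc in resp["docs"]:
            print(doc.get("_source"))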
Run multiple templated searches.
+Run multiple templated searches with a single request.
+ If you are providing a text file or text input to curl
, use the --data-binary
flag instead of -d
to preserve newlines.
+ For example:
$ cat requests
+ { "index": "my-index" }
+ { "id": "my-search-template", "params": { "query_string": "hello world", "from": 0, "size": 10 }}
+ { "index": "my-other-index" }
+ { "id": "my-other-search-template", "params": { "query_type": "match_all" }}
+
+ $ curl -H "Content-Type: application/x-ndjson" -XGET localhost:9200/_msearch/template --data-binary "@requests"; echo
+
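The same request expressed as a sketch with the async Python client (template IDs and index names are hypothetical, and the search templates are assumed to already be stored):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def msearch_template_example():
        resp = await client.msearch_template(
            search_templates=[
                {"index": "my-index"},
                {"id": "my-search-template", "params": {"query_string": "hello world", "from": 0, "size": 10}},
                {"index": "my-other-index"},
                {"id": "my-other-search-template", "params": {"query_type": "match_all"}},
            ],
        )
        for item in resp["responses"]:
            print(item.get("hits", {}).get("total"))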
- `Get multiple term vectors.
-You can specify existing documents by index and ID or provide artificial documents in the body of the request. +
Get multiple term vectors with a single request.
+ You can specify existing documents by index and ID or provide artificial documents in the body of the request.
You can specify the index in the request body or request URI.
The response contains a docs
array with all the fetched termvectors.
Each element has the structure provided by the termvectors API.
Artificial documents
+You can also use mtermvectors
to generate term vectors for artificial documents provided in the body of the request.
+ The mapping used is determined by the specified _index
.
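A minimal sketch with the async Python client, fetching term vectors for two stored documents (index, IDs, and field names are hypothetical):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def mtermvectors_example():
        resp = await client.mtermvectors(
            index="my-index",
            ids=["1", "2"],
            fields=["message"],
            term_statistics=True,
        )
        for doc in resp["docs"]:
            print(doc["_id"], list(doc.get("term_vectors", {}).keys()))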
Run a script. - Runs a script and returns a result.
+Run a script.
+Runs a script and returns a result. + Use this API to build and test scripts, such as when defining a script for a runtime field. + This API requires very few dependencies and is especially useful if you don't have permissions to write documents on a cluster.
+The API uses several contexts, which control how scripts are run, what variables are available at runtime, and what the return type is.
+Each context requires a script, but additional parameters depend on the context you're using for that script.
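For instance, a sketch of the default painless_test context through the async Python client; the script and params are made up for illustration:

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def execute_painless_example():
        resp = await client.scripts_painless_execute(
            script={
                "source": "params.count / params.total",
                "params": {"count": 100.0, "total": 400.0},
            },
        )
        print(resp["result"])  # the computed value, returned as a string by the test context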
`IMPORTANT: Results from a scrolling search reflect the state of the index at the time of the initial search request. Subsequent indexing or document changes only affect later search and scroll requests.
- `Search a vector tile.
-Search a vector tile for geospatial values.
+Search a vector tile for geospatial values. + Before using this API, you should be familiar with the Mapbox vector tile specification. + The API returns results as a binary mapbox vector tile.
+Internally, Elasticsearch translates a vector tile search API request into a search containing:
+geo_bounding_box
query on the <field>
. The query uses the <zoom>/<x>/<y>
tile as a bounding box.geotile_grid
or geohex_grid
aggregation on the <field>
. The grid_agg
parameter determines the aggregation type. The aggregation uses the <zoom>/<x>/<y>
tile as a bounding box.geo_bounds
aggregation on the <field>
. The search only includes this aggregation if the exact_bounds
parameter is true
.with_labels
is true
, the internal search will include a dynamic runtime field that calls the getLabelPosition
function of the geometry doc value. This enables the generation of new point features containing suggested geometry labels, so that, for example, multi-polygons will have only one label.For example, Elasticsearch may translate a vector tile search API request with a grid_agg
argument of geotile
and an exact_bounds
argument of true
into the following search
GET my-index/_search
+ {
+ "size": 10000,
+ "query": {
+ "geo_bounding_box": {
+ "my-geo-field": {
+ "top_left": {
+ "lat": -40.979898069620134,
+ "lon": -45
+ },
+ "bottom_right": {
+ "lat": -66.51326044311186,
+ "lon": 0
+ }
+ }
+ }
+ },
+ "aggregations": {
+ "grid": {
+ "geotile_grid": {
+ "field": "my-geo-field",
+ "precision": 11,
+ "size": 65536,
+ "bounds": {
+ "top_left": {
+ "lat": -40.979898069620134,
+ "lon": -45
+ },
+ "bottom_right": {
+ "lat": -66.51326044311186,
+ "lon": 0
+ }
+ }
+ }
+ },
+ "bounds": {
+ "geo_bounds": {
+ "field": "my-geo-field",
+ "wrap_longitude": false
+ }
+ }
+ }
+ }
+
The API returns results as a binary Mapbox vector tile. Mapbox vector tiles are encoded as Google Protobufs (PBF). By default, the tile contains three layers:
* A hits layer containing a feature for each <field> value matching the geo_bounding_box query.
* An aggs layer containing a feature for each cell of the geotile_grid or geohex_grid. The layer only contains features for cells with matching data.
* A meta layer containing metadata for the search and value ranges for any sub-aggregations on the geotile_grid or geohex_grid.
The API only returns features that can display at its zoom level. For example, if a polygon feature has no area at its zoom level, the API omits it. The API returns errors as UTF-8 encoded JSON.
+IMPORTANT: You can specify several options for this API as either a query parameter or request body parameter. + If you specify both parameters, the query parameter takes precedence.
+Grid precision for geotile
+For a grid_agg
of geotile
, you can use cells in the aggs
layer as tiles for lower zoom levels.
+ grid_precision
represents the additional zoom levels available through these cells. The final precision is computed as follows: <zoom> + grid_precision
.
+ For example, if <zoom>
is 7 and grid_precision
is 8, then the geotile_grid
aggregation will use a precision of 15.
+ The maximum final precision is 29.
+ The grid_precision
also determines the number of cells for the grid as follows: (2^grid_precision) x (2^grid_precision)
.
+ For example, a value of 8 divides the tile into a grid of 256 x 256 cells.
+ The aggs
layer only contains features for cells with matching data.
Grid precision for geohex
+For a grid_agg
of geohex
, Elasticsearch uses <zoom>
and grid_precision
to calculate a final precision as follows: <zoom> + grid_precision
.
This precision determines the H3 resolution of the hexagonal cells produced by the geohex
aggregation.
+ The following table maps the H3 resolution for each precision.
+ For example, if <zoom>
is 3 and grid_precision
is 3, the precision is 6.
+ At a precision of 6, hexagonal cells have an H3 resolution of 2.
+ If <zoom>
is 3 and grid_precision
is 4, the precision is 7.
+ At a precision of 7, hexagonal cells have an H3 resolution of 3.
Precision | Unique tile bins | H3 resolution | Unique hex bins | Ratio
---|---|---|---|---
1 | 4 | 0 | 122 | 30.5
2 | 16 | 0 | 122 | 7.625
3 | 64 | 1 | 842 | 13.15625
4 | 256 | 1 | 842 | 3.2890625
5 | 1024 | 2 | 5882 | 5.744140625
6 | 4096 | 2 | 5882 | 1.436035156
7 | 16384 | 3 | 41162 | 2.512329102
8 | 65536 | 3 | 41162 | 0.6280822754
9 | 262144 | 4 | 288122 | 1.099098206
10 | 1048576 | 4 | 288122 | 0.2747745514
11 | 4194304 | 5 | 2016842 | 0.4808526039
12 | 16777216 | 6 | 14117882 | 0.8414913416
13 | 67108864 | 6 | 14117882 | 0.2103728354
14 | 268435456 | 7 | 98825162 | 0.3681524172
15 | 1073741824 | 8 | 691776122 | 0.644266719
16 | 4294967296 | 8 | 691776122 | 0.1610666797
17 | 17179869184 | 9 | 4842432842 | 0.2818666889
18 | 68719476736 | 10 | 33897029882 | 0.4932667053
19 | 274877906944 | 11 | 237279209162 | 0.8632167343
20 | 1099511627776 | 11 | 237279209162 | 0.2158041836
21 | 4398046511104 | 12 | 1660954464122 | 0.3776573213
22 | 17592186044416 | 13 | 11626681248842 | 0.6609003122
23 | 70368744177664 | 13 | 11626681248842 | 0.165225078
24 | 281474976710656 | 14 | 81386768741882 | 0.2891438866
25 | 1125899906842620 | 15 | 569707381193162 | 0.5060018015
26 | 4503599627370500 | 15 | 569707381193162 | 0.1265004504
27 | 18014398509482000 | 15 | 569707381193162 | 0.03162511259
28 | 72057594037927900 | 15 | 569707381193162 | 0.007906278149
29 | 288230376151712000 | 15 | 569707381193162 | 0.001976569537
Hexagonal cells don't align perfectly on a vector tile. + Some cells may intersect more than one vector tile. + To compute the H3 resolution for each precision, Elasticsearch compares the average density of hexagonal bins at each resolution with the average density of tile bins at each zoom level. + Elasticsearch uses the H3 resolution that is closest to the corresponding geotile density.
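Putting the parameters above together, a hedged sketch of a vector tile request with the async Python client (index, field, and tile coordinates are hypothetical); the response body is the binary PBF tile rather than JSON:

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def search_mvt_example():
        tile = await client.search_mvt(
            index="my-index",
            field="my-geo-field",
            zoom=7,
            x=60,
            y=70,
            grid_agg="geotile",
            grid_precision=8,
            exact_bounds=True,
        )
        # Write the binary Mapbox vector tile to disk.
        with open("tile_7_60_70.pbf", "wb") as f:
            f.write(tile.body)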
`Get the search shards.
Get the indices and shards that a search request would be run against. This information can be useful for working out issues or planning optimizations with routing and shard preferences. - When filtered aliases are used, the filter is returned as part of the indices section.
+ When filtered aliases are used, the filter is returned as part of the indices
section.
+ If the Elasticsearch security features are enabled, you must have the view_index_metadata
or manage
index privilege for the target data stream, index, or alias.
Run a search with a search template.
- `Get terms in an index.
Discover terms that match a partial string in an index. - This "terms enum" API is designed for low-latency look-ups used in auto-complete scenarios.
-If the complete
property in the response is false, the returned terms set may be incomplete and should be treated as approximate.
- This can occur due to a few reasons, such as a request timeout or a node error.
NOTE: The terms enum API may return terms from deleted documents. Deleted documents are initially only marked as deleted. It is not until their segments are merged that documents are actually deleted. Until that happens, the terms enum API will return terms from these documents.
+ This API is designed for low-latency look-ups used in auto-complete scenarios.
+ NOTE: The terms enum API may return terms from deleted documents. Deleted documents are initially only marked as deleted. It is not until their segments are merged that documents are actually deleted. Until that happens, the terms enum API will return terms from these documents.
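A small sketch with the async Python client, checking the complete flag so partial results are treated as approximate (index and field names are hypothetical):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def terms_enum_example():
        resp = await client.terms_enum(index="stackoverflow", field="tags", string="kiba")
        if resp["complete"]:
            print(resp["terms"])
        else:
            print("partial result, treat as approximate:", resp["terms"])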
Get term vector information.
Get information and statistics about terms in the fields of a particular document.
+You can retrieve term vectors for documents stored in the index or for artificial documents passed in the body of the request.
+ You can specify the fields you are interested in through the fields
parameter or by adding the fields to the request body.
+ For example:
GET /my-index-000001/_termvectors/1?fields=message
+
+ Fields can be specified using wildcards, similar to the multi match query.
+Term vectors are real-time by default, not near real-time.
+ This can be changed by setting realtime
parameter to false
.
You can request three types of values: term information, term statistics, and field statistics. + By default, all term information and field statistics are returned for all fields but term statistics are excluded.
+Term information
* term frequency in the field (always returned)
* term positions (positions: true)
* start and end offsets (offsets: true)
* term payloads (payloads: true), as base64 encoded bytes
If the requested information wasn't stored in the index, it will be computed on the fly if possible. Additionally, term vectors could be computed for documents not even existing in the index, but instead provided by the user.
+ WARNING: Start and end offsets assume UTF-16 encoding is being used. If you want to use these offsets in order to get the original text that produced this token, you should make sure that the string you are taking a sub-string of is also encoded using UTF-16.
+
Behaviour
+The term and field statistics are not accurate.
+ Deleted documents are not taken into account.
+ The information is only retrieved for the shard the requested document resides in.
+ The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context.
+ By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected.
+ Use routing
only to hit a particular shard.
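For example, a sketch of a term vectors request with the async Python client, asking for positions, offsets, payloads, and term statistics (document ID and field are hypothetical):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def termvectors_example():
        resp = await client.termvectors(
            index="my-index-000001",
            id="1",
            fields=["message"],
            positions=True,
            offsets=True,
            payloads=True,
            term_statistics=True,
            field_statistics=True,
        )
        print(list(resp["term_vectors"]["message"]["terms"]))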
Update documents. Updates documents that match the specified query. If no query is specified, performs an update on every document in the data stream or index without modifying the source, which is useful for picking up mapping changes.
+If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or alias:
+read
index
or write
You can specify the query criteria in the request URI or the request body using the same syntax as the search API.
+When you submit an update by query request, Elasticsearch gets a snapshot of the data stream or index when it begins processing the request and updates matching documents using internal versioning.
+ When the versions match, the document is updated and the version number is incremented.
+ If a document changes between the time that the snapshot is taken and the update operation is processed, it results in a version conflict and the operation fails.
+ You can opt to count version conflicts instead of halting and returning by setting conflicts
to proceed
.
+ Note that if you opt to count version conflicts, the operation could attempt to update more documents from the source than max_docs
until it has successfully updated max_docs
documents or it has gone through every document in the source query.
NOTE: Documents with a version equal to 0 cannot be updated using update by query because internal versioning does not support 0 as a valid version number.
+While processing an update by query request, Elasticsearch performs multiple search requests sequentially to find all of the matching documents. + A bulk update request is performed for each batch of matching documents. + Any query or update failures cause the update by query request to fail and the failures are shown in the response. + Any update requests that completed successfully still stick, they are not rolled back.
+Throttling update requests
+To control the rate at which update by query issues batches of update operations, you can set requests_per_second
to any positive decimal number.
+ This pads each batch with a wait time to throttle the rate.
+ Set requests_per_second
to -1
to turn off throttling.
Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account.
+ The padding time is the difference between the batch size divided by the requests_per_second
and the time spent writing.
+ By default the batch size is 1000, so if requests_per_second
is set to 500
:
target_time = 1000 / 500 per second = 2 seconds
+ wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
+
+ Since the batch is issued as a single _bulk request, large batch sizes cause Elasticsearch to create many requests and wait before starting the next set. + This is "bursty" instead of "smooth".
+Slicing
+Update by query supports sliced scroll to parallelize the update process. + This can improve efficiency and provide a convenient way to break the request down into smaller parts.
+Setting slices
to auto
chooses a reasonable number for most data streams and indices.
+ This setting will use one slice per shard, up to a certain limit.
+ If there are multiple source data streams or indices, it will choose the number of slices based on the index or backing index with the smallest number of shards.
Adding slices
to _update_by_query
just automates the manual process of creating sub-requests, which means it has some quirks:
slices
only contains the status of completed slices.slices
will rethrottle the unfinished sub-request proportionally.requests_per_second
and max_docs
on a request with slices are distributed proportionally to each sub-request. Combine that with the point above about distribution being uneven and you should conclude that using max_docs
with slices
might not result in exactly max_docs
documents being updated.If you're slicing manually or otherwise tuning automatic slicing, keep in mind that:
+Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources.
+Update the document source
+Update by query supports scripts to update the document source.
+ As with the update API, you can set ctx.op
to change the operation that is performed.
Set ctx.op = "noop"
if your script decides that it doesn't have to make any changes.
+ The update by query operation skips updating the document and increments the noop
counter.
Set ctx.op = "delete"
if your script decides that the document should be deleted.
+ The update by query operation deletes the document and increments the deleted
counter.
Update by query supports only index
, noop
, and delete
.
+ Setting ctx.op
to anything else is an error.
+ Setting any other field in ctx
is an error.
+ This API enables you to only modify the source of matching documents; you cannot move them.
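A hedged sketch of these options with the async Python client, counting version conflicts, throttling, automatic slicing, and a noop-capable script (index, query, and script are hypothetical):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def update_by_query_example():
        resp = await client.update_by_query(
            index="my-index",
            query={"term": {"user.id": "kimchy"}},
            script={
                "source": "if (ctx._source.count == null) { ctx.op = 'noop' } else { ctx._source.count++ }",
                "lang": "painless",
            },
            conflicts="proceed",      # count version conflicts instead of failing
            slices="auto",            # parallelize with sliced scroll
            requests_per_second=500,  # throttle the rate of update batches
        )
        print(resp["updated"], resp["version_conflicts"], resp["noops"])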
Get the async search status.
Get the status of a previously submitted async search request given its identifier, without retrieving search results.
- If the Elasticsearch security features are enabled, use of this API is restricted to the monitoring_user
role.
monitor
cluster privilege or greater privileges.Delete auto-follow patterns. - Delete a collection of cross-cluster replication auto-follow patterns.
+Delete auto-follow patterns.
+Delete a collection of cross-cluster replication auto-follow patterns.
`Get follower information. - Get information about all cross-cluster replication follower indices. +
Get follower information.
+Get information about all cross-cluster replication follower indices. For example, the results include follower index names, leader index names, replication options, and whether the follower indices are active or paused.
`Get follower stats. - Get cross-cluster replication follower stats. +
Get follower stats.
+Get cross-cluster replication follower stats. The API returns shard-level stats about the "following tasks" associated with each shard for the specified indices.
`Get auto-follow patterns. - Get cross-cluster replication auto-follow patterns.
+Get auto-follow patterns.
+Get cross-cluster replication auto-follow patterns.
`Pause an auto-follow pattern. - Pause a cross-cluster replication auto-follow pattern. +
Pause an auto-follow pattern.
+Pause a cross-cluster replication auto-follow pattern. When the API returns, the auto-follow pattern is inactive. New indices that are created on the remote cluster and match the auto-follow patterns are ignored.
You can resume auto-following with the resume auto-follow pattern API.
@@ -500,9 +507,11 @@ async def pause_auto_follow_pattern(
` Pause a follower.
- Pause a cross-cluster replication follower index.
+ Pause a follower. Pause a cross-cluster replication follower index.
The follower index will not fetch any additional operations from the leader index.
You can resume following with the resume follower API.
You can pause and resume a follower index to change the configuration of the following task. Resume an auto-follow pattern.
- Resume a cross-cluster replication auto-follow pattern that was paused.
+ Resume an auto-follow pattern. Resume a cross-cluster replication auto-follow pattern that was paused.
The auto-follow pattern will resume configuring following indices for newly created indices that match its patterns on the remote cluster.
Remote indices created while the pattern was paused will also be followed unless they have been deleted or closed in the interim. Get cross-cluster replication stats.
- This API returns stats about auto-following and the same shard-level stats as the get follower stats API. Get cross-cluster replication stats. This API returns stats about auto-following and the same shard-level stats as the get follower stats API. Unfollow an index.
- Convert a cross-cluster replication follower index to a regular index.
+ Unfollow an index. Convert a cross-cluster replication follower index to a regular index.
The API stops the following task associated with a follower index and removes index metadata and settings associated with cross-cluster replication.
The follower index must be paused and closed before you call the unfollow API. NOTE: Currently cross-cluster replication does not support converting an existing regular index to a follower index. Converting a follower index to a regular index is an irreversible operation. info
+ Currently cross-cluster replication does not support converting an existing regular index to a follower index. Converting a follower index to a regular index is an irreversible operation. Get remote cluster information.
- Get all of the configured remote cluster information.
- This API returns connection and endpoint information keyed by the configured remote cluster alias. Get remote cluster information. Get information about configured remote clusters.
+ The API returns connection and endpoint information keyed by the configured remote cluster alias. info
+ This API returns information that reflects current state on the local cluster.
+ The
+
`
+
`connected
field does not necessarily reflect whether a remote cluster is down or unavailable, only whether there is currently an open connection to it.
+ Elasticsearch does not spontaneously try to reconnect to a disconnected remote cluster.
+ To trigger a reconnection, attempt a cross-cluster search, ES|QL cross-cluster search, or try the resolve cluster endpoint.
Stop async ES|QL query.
+This API interrupts the query execution and returns the results so far. + If the Elasticsearch security features are enabled, only the user who first submitted the ES|QL query can stop it.
+ + + `IMPORTANT: The features installed on the node you submit this request to are the features that will be reset. Run on the master node if you have any doubts about which plugins are installed on individual nodes.
- `Returns the current global checkpoints for an index. This API is design for internal use by the fleet server project.
+Get global checkpoints.
+Get the current global checkpoints for an index. + This API is designed for internal use by the Fleet server project.
`Get the ILM status. - Get the current index lifecycle management status.
+Get the ILM status.
+Get the current index lifecycle management status.
`Add an index block. - Limits the operations allowed on an index by blocking specific operation types.
+Add an index block.
+Add an index block to an index. + Index blocks limit the operations allowed on an index by blocking specific operation types.
- `Create a data stream. - Creates a data stream. - You must have a matching index template with data stream enabled.
+Create a data stream.
+You must have a matching index template with data stream enabled.
- `Get data stream stats. - Retrieves statistics for one or more data streams.
+Get data stream stats.
+Get statistics for one or more data streams.
- `Check aliases. - Checks if one or more data stream or index aliases exist.
+Check aliases.
+Check if one or more data stream or index aliases exist.
- `Check index templates. - Check whether index templates exist.
+Check index templates.
+Check whether index templates exist.
- `Get data stream lifecycles. - Retrieves the data stream lifecycle configuration of one or more data streams.
+Get data stream lifecycles.
+Get the data stream lifecycle configuration of one or more data streams.
`Get data streams. - Retrieves information about one or more data streams.
+Get data streams.
+Get information about one or more data streams.
- `Resolve the cluster. - Resolve the specified index expressions to return information about each cluster, including the local cluster, if included. - Multiple patterns and remote clusters are supported.
+Resolve the cluster.
+Resolve the specified index expressions to return information about each cluster, including the local "querying" cluster, if included. + If no index expression is provided, the API will return information about all the remote clusters that are configured on the querying cluster.
This endpoint is useful before doing a cross-cluster search in order to determine which remote clusters should be included in a search.
You use the same index expression with this endpoint as you would for cross-cluster search. Index and cluster exclusions are also supported with this endpoint.
For each cluster in the index expression, information is returned about:
remote/info
endpoint.skip_unavailable
as true
or false
.For example, GET /_resolve/cluster/my-index-*,cluster*:my-index-*
returns information about the local cluster and all remotely configured clusters that start with the alias cluster*
.
Each cluster returns information about whether it has any indices, aliases or data streams that match my-index-*
.
Advantages of using this endpoint before a cross-cluster search
+The ability to query without an index expression was added in version 8.18, so when
+ querying remote clusters older than that, the local cluster will send the index
+ expression dummy*
to those remote clusters. Thus, if an error occurs, you may see a reference
+ to that index expression even though you didn't request it. If it causes a problem, you can
+ instead include an index expression like *:*
to bypass the issue.
You may want to exclude a cluster or index from a search when:
skip_unavailable=false
. Running a cross-cluster search under those conditions will cause the entire search to fail._resolve/cluster
response will be present. (This is also where security/permission errors will be shown.)The remote/info
endpoint is commonly used to test whether the "local" cluster (the cluster being queried) is connected to its remote clusters, but it does not necessarily reflect whether the remote cluster is available or not.
+ The remote cluster may be available, while the local cluster is not currently connected to it.
You can use the _resolve/cluster
API to attempt to reconnect to remote clusters.
+ For example with GET _resolve/cluster
or GET _resolve/cluster/*:*
.
+ The connected
field in the response will indicate whether it was successful.
+ If a connection was (re-)established, this will also cause the remote/info
endpoint to now indicate a connected status.
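As a sketch, the same checks through the async Python client (the remote alias and index pattern are hypothetical):

    from elasticsearch import AsyncElasticsearch

    client = AsyncElasticsearch("http://localhost:9200")

    async def resolve_cluster_example():
        resp = await client.indices.resolve_cluster(name="my-index-*,cluster*:my-index-*")
        for alias, info in resp.items():
            print(alias, info.get("connected"), info.get("matching_indices"))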
Perform inference on the service
+Perform inference on the service.
+This API enables you to use machine learning models to perform specific tasks on data that you provide as an input. + It returns a response with the results of the tasks. + The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.
++`info + The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+
Delete GeoIP database configurations. - Delete one or more IP geolocation database configurations.
+Delete GeoIP database configurations.
+Delete one or more IP geolocation database configurations.
- `Get GeoIP database configurations. - Get information about one or more IP geolocation database configurations.
+Get GeoIP database configurations.
+Get information about one or more IP geolocation database configurations.
- `Get pipelines. - Get information about one or more ingest pipelines. +
Get pipelines.
+Get information about one or more ingest pipelines. This API returns a local reference of the pipeline.
@@ -455,11 +455,11 @@ async def put_geoip_database( """ .. raw:: html -Create or update a GeoIP database configuration. - Refer to the create or update IP geolocation database configuration API.
+Create or update a GeoIP database configuration.
+Refer to the create or update IP geolocation database configuration API.
- `Simulate a pipeline. - Run an ingest pipeline against a set of provided documents. +
Simulate a pipeline.
+Run an ingest pipeline against a set of provided documents. You can either specify an existing pipeline to use with the provided documents or supply a pipeline definition in the body of the request.
`Delete the license. - When the license expires, your subscription level reverts to Basic.
+Delete the license.
+When the license expires, your subscription level reverts to Basic.
If the operator privileges feature is enabled, only operator users can use this API.
`Get license information. - Get information about your Elastic license including its type, its status, when it was issued, and when it expires.
-NOTE: If the master node is generating a new cluster state, the get license API may return a 404 Not Found
response.
+
Get license information.
+Get information about your Elastic license including its type, its status, when it was issued, and when it expires.
++`info + If the master node is generating a new cluster state, the get license API may return a
+404 Not Found
response. If you receive an unexpected 404 response after cluster startup, wait a short period and retry the request.
Update the license. - You can update your license at runtime without shutting down your nodes. +
Update the license.
+You can update your license at runtime without shutting down your nodes. License updates take effect immediately. If the license you are installing does not support all of the features that were available with your previous license, however, you are notified in the response. You must then re-submit the API request with the acknowledge parameter set to true.
@@ -240,9 +243,9 @@ async def post( :param license: :param licenses: A sequence of one or more JSON documents containing the license information. - :param master_timeout: Period to wait for a connection to the master node. - :param timeout: Period to wait for a response. If no response is received before - the timeout expires, the request fails and returns an error. + :param master_timeout: The period to wait for a connection to the master node. + :param timeout: The period to wait for a response. If no response is received + before the timeout expires, the request fails and returns an error. """ __path_parts: t.Dict[str, str] = {} __path = "/_license" @@ -297,8 +300,8 @@ async def post_start_basic( """ .. raw:: html -Start a basic license. - Start an indefinite basic license, which gives access to all the basic features.
+Start a basic license.
+Start an indefinite basic license, which gives access to all the basic features.
NOTE: In order to start a basic license, you must not currently have a basic license.
If the basic license does not support all of the features that are available with your current license, however, you are notified in the response.
You must then re-submit the API request with the acknowledge
parameter set to true
.
Clear trained model deployment cache. - Cache will be cleared on all nodes where the trained model is assigned. +
Clear trained model deployment cache.
+Cache will be cleared on all nodes where the trained model is assigned. A trained model deployment may have an inference cache enabled. As requests are handled by each allocated node, their responses may be cached on that individual node. Calling this API clears the caches without restarting the deployment.
@@ -93,8 +93,8 @@ async def close_job( """ .. raw:: html -Close anomaly detection jobs. - A job can be opened and closed multiple times throughout its lifecycle. A closed job cannot receive data or perform analysis operations, but you can still explore and navigate results. +
Close anomaly detection jobs.
+A job can be opened and closed multiple times throughout its lifecycle. A closed job cannot receive data or perform analysis operations, but you can still explore and navigate results. When you close a job, it runs housekeeping tasks such as pruning the model history, flushing buffers, calculating final results and persisting the model snapshots. Depending upon the size of the job, it could take several minutes to close and the equivalent time to re-open. After it is closed, the job has a minimal overhead on the cluster except for maintaining its meta data. Therefore it is a best practice to close jobs that are no longer required to process data. If you close an anomaly detection job whose datafeed is running, the request first tries to stop the datafeed. This behavior is equivalent to calling stop datafeed API with the same timeout and force parameters as the close job request. When a datafeed that has a specified end date stops, it automatically closes its associated job.
@@ -161,8 +161,8 @@ async def delete_calendar( """ .. raw:: html -Delete a calendar. - Removes all scheduled events from a calendar, then deletes it.
+Delete a calendar.
+Remove all scheduled events from a calendar, then delete it.
`Delete expired ML data. - Deletes all job results, model snapshots and forecast data that have exceeded +
Delete expired ML data.
+Delete all job results, model snapshots and forecast data that have exceeded their retention days period. Machine learning state documents that are not associated with any job are also deleted. You can limit the request to a single or set of anomaly detection jobs by using a job identifier, a group name, a comma-separated list of jobs, or a wildcard expression. You can delete expired data for all anomaly detection - jobs by using _all, by specifying * as the <job_id>, or by omitting the - <job_id>.
+ jobs by using_all
, by specifying *
as the <job_id>
, or by omitting the
+ <job_id>
.
`Delete a filter. - If an anomaly detection job references the filter, you cannot delete the +
Delete a filter.
+If an anomaly detection job references the filter, you cannot delete the filter. You must update or delete the job before you can delete the filter.
@@ -533,8 +533,8 @@ async def delete_forecast( """ .. raw:: html -Delete forecasts from a job. - By default, forecasts are retained for 14 days. You can specify a +
Delete forecasts from a job.
+By default, forecasts are retained for 14 days. You can specify a
different retention period with the expires_in
parameter in the forecast
jobs API. The delete forecast API enables you to delete one or more
forecasts before they expire.
Delete an anomaly detection job. - All job configuration, model state and results are deleted. +
Delete an anomaly detection job.
+All job configuration, model state and results are deleted. It is not currently possible to delete multiple jobs using wildcards or a comma separated list. If you delete a job that has a datafeed, the request first tries to delete the datafeed. This behavior is equivalent to calling @@ -670,8 +670,8 @@ async def delete_model_snapshot( """ .. raw:: html -
Delete a model snapshot. - You cannot delete the active model snapshot. To delete that snapshot, first +
Delete a model snapshot.
+You cannot delete the active model snapshot. To delete that snapshot, first
revert to a different one. To identify the active model snapshot, refer to
the model_snapshot_id
in the results from the get jobs API.
Delete an unreferenced trained model. - The request deletes a trained inference model that is not referenced by an ingest pipeline.
+Delete an unreferenced trained model.
+The request deletes a trained inference model that is not referenced by an ingest pipeline.
`Delete a trained model alias. - This API deletes an existing model alias that refers to a trained model. If +
Delete a trained model alias.
+This API deletes an existing model alias that refers to a trained model. If
the model alias is missing or refers to a model other than the one identified
by the model_id
, this API returns an error.
Estimate job model memory usage. - Makes an estimation of the memory usage for an anomaly detection job model. - It is based on analysis configuration details for the job and cardinality +
Estimate job model memory usage.
+Make an estimation of the memory usage for an anomaly detection job model. + The estimate is based on analysis configuration details for the job and cardinality estimates for the fields it references.
- `Evaluate data frame analytics. - The API packages together commonly used evaluation metrics for various types +
Evaluate data frame analytics.
+The API packages together commonly used evaluation metrics for various types of machine learning features. This has been designed for use on indexes created by data frame analytics. Evaluation requires both a ground truth field and an analytics result field to be present.
@@ -990,8 +990,8 @@ async def explain_data_frame_analytics( """ .. raw:: html -Explain data frame analytics config. - This API provides explanations for a data frame analytics config that either +
Explain data frame analytics config.
+This API provides explanations for a data frame analytics config that either exists already or one that has not been created yet. The following explanations are provided:
Open anomaly detection jobs. - An anomaly detection job must be opened to be ready to receive and analyze +
Open anomaly detection jobs.
+An anomaly detection job must be opened to be ready to receive and analyze data. It can be opened and closed multiple times throughout its lifecycle. When you open a new job, it starts with an empty model. When you open an existing job, the most recent model state is automatically @@ -3082,7 +3082,7 @@ async def preview_data_frame_analytics( .. raw:: html
Preview features used by data frame analytics. - Previews the extracted features used by a data frame analytics config.
+ Preview the extracted features used by a data frame analytics config. `Create an anomaly detection job.
- If you include a datafeed_config
, you must have read index privileges on the source index.
+
Create an anomaly detection job.
+If you include a datafeed_config
, you must have read index privileges on the source index.
If you include a datafeed_config
but do not provide a query, the datafeed uses {"match_all": {"boost": 1}}
.
Upgrade a snapshot. - Upgrades an anomaly detection model snapshot to the latest major version. + Upgrade an anomaly detection model snapshot to the latest major version. Over time, older snapshot formats are deprecated and removed. Anomaly detection jobs support only snapshots that are from the current or previous major version. @@ -5782,7 +5782,7 @@ async def validate_detector(
Validate an anomaly detection job.
- `Get node information. - By default, the API returns all attributes and core settings for cluster nodes.
+Get node information.
+By default, the API returns all attributes and core settings for cluster nodes.
`Delete a search application. - Remove a search application and its associated alias. Indices attached to the search application are not removed.
+Delete a search application.
+Remove a search application and its associated alias. Indices attached to the search application are not removed.
`Delete a transform. - Deletes a transform.
+Delete a transform.
`Get transforms. - Retrieves configuration information for transforms.
+ Get configuration information for transforms. `Get transform stats. - Retrieves usage information for transforms.
+Get transform stats.
+Get usage information for transforms.
`Reset a transform.
- Resets a transform.
- Before you can reset it, you must stop it; alternatively, use the force
query parameter.
+
Reset a transform.
+Before you can reset it, you must stop it; alternatively, use the force
query parameter.
If the destination index was created by the transform, it is deleted.
Schedule a transform to start now. - Instantly runs a transform to process data.
-If you _schedule_now a transform, it will process the new data instantly, - without waiting for the configured frequency interval. After _schedule_now API is called, - the transform will be processed again at now + frequency unless _schedule_now API +
Schedule a transform to start now.
+Instantly run a transform to process data.
+ If you run this API, the transform will process the new data instantly,
+ without waiting for the configured frequency interval. After the API is called,
+ the transform will be processed again at now + frequency
unless the API
is called again in the meantime.
Start a transform. - Starts a transform.
+Start a transform.
When you start a transform, it creates the destination index if it does not already exist. The number_of_shards
is
set to 1
and the auto_expand_replicas
is set to 0-1
. If it is a pivot transform, it deduces the mapping
definitions for the destination index from the source indices and the transform aggregations. If fields in the
@@ -879,8 +876,8 @@ async def upgrade_transforms(
"""
.. raw:: html
-
Upgrade all transforms. - Transforms are compatible across minor versions and between supported major versions. +
Upgrade all transforms.
+Transforms are compatible across minor versions and between supported major versions. However, over time, the format of transform configuration information may change. This API identifies transforms that have a legacy configuration format and upgrades them to the latest version. It also cleans up the internal data structures that store the transform state and checkpoints. diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py index 67187220a..705a3914d 100644 --- a/elasticsearch/_sync/client/__init__.py +++ b/elasticsearch/_sync/client/__init__.py @@ -626,6 +626,7 @@ def bulk( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, + include_source_on_error: t.Optional[bool] = None, list_executed_pipelines: t.Optional[bool] = None, pipeline: t.Optional[str] = None, pretty: t.Optional[bool] = None, @@ -733,6 +734,8 @@ def bulk( :param operations: :param index: The name of the data stream, index, or index alias to perform bulk actions on. + :param include_source_on_error: True or false if to include the document source + in the error message in case of parsing errors. :param list_executed_pipelines: If `true`, the response will include the ingest pipelines that were run for each index or create. :param pipeline: The pipeline identifier to use to preprocess incoming documents. @@ -790,6 +793,8 @@ def bulk( __query["filter_path"] = filter_path if human is not None: __query["human"] = human + if include_source_on_error is not None: + __query["include_source_on_error"] = include_source_on_error if list_executed_pipelines is not None: __query["list_executed_pipelines"] = list_executed_pipelines if pipeline is not None: @@ -982,8 +987,8 @@ def count(
Count search results. Get the number of documents matching a query.
-The query can either be provided using a simple query string as a parameter or using the Query DSL defined within the request body.
- The latter must be nested in a query
key, which is the same as the search API.
The query can be provided either by using a simple query string as a parameter, or by defining Query DSL within the request body.
+ The query is optional. When no query is provided, the API uses match_all
to count all the documents.
The count API supports multi-target syntax. You can run a single count API search across multiple data streams and indices.
The operation is broadcast across all shards.
For each shard ID group, a replica is chosen and the search is run against it.
@@ -1025,10 +1030,10 @@ def count(
in the result.
:param preference: The node or shard the operation should be performed on. By
default, it is random.
- :param q: The query in Lucene query string syntax.
- :param query: Defines the search definition using the Query DSL. The query is
- optional, and when not provided, it will use `match_all` to count all the
- docs.
+ :param q: The query in Lucene query string syntax. This parameter cannot be used
+ with a request body.
+ :param query: Defines the search query using Query DSL. A request body query
+ cannot be used with the `q` query string parameter.
:param routing: A custom value used to route operations to a specific shard.
:param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
@@ -1114,6 +1119,7 @@ def create(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
+ include_source_on_error: t.Optional[bool] = None,
pipeline: t.Optional[str] = None,
pretty: t.Optional[bool] = None,
refresh: t.Optional[
@@ -1196,6 +1202,8 @@ def create(
:param id: A unique identifier for the document. To automatically generate a
document ID, use the `POST /
Explain a document match result. - Returns information about why a specific document matches, or doesn’t match, a query.
+ Get information about why a specific document matches, or doesn't match, a query. + It computes a score explanation for a query and a specific document. `Get a list of supported script contexts and their methods.
- `Get a list of available script types, languages, and contexts.
- `The kNN search API supports restricting the search using a filter. The search will return the top k documents that also match the filter query.
+A kNN search response has the exact same structure as a search API response. + However, certain sections have a meaning specific to kNN search:
+_score
is determined by the similarity between the query and document vector.hits.total
object contains the total number of nearest neighbor candidates considered, which is num_candidates * num_shards
. The hits.total.relation
will always be eq
, indicating an exact value.Get multiple JSON documents by ID from one or more indices. If you specify an index in the request URI, you only need to specify the document IDs in the request body. To ensure fast responses, this multi get (mget) API responds with partial results if one or more shards fail.
+Filter source fields
+By default, the _source
field is returned for every document (if stored).
+ Use the _source
and _source_include
or source_exclude
attributes to filter what fields are returned for a particular document.
+ You can include the _source
, _source_includes
, and _source_excludes
query parameters in the request URI to specify the defaults to use when there are no per-document instructions.
Get stored fields
+Use the stored_fields
attribute to specify the set of stored fields you want to retrieve.
+ Any requested fields that are not stored are ignored.
+ You can include the stored_fields
query parameter in the request URI to specify the defaults to use when there are no per-document instructions.
Run multiple templated searches.
+Run multiple templated searches with a single request.
+ If you are providing a text file or text input to curl
, use the --data-binary
flag instead of -d
to preserve newlines.
+ For example:
$ cat requests
+ { "index": "my-index" }
+ { "id": "my-search-template", "params": { "query_string": "hello world", "from": 0, "size": 10 }}
+ { "index": "my-other-index" }
+ { "id": "my-other-search-template", "params": { "query_type": "match_all" }}
+
+ $ curl -H "Content-Type: application/x-ndjson" -XGET localhost:9200/_msearch/template --data-binary "@requests"; echo
+
- `Get multiple term vectors.
-You can specify existing documents by index and ID or provide artificial documents in the body of the request. +
Get multiple term vectors with a single request.
+ You can specify existing documents by index and ID or provide artificial documents in the body of the request.
You can specify the index in the request body or request URI.
The response contains a docs
array with all the fetched termvectors.
Each element has the structure provided by the termvectors API.
Artificial documents
+You can also use mtermvectors
to generate term vectors for artificial documents provided in the body of the request.
+ The mapping used is determined by the specified _index
.
Run a script. - Runs a script and returns a result.
+Run a script.
+Runs a script and returns a result. + Use this API to build and test scripts, such as when defining a script for a runtime field. + This API requires very few dependencies and is especially useful if you don't have permissions to write documents on a cluster.
+The API uses several contexts, which control how scripts are run, what variables are available at runtime, and what the return type is.
+Each context requires a script, but additional parameters depend on the context you're using for that script.
`IMPORTANT: Results from a scrolling search reflect the state of the index at the time of the initial search request. Subsequent indexing or document changes only affect later search and scroll requests.
- `Search a vector tile.
-Search a vector tile for geospatial values.
+Search a vector tile for geospatial values. + Before using this API, you should be familiar with the Mapbox vector tile specification. + The API returns results as a binary mapbox vector tile.
+Internally, Elasticsearch translates a vector tile search API request into a search containing:
+geo_bounding_box
query on the <field>
. The query uses the <zoom>/<x>/<y>
tile as a bounding box.geotile_grid
or geohex_grid
aggregation on the <field>
. The grid_agg
parameter determines the aggregation type. The aggregation uses the <zoom>/<x>/<y>
tile as a bounding box.geo_bounds
aggregation on the <field>
. The search only includes this aggregation if the exact_bounds
parameter is true
.with_labels
is true
, the internal search will include a dynamic runtime field that calls the getLabelPosition
function of the geometry doc value. This enables the generation of new point features containing suggested geometry labels, so that, for example, multi-polygons will have only one label.For example, Elasticsearch may translate a vector tile search API request with a grid_agg
argument of geotile
and an exact_bounds
argument of true
into the following search
GET my-index/_search
+ {
+ "size": 10000,
+ "query": {
+ "geo_bounding_box": {
+ "my-geo-field": {
+ "top_left": {
+ "lat": -40.979898069620134,
+ "lon": -45
+ },
+ "bottom_right": {
+ "lat": -66.51326044311186,
+ "lon": 0
+ }
+ }
+ }
+ },
+ "aggregations": {
+ "grid": {
+ "geotile_grid": {
+ "field": "my-geo-field",
+ "precision": 11,
+ "size": 65536,
+ "bounds": {
+ "top_left": {
+ "lat": -40.979898069620134,
+ "lon": -45
+ },
+ "bottom_right": {
+ "lat": -66.51326044311186,
+ "lon": 0
+ }
+ }
+ }
+ },
+ "bounds": {
+ "geo_bounds": {
+ "field": "my-geo-field",
+ "wrap_longitude": false
+ }
+ }
+ }
+ }
+
+ The API returns results as a binary Mapbox vector tile. + Mapbox vector tiles are encoded as Google Protobufs (PBF). By default, the tile contains three layers:
+hits
layer containing a feature for each <field>
value matching the geo_bounding_box
query.aggs
layer containing a feature for each cell of the geotile_grid
or geohex_grid
. The layer only contains features for cells with matching data.geotile_grid
or geohex_grid
.The API only returns features that can display at its zoom level. + For example, if a polygon feature has no area at its zoom level, the API omits it. + The API returns errors as UTF-8 encoded JSON.
+IMPORTANT: You can specify several options for this API as either a query parameter or request body parameter. + If you specify both parameters, the query parameter takes precedence.
+Grid precision for geotile
+For a grid_agg
of geotile
, you can use cells in the aggs
layer as tiles for lower zoom levels.
+ grid_precision
represents the additional zoom levels available through these cells. The final precision is computed as follows: <zoom> + grid_precision
.
+ For example, if <zoom>
is 7 and grid_precision
is 8, then the geotile_grid
aggregation will use a precision of 15.
+ The maximum final precision is 29.
+ The grid_precision
also determines the number of cells for the grid as follows: (2^grid_precision) x (2^grid_precision)
.
+ For example, a value of 8 divides the tile into a grid of 256 x 256 cells.
+ The aggs
layer only contains features for cells with matching data.
Grid precision for geohex
+For a grid_agg of geohex, Elasticsearch uses <zoom> and grid_precision to calculate a final precision as follows: <zoom> + grid_precision.
+ This precision determines the H3 resolution of the hexagonal cells produced by the geohex aggregation.
+ The following table maps the H3 resolution for each precision.
+ For example, if <zoom> is 3 and grid_precision is 3, the precision is 6.
+ At a precision of 6, hexagonal cells have an H3 resolution of 2.
+ If <zoom> is 3 and grid_precision is 4, the precision is 7.
+ At a precision of 7, hexagonal cells have an H3 resolution of 3.
Precision | Unique tile bins | H3 resolution | Unique hex bins | Ratio
---|---|---|---|---
1 | 4 | 0 | 122 | 30.5
2 | 16 | 0 | 122 | 7.625
3 | 64 | 1 | 842 | 13.15625
4 | 256 | 1 | 842 | 3.2890625
5 | 1024 | 2 | 5882 | 5.744140625
6 | 4096 | 2 | 5882 | 1.436035156
7 | 16384 | 3 | 41162 | 2.512329102
8 | 65536 | 3 | 41162 | 0.6280822754
9 | 262144 | 4 | 288122 | 1.099098206
10 | 1048576 | 4 | 288122 | 0.2747745514
11 | 4194304 | 5 | 2016842 | 0.4808526039
12 | 16777216 | 6 | 14117882 | 0.8414913416
13 | 67108864 | 6 | 14117882 | 0.2103728354
14 | 268435456 | 7 | 98825162 | 0.3681524172
15 | 1073741824 | 8 | 691776122 | 0.644266719
16 | 4294967296 | 8 | 691776122 | 0.1610666797
17 | 17179869184 | 9 | 4842432842 | 0.2818666889
18 | 68719476736 | 10 | 33897029882 | 0.4932667053
19 | 274877906944 | 11 | 237279209162 | 0.8632167343
20 | 1099511627776 | 11 | 237279209162 | 0.2158041836
21 | 4398046511104 | 12 | 1660954464122 | 0.3776573213
22 | 17592186044416 | 13 | 11626681248842 | 0.6609003122
23 | 70368744177664 | 13 | 11626681248842 | 0.165225078
24 | 281474976710656 | 14 | 81386768741882 | 0.2891438866
25 | 1125899906842624 | 15 | 569707381193162 | 0.5060018015
26 | 4503599627370496 | 15 | 569707381193162 | 0.1265004504
27 | 18014398509481984 | 15 | 569707381193162 | 0.03162511259
28 | 72057594037927936 | 15 | 569707381193162 | 0.007906278149
29 | 288230376151711744 | 15 | 569707381193162 | 0.001976569537
Hexagonal cells don't align perfectly on a vector tile. + Some cells may intersect more than one vector tile. + To compute the H3 resolution for each precision, Elasticsearch compares the average density of hexagonal bins at each resolution with the average density of tile bins at each zoom level. + Elasticsearch uses the H3 resolution that is closest to the corresponding geotile density.
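A minimal sketch of requesting a tile through the Python client; the index, field, and tile coordinates are hypothetical, and with zoom 7 and grid_precision 8 the internal geotile_grid aggregation would use precision 15 as described above:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.search_mvt(
        index="my-index",
        field="my-geo-field",
        zoom=7,
        x=37,
        y=48,
        grid_agg="geotile",
        grid_precision=8,
        exact_bounds=True,
    )
    # The response body is the raw Mapbox vector tile (PBF bytes), not JSON.
    with open("tile.pbf", "wb") as f:
        f.write(resp.body)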
`Get the search shards.
Get the indices and shards that a search request would be run against. This information can be useful for working out issues or planning optimizations with routing and shard preferences. - When filtered aliases are used, the filter is returned as part of the indices section.
+ When filtered aliases are used, the filter is returned as part of the indices section.
+ If the Elasticsearch security features are enabled, you must have the view_index_metadata or manage index privilege for the target data stream, index, or alias.
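For example, a hedged sketch using the Python client (the index and routing value are placeholders):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    # Ask which shards a search with this routing value would hit.
    resp = client.search_shards(index="my-index-000001", routing="user1")
    for group in resp["shards"]:
        for copy in group:
            print(copy["index"], copy["shard"], copy["node"])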
Run a search with a search template.
- `Get terms in an index.
Discover terms that match a partial string in an index. - This "terms enum" API is designed for low-latency look-ups used in auto-complete scenarios.
-If the complete
property in the response is false, the returned terms set may be incomplete and should be treated as approximate.
- This can occur due to a few reasons, such as a request timeout or a node error.
NOTE: The terms enum API may return terms from deleted documents. Deleted documents are initially only marked as deleted. It is not until their segments are merged that documents are actually deleted. Until that happens, the terms enum API will return terms from these documents.
+ This API is designed for low-latency look-ups used in auto-complete scenarios. ++`info + The terms enum API may return terms from deleted documents. Deleted documents are initially only marked as deleted. It is not until their segments are merged that documents are actually deleted. Until that happens, the terms enum API will return terms from these documents.
+
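A small sketch with the Python client, assuming an index with a keyword field named tags (both names are made up):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.terms_enum(index="stackoverflow", field="tags", string="kiba")
    print(resp["terms"])     # e.g. ["kibana"]
    print(resp["complete"])  # False means the result may be approximate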
Get term vector information.
Get information and statistics about terms in the fields of a particular document.
+You can retrieve term vectors for documents stored in the index or for artificial documents passed in the body of the request.
+ You can specify the fields you are interested in through the fields
parameter or by adding the fields to the request body.
+ For example:
GET /my-index-000001/_termvectors/1?fields=message
+
+ Fields can be specified using wildcards, similar to the multi match query.
+Term vectors are real-time by default, not near real-time.
+ This can be changed by setting the realtime parameter to false.
You can request three types of values: term information, term statistics, and field statistics. + By default, all term information and field statistics are returned for all fields but term statistics are excluded.
+Term information
+ - term frequency in the field (always returned)
+ - term positions (positions: true)
+ - start and end offsets (offsets: true)
+ - term payloads (payloads: true), as base64 encoded bytes
+If the requested information wasn't stored in the index, it will be computed on the fly if possible. + Additionally, term vectors could be computed for documents not even existing in the index, but instead provided by the user.
+++warn + Start and end offsets assume UTF-16 encoding is being used. If you want to use these offsets in order to get the original text that produced this token, you should make sure that the string you are taking a sub-string of is also encoded using UTF-16.
+
Behaviour
+The term and field statistics are not accurate.
+ Deleted documents are not taken into account.
+ The information is only retrieved for the shard the requested document resides in.
+ The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context.
+ By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected.
+ Use routing
only to hit a particular shard.
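For instance, a sketch of requesting term and field statistics with the Python client (the index, document ID, and field are placeholders):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.termvectors(
        index="my-index-000001",
        id="1",
        fields=["message"],
        positions=True,
        offsets=True,
        term_statistics=True,   # doc freq and total term freq are off by default
        field_statistics=True,
    )
    terms = resp["term_vectors"]["message"]["terms"]
    for term, info in terms.items():
        print(term, info.get("term_freq"), info.get("doc_freq"))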
Update documents. Updates documents that match the specified query. If no query is specified, performs an update on every document in the data stream or index without modifying the source, which is useful for picking up mapping changes.
+If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or alias:
+ - read
+ - index or write
You can specify the query criteria in the request URI or the request body using the same syntax as the search API.
+When you submit an update by query request, Elasticsearch gets a snapshot of the data stream or index when it begins processing the request and updates matching documents using internal versioning.
+ When the versions match, the document is updated and the version number is incremented.
+ If a document changes between the time that the snapshot is taken and the update operation is processed, it results in a version conflict and the operation fails.
+ You can opt to count version conflicts instead of halting and returning by setting conflicts
to proceed
.
+ Note that if you opt to count version conflicts, the operation could attempt to update more documents from the source than max_docs
until it has successfully updated max_docs
documents or it has gone through every document in the source query.
NOTE: Documents with a version equal to 0 cannot be updated using update by query because internal versioning does not support 0 as a valid version number.
+While processing an update by query request, Elasticsearch performs multiple search requests sequentially to find all of the matching documents. + A bulk update request is performed for each batch of matching documents. + Any query or update failures cause the update by query request to fail and the failures are shown in the response. + Any update requests that completed successfully still stick, they are not rolled back.
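As a hedged example with the Python client, counting version conflicts instead of aborting (the index and query are hypothetical):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.update_by_query(
        index="my-index-000001",
        conflicts="proceed",                    # count conflicts, don't abort
        query={"term": {"user.id": "kimchy"}},
    )
    print(resp["updated"], resp["version_conflicts"])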
+Throttling update requests
+To control the rate at which update by query issues batches of update operations, you can set requests_per_second
to any positive decimal number.
+ This pads each batch with a wait time to throttle the rate.
+ Set requests_per_second
to -1
to turn off throttling.
Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account.
+ The padding time is the difference between the batch size divided by the requests_per_second
and the time spent writing.
+ By default the batch size is 1000, so if requests_per_second
is set to 500
:
target_time = 1000 / 500 per second = 2 seconds
+ wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds
+
+ Since the batch is issued as a single _bulk request, large batch sizes cause Elasticsearch to create many requests and wait before starting the next set. + This is "bursty" instead of "smooth".
+Slicing
+Update by query supports sliced scroll to parallelize the update process. + This can improve efficiency and provide a convenient way to break the request down into smaller parts.
+Setting slices
to auto
chooses a reasonable number for most data streams and indices.
+ This setting will use one slice per shard, up to a certain limit.
+ If there are multiple source data streams or indices, it will choose the number of slices based on the index or backing index with the smallest number of shards.
Adding slices
to _update_by_query
just automates the manual process of creating sub-requests, which means it has some quirks:
+ - Fetching the status of the task for the request with slices only contains the status of completed slices.
+ - Rethrottling the request with slices will rethrottle the unfinished sub-request proportionally.
+ - Due to the nature of slices, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others.
+ - Parameters like requests_per_second and max_docs on a request with slices are distributed proportionally to each sub-request. Combine that with the point above about distribution being uneven and you should conclude that using max_docs with slices might not result in exactly max_docs documents being updated.
+Whether query or update performance dominates the runtime depends on the documents being reindexed and cluster resources.
+Update the document source
+Update by query supports scripts to update the document source.
+ As with the update API, you can set ctx.op
to change the operation that is performed.
Set ctx.op = "noop"
if your script decides that it doesn't have to make any changes.
+ The update by query operation skips updating the document and increments the noop
counter.
Set ctx.op = "delete"
if your script decides that the document should be deleted.
+ The update by query operation deletes the document and increments the deleted
counter.
Update by query supports only index
, noop
, and delete
.
+ Setting ctx.op
to anything else is an error.
+ Setting any other field in ctx
is an error.
+ This API enables you to only modify the source of matching documents; you cannot move them.
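For example, a sketch of a scripted update that decrements a counter and deletes documents that reach zero (the field names are made up):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.update_by_query(
        index="my-index-000001",
        query={"range": {"count": {"gte": 0}}},
        script={
            "lang": "painless",
            # ctx.op may be "index" (default), "noop", or "delete"
            "source": 'if (ctx._source.count <= 1) { ctx.op = "delete" } '
                      'else { ctx._source.count -= 1 }',
        },
    )
    print(resp["updated"], resp["deleted"], resp["noops"])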
Get the async search status.
Get the status of a previously submitted async search request given its identifier, without retrieving search results.
- If the Elasticsearch security features are enabled, use of this API is restricted to the monitoring_user role.
+ If the Elasticsearch security features are enabled, access is restricted to users with the monitor cluster privilege or greater privileges.
`Delete auto-follow patterns. - Delete a collection of cross-cluster replication auto-follow patterns.
+Delete auto-follow patterns.
+Delete a collection of cross-cluster replication auto-follow patterns.
`Get follower information. - Get information about all cross-cluster replication follower indices. +
Get follower information.
+Get information about all cross-cluster replication follower indices. For example, the results include follower index names, leader index names, replication options, and whether the follower indices are active or paused.
`Get follower stats. - Get cross-cluster replication follower stats. +
Get follower stats.
+Get cross-cluster replication follower stats. The API returns shard-level stats about the "following tasks" associated with each shard for the specified indices.
`Get auto-follow patterns. - Get cross-cluster replication auto-follow patterns.
+Get auto-follow patterns.
+Get cross-cluster replication auto-follow patterns.
`Pause an auto-follow pattern. - Pause a cross-cluster replication auto-follow pattern. +
Pause an auto-follow pattern.
+Pause a cross-cluster replication auto-follow pattern. When the API returns, the auto-follow pattern is inactive. New indices that are created on the remote cluster and match the auto-follow patterns are ignored.
You can resume auto-following with the resume auto-follow pattern API.
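A minimal sketch with the Python client; the pattern name is hypothetical:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    # Stop new matching remote indices from being auto-followed, then resume later.
    client.ccr.pause_auto_follow_pattern(name="logs-pattern")
    client.ccr.resume_auto_follow_pattern(name="logs-pattern")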
@@ -500,9 +507,11 @@ def pause_auto_follow_pattern(
` Pause a follower.
- Pause a cross-cluster replication follower index.
+ Pause a follower. Pause a cross-cluster replication follower index.
The follower index will not fetch any additional operations from the leader index.
You can resume following with the resume follower API.
You can pause and resume a follower index to change the configuration of the following task. Resume an auto-follow pattern.
- Resume a cross-cluster replication auto-follow pattern that was paused.
+ Resume an auto-follow pattern. Resume a cross-cluster replication auto-follow pattern that was paused.
The auto-follow pattern will resume configuring following indices for newly created indices that match its patterns on the remote cluster.
Remote indices created while the pattern was paused will also be followed unless they have been deleted or closed in the interim. Get cross-cluster replication stats.
- This API returns stats about auto-following and the same shard-level stats as the get follower stats API. Get cross-cluster replication stats. This API returns stats about auto-following and the same shard-level stats as the get follower stats API. Unfollow an index.
- Convert a cross-cluster replication follower index to a regular index.
+ Unfollow an index. Convert a cross-cluster replication follower index to a regular index.
The API stops the following task associated with a follower index and removes index metadata and settings associated with cross-cluster replication.
The follower index must be paused and closed before you call the unfollow API. NOTE: Currently cross-cluster replication does not support converting an existing regular index to a follower index. Converting a follower index to a regular index is an irreversible operation. info
+ Currently cross-cluster replication does not support converting an existing regular index to a follower index. Converting a follower index to a regular index is an irreversible operation. Get remote cluster information.
- Get all of the configured remote cluster information.
- This API returns connection and endpoint information keyed by the configured remote cluster alias. Get remote cluster information. Get information about configured remote clusters.
+ The API returns connection and endpoint information keyed by the configured remote cluster alias. info
+ This API returns information that reflects current state on the local cluster.
+ The connected field does not necessarily reflect whether a remote cluster is down or unavailable, only whether there is currently an open connection to it.
+ Elasticsearch does not spontaneously try to reconnect to a disconnected remote cluster.
+ To trigger a reconnection, attempt a cross-cluster search, ES|QL cross-cluster search, or try the resolve cluster endpoint.
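For example, a sketch that checks the connected flag for each configured remote via the Python client:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    remotes = client.cluster.remote_info()
    for alias, info in remotes.items():
        # "connected" only means an open connection exists right now.
        print(alias, info["connected"], info.get("skip_unavailable"))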
Stop async ES|QL query.
+This API interrupts the query execution and returns the results so far. + If the Elasticsearch security features are enabled, only the user who first submitted the ES|QL query can stop it.
+ + + `IMPORTANT: The features installed on the node you submit this request to are the features that will be reset. Run on the master node if you have any doubts about which plugins are installed on individual nodes.
- `Returns the current global checkpoints for an index. This API is design for internal use by the fleet server project.
+Get global checkpoints.
+Get the current global checkpoints for an index. + This API is designed for internal use by the Fleet server project.
`Get the ILM status. - Get the current index lifecycle management status.
+Get the ILM status.
+Get the current index lifecycle management status.
`Add an index block. - Limits the operations allowed on an index by blocking specific operation types.
+Add an index block.
+Add an index block to an index. + Index blocks limit the operations allowed on an index by blocking specific operation types.
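For instance, a hedged sketch that blocks writes on an index using the Python client (the index name is a placeholder):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    # Block write operations; other block types include "read", "read_only",
    # and "metadata".
    resp = client.indices.add_block(index="my-index-000001", block="write")
    print(resp["acknowledged"])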
- `Create a data stream. - Creates a data stream. - You must have a matching index template with data stream enabled.
+Create a data stream.
+You must have a matching index template with data stream enabled.
- `Get data stream stats. - Retrieves statistics for one or more data streams.
+Get data stream stats.
+Get statistics for one or more data streams.
- `Check aliases. - Checks if one or more data stream or index aliases exist.
+Check aliases.
+Check if one or more data stream or index aliases exist.
- `Check index templates. - Check whether index templates exist.
+Check index templates.
+Check whether index templates exist.
- `Get data stream lifecycles. - Retrieves the data stream lifecycle configuration of one or more data streams.
+Get data stream lifecycles.
+Get the data stream lifecycle configuration of one or more data streams.
`Get data streams. - Retrieves information about one or more data streams.
+Get data streams.
+Get information about one or more data streams.
- `Resolve the cluster. - Resolve the specified index expressions to return information about each cluster, including the local cluster, if included. - Multiple patterns and remote clusters are supported.
+Resolve the cluster.
+Resolve the specified index expressions to return information about each cluster, including the local "querying" cluster, if included. + If no index expression is provided, the API will return information about all the remote clusters that are configured on the querying cluster.
This endpoint is useful before doing a cross-cluster search in order to determine which remote clusters should be included in a search.
You use the same index expression with this endpoint as you would for cross-cluster search. Index and cluster exclusions are also supported with this endpoint.
For each cluster in the index expression, information is returned about:
- Whether the querying ("local") cluster was able to connect to each remote cluster specified in the index expression. Note that this endpoint actively attempts to contact the remote clusters, unlike the remote/info endpoint.
- Whether each remote cluster is configured with skip_unavailable as true or false.
- Whether there are any indices, aliases, or data streams on that cluster that match the index expression.
- Whether the search is likely to have errors returned when you do the cross-cluster search, including any authorization errors if you do not have permission to query the index.
- Cluster version information, including the Elasticsearch server version.
returns information about the local cluster and all remotely configured clusters that start with the alias cluster*
.
Each cluster returns information about whether it has any indices, aliases or data streams that match my-index-*
.
Advantages of using this endpoint before a cross-cluster search
+The ability to query without an index expression was added in version 8.18, so when
+ querying remote clusters older than that, the local cluster will send the index
+ expression dummy*
to those remote clusters. Thus, if any errors occur, you may see a reference
+ to that index expression even though you didn't request it. If it causes a problem, you can
+ instead include an index expression like *:*
to bypass the issue.
You may want to exclude a cluster or index from a search when:
- A remote cluster is not currently connected and is configured with skip_unavailable=false. Running a cross-cluster search under those conditions will cause the entire search to fail.
- A cluster has no matching indices, aliases, or data streams for the index expression (or your user does not have permissions to search them).
- The index expression (combined with any query parameters you specify) will likely cause an exception to be thrown when you do the search. In these cases, the "error" field in the _resolve/cluster response will be present. (This is also where security/permission errors will be shown.)
- A remote cluster is an older version that does not support the feature you want to use in your search.
endpoint is commonly used to test whether the "local" cluster (the cluster being queried) is connected to its remote clusters, but it does not necessarily reflect whether the remote cluster is available or not.
+ The remote cluster may be available, while the local cluster is not currently connected to it.
You can use the _resolve/cluster
API to attempt to reconnect to remote clusters.
+ For example with GET _resolve/cluster
or GET _resolve/cluster/*:*
.
+ The connected
field in the response will indicate whether it was successful.
+ If a connection was (re-)established, this will also cause the remote/info
endpoint to now indicate a connected status.
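A sketch of checking clusters before a cross-cluster search with the Python client; the index expression follows the description above, but the alias names and response keys shown are only illustrative:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.indices.resolve_cluster(name="my-index-*,cluster*:my-index-*")
    for cluster, info in resp.items():
        # Skip clusters that are disconnected or have nothing matching.
        print(cluster, info.get("connected"), info.get("matching_indices"))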
Perform inference on the service
+Perform inference on the service.
+This API enables you to use machine learning models to perform specific tasks on data that you provide as an input. + It returns a response with the results of the tasks. + The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.
++`info + The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+
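For illustration, a sketch with the Python client; the inference endpoint ID is hypothetical and must already exist (created with the create inference API):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.inference.inference(
        inference_id="my-embedding-endpoint",
        input=["The quick brown fox jumps over the lazy dog"],
    )
    # The result shape depends on the endpoint's task type, e.g. embeddings.
    print(resp)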
Delete GeoIP database configurations. - Delete one or more IP geolocation database configurations.
+Delete GeoIP database configurations.
+Delete one or more IP geolocation database configurations.
- `Get GeoIP database configurations. - Get information about one or more IP geolocation database configurations.
+Get GeoIP database configurations.
+Get information about one or more IP geolocation database configurations.
- `Get pipelines. - Get information about one or more ingest pipelines. +
Get pipelines.
+Get information about one or more ingest pipelines. This API returns a local reference of the pipeline.
@@ -455,11 +455,11 @@ def put_geoip_database( """ .. raw:: html -Create or update a GeoIP database configuration. - Refer to the create or update IP geolocation database configuration API.
+Create or update a GeoIP database configuration.
+Refer to the create or update IP geolocation database configuration API.
- `Simulate a pipeline. - Run an ingest pipeline against a set of provided documents. +
Simulate a pipeline.
+Run an ingest pipeline against a set of provided documents. You can either specify an existing pipeline to use with the provided documents or supply a pipeline definition in the body of the request.
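As a rough sketch with the Python client, supplying an inline pipeline definition and a test document:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.ingest.simulate(
        pipeline={
            "description": "test pipeline",
            "processors": [{"set": {"field": "greeting", "value": "hello"}}],
        },
        docs=[{"_index": "my-index", "_id": "1", "_source": {"name": "es"}}],
    )
    print(resp["docs"][0]["doc"]["_source"])  # {'name': 'es', 'greeting': 'hello'}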
`Delete the license. - When the license expires, your subscription level reverts to Basic.
+Delete the license.
+When the license expires, your subscription level reverts to Basic.
If the operator privileges feature is enabled, only operator users can use this API.
`Get license information. - Get information about your Elastic license including its type, its status, when it was issued, and when it expires.
-NOTE: If the master node is generating a new cluster state, the get license API may return a 404 Not Found
response.
+
Get license information.
+Get information about your Elastic license including its type, its status, when it was issued, and when it expires.
++`info + If the master node is generating a new cluster state, the get license API may return a
+404 Not Found
response. If you receive an unexpected 404 response after cluster startup, wait a short period and retry the request.
Update the license. - You can update your license at runtime without shutting down your nodes. +
Update the license.
+You can update your license at runtime without shutting down your nodes. License updates take effect immediately. If the license you are installing does not support all of the features that were available with your previous license, however, you are notified in the response. You must then re-submit the API request with the acknowledge parameter set to true.
@@ -240,9 +243,9 @@ def post( :param license: :param licenses: A sequence of one or more JSON documents containing the license information. - :param master_timeout: Period to wait for a connection to the master node. - :param timeout: Period to wait for a response. If no response is received before - the timeout expires, the request fails and returns an error. + :param master_timeout: The period to wait for a connection to the master node. + :param timeout: The period to wait for a response. If no response is received + before the timeout expires, the request fails and returns an error. """ __path_parts: t.Dict[str, str] = {} __path = "/_license" @@ -297,8 +300,8 @@ def post_start_basic( """ .. raw:: html -Start a basic license. - Start an indefinite basic license, which gives access to all the basic features.
+Start a basic license.
+Start an indefinite basic license, which gives access to all the basic features.
NOTE: In order to start a basic license, you must not currently have a basic license.
If the basic license does not support all of the features that are available with your current license, however, you are notified in the response.
You must then re-submit the API request with the acknowledge
parameter set to true
.
Clear trained model deployment cache. - Cache will be cleared on all nodes where the trained model is assigned. +
Clear trained model deployment cache.
+Cache will be cleared on all nodes where the trained model is assigned. A trained model deployment may have an inference cache enabled. As requests are handled by each allocated node, their responses may be cached on that individual node. Calling this API clears the caches without restarting the deployment.
@@ -93,8 +93,8 @@ def close_job( """ .. raw:: html -Close anomaly detection jobs. - A job can be opened and closed multiple times throughout its lifecycle. A closed job cannot receive data or perform analysis operations, but you can still explore and navigate results. +
Close anomaly detection jobs.
+A job can be opened and closed multiple times throughout its lifecycle. A closed job cannot receive data or perform analysis operations, but you can still explore and navigate results. When you close a job, it runs housekeeping tasks such as pruning the model history, flushing buffers, calculating final results and persisting the model snapshots. Depending upon the size of the job, it could take several minutes to close and the equivalent time to re-open. After it is closed, the job has a minimal overhead on the cluster except for maintaining its meta data. Therefore it is a best practice to close jobs that are no longer required to process data. If you close an anomaly detection job whose datafeed is running, the request first tries to stop the datafeed. This behavior is equivalent to calling stop datafeed API with the same timeout and force parameters as the close job request. When a datafeed that has a specified end date stops, it automatically closes its associated job.
@@ -161,8 +161,8 @@ def delete_calendar( """ .. raw:: html -Delete a calendar. - Removes all scheduled events from a calendar, then deletes it.
+Delete a calendar.
+Remove all scheduled events from a calendar, then delete it.
`Delete expired ML data. - Deletes all job results, model snapshots and forecast data that have exceeded +
Delete expired ML data.
+Delete all job results, model snapshots and forecast data that have exceeded their retention days period. Machine learning state documents that are not associated with any job are also deleted. You can limit the request to a single or set of anomaly detection jobs by using a job identifier, a group name, a comma-separated list of jobs, or a wildcard expression. You can delete expired data for all anomaly detection - jobs by using _all, by specifying * as the <job_id>, or by omitting the - <job_id>.
+ jobs by using_all
, by specifying *
as the <job_id>
, or by omitting the
+ <job_id>
.
`Delete a filter. - If an anomaly detection job references the filter, you cannot delete the +
Delete a filter.
+If an anomaly detection job references the filter, you cannot delete the filter. You must update or delete the job before you can delete the filter.
@@ -533,8 +533,8 @@ def delete_forecast( """ .. raw:: html -Delete forecasts from a job. - By default, forecasts are retained for 14 days. You can specify a +
Delete forecasts from a job.
+By default, forecasts are retained for 14 days. You can specify a
different retention period with the expires_in
parameter in the forecast
jobs API. The delete forecast API enables you to delete one or more
forecasts before they expire.
Delete an anomaly detection job. - All job configuration, model state and results are deleted. +
Delete an anomaly detection job.
+All job configuration, model state and results are deleted. It is not currently possible to delete multiple jobs using wildcards or a comma separated list. If you delete a job that has a datafeed, the request first tries to delete the datafeed. This behavior is equivalent to calling @@ -670,8 +670,8 @@ def delete_model_snapshot( """ .. raw:: html -
Delete a model snapshot. - You cannot delete the active model snapshot. To delete that snapshot, first +
Delete a model snapshot.
+You cannot delete the active model snapshot. To delete that snapshot, first
revert to a different one. To identify the active model snapshot, refer to
the model_snapshot_id
in the results from the get jobs API.
Delete an unreferenced trained model. - The request deletes a trained inference model that is not referenced by an ingest pipeline.
+Delete an unreferenced trained model.
+The request deletes a trained inference model that is not referenced by an ingest pipeline.
`Delete a trained model alias. - This API deletes an existing model alias that refers to a trained model. If +
Delete a trained model alias.
+This API deletes an existing model alias that refers to a trained model. If
the model alias is missing or refers to a model other than the one identified
by the model_id
, this API returns an error.
Estimate job model memory usage. - Makes an estimation of the memory usage for an anomaly detection job model. - It is based on analysis configuration details for the job and cardinality +
Estimate job model memory usage.
+Make an estimation of the memory usage for an anomaly detection job model. + The estimate is based on analysis configuration details for the job and cardinality estimates for the fields it references.
- `Evaluate data frame analytics. - The API packages together commonly used evaluation metrics for various types +
Evaluate data frame analytics.
+The API packages together commonly used evaluation metrics for various types of machine learning features. This has been designed for use on indexes created by data frame analytics. Evaluation requires both a ground truth field and an analytics result field to be present.
@@ -990,8 +990,8 @@ def explain_data_frame_analytics( """ .. raw:: html -Explain data frame analytics config. - This API provides explanations for a data frame analytics config that either +
Explain data frame analytics config.
+This API provides explanations for a data frame analytics config that either exists already or one that has not been created yet. The following explanations are provided:
Open anomaly detection jobs. - An anomaly detection job must be opened to be ready to receive and analyze +
Open anomaly detection jobs.
+An anomaly detection job must be opened to be ready to receive and analyze data. It can be opened and closed multiple times throughout its lifecycle. When you open a new job, it starts with an empty model. When you open an existing job, the most recent model state is automatically @@ -3082,7 +3082,7 @@ def preview_data_frame_analytics( .. raw:: html
Preview features used by data frame analytics. - Previews the extracted features used by a data frame analytics config.
+ Preview the extracted features used by a data frame analytics config. `Create an anomaly detection job.
- If you include a datafeed_config
, you must have read index privileges on the source index.
+
Create an anomaly detection job.
+If you include a datafeed_config
, you must have read index privileges on the source index.
If you include a datafeed_config
but do not provide a query, the datafeed uses {"match_all": {"boost": 1}}
.
Upgrade a snapshot. - Upgrades an anomaly detection model snapshot to the latest major version. + Upgrade an anomaly detection model snapshot to the latest major version. Over time, older snapshot formats are deprecated and removed. Anomaly detection jobs support only snapshots that are from the current or previous major version. @@ -5782,7 +5782,7 @@ def validate_detector(
Validate an anomaly detection job.
- `Get node information. - By default, the API returns all attributes and core settings for cluster nodes.
+Get node information.
+By default, the API returns all attributes and core settings for cluster nodes.
`Delete a search application. - Remove a search application and its associated alias. Indices attached to the search application are not removed.
+Delete a search application.
+Remove a search application and its associated alias. Indices attached to the search application are not removed.
`Delete a transform. - Deletes a transform.
+Delete a transform.
`Get transforms. - Retrieves configuration information for transforms.
+ Get configuration information for transforms. `Get transform stats. - Retrieves usage information for transforms.
+Get transform stats.
+Get usage information for transforms.
`Reset a transform.
- Resets a transform.
- Before you can reset it, you must stop it; alternatively, use the force
query parameter.
+
Reset a transform.
+Before you can reset it, you must stop it; alternatively, use the force
query parameter.
If the destination index was created by the transform, it is deleted.
Schedule a transform to start now. - Instantly runs a transform to process data.
-If you _schedule_now a transform, it will process the new data instantly, - without waiting for the configured frequency interval. After _schedule_now API is called, - the transform will be processed again at now + frequency unless _schedule_now API +
Schedule a transform to start now.
+Instantly run a transform to process data.
+ If you run this API, the transform will process the new data instantly,
+ without waiting for the configured frequency interval. After the API is called,
+ the transform will be processed again at now + frequency
unless the API
is called again in the meantime.
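A minimal sketch with the Python client; the transform ID is a placeholder:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    # Process new data immediately instead of waiting for the next scheduled run;
    # the regular frequency-based schedule continues afterwards.
    resp = client.transform.schedule_now_transform(transform_id="ecommerce-transform")
    print(resp["acknowledged"])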
Start a transform. - Starts a transform.
+Start a transform.
When you start a transform, it creates the destination index if it does not already exist. The number_of_shards
is
set to 1
and the auto_expand_replicas
is set to 0-1
. If it is a pivot transform, it deduces the mapping
definitions for the destination index from the source indices and the transform aggregations. If fields in the
@@ -879,8 +876,8 @@ def upgrade_transforms(
"""
.. raw:: html
-
Upgrade all transforms. - Transforms are compatible across minor versions and between supported major versions. +
Upgrade all transforms.
+Transforms are compatible across minor versions and between supported major versions. However, over time, the format of transform configuration information may change. This API identifies transforms that have a legacy configuration format and upgrades them to the latest version. It also cleans up the internal data structures that store the transform state and checkpoints. diff --git a/elasticsearch/helpers/vectorstore/_sync/vectorstore.py b/elasticsearch/helpers/vectorstore/_sync/vectorstore.py index 3c4a0d51a..6a6a5ee2a 100644 --- a/elasticsearch/helpers/vectorstore/_sync/vectorstore.py +++ b/elasticsearch/helpers/vectorstore/_sync/vectorstore.py @@ -22,7 +22,10 @@ from elasticsearch import Elasticsearch from elasticsearch._version import __versionstr__ as lib_version from elasticsearch.helpers import BulkIndexError, bulk -from elasticsearch.helpers.vectorstore import EmbeddingService, RetrievalStrategy +from elasticsearch.helpers.vectorstore import ( + EmbeddingService, + RetrievalStrategy, +) from elasticsearch.helpers.vectorstore._utils import maximal_marginal_relevance logger = logging.getLogger(__name__)