diff --git a/bin/run-elastic-reindex.sh b/bin/run-elastic-reindex.sh
index f7c8e48b..3710401a 100755
--- a/bin/run-elastic-reindex.sh
+++ b/bin/run-elastic-reindex.sh
@@ -28,6 +28,7 @@ display_help() {
     echo " -s SOURCE_INDEX Required. One or more source indices to reindex from (space-separated)."
     echo " -d DEST_SUFFIX Required. Suffix for the destination index names."
     echo " -m MAX_DOCS Optional. The maximum number of documents to re-index. Must be a positive integer."
+    echo " -q QUERY Optional. The query to reindex a sub-set of documents."
     echo
 }
 
@@ -116,22 +117,28 @@ check_dest_index_not_exists() {
 
 validate_query() {
     if [ -n "$QUERY" ]; then
-        response=$(curl -s -o /dev/null -w "%{http_code}" -X GET "$ES_HOST/_validate/query?explain" \
+        response=$(curl -s -X GET "$ES_HOST/_validate/query" \
             -H 'Content-Type: application/json' \
             -d "{\"query\": $QUERY}")
 
-        if [ "$response" -ne 200 ]; then
-            echo "Error: The provided query is not valid"
+        echo "Response :$response"
+
+        # Read the first "valid" flag from the body; ERE with POSIX classes is portable, unlike the GNU-only BRE forms \s and \|.
+        is_valid=$(echo "$response" | grep -Eo '"valid":[[:space:]]*(true|false)' | head -n 1 | cut -d':' -f2 | tr -d '[:space:]')
+
+        if [ "$is_valid" != "true" ]; then
+            # Prefer the "error" object; when the body carries none, report the raw response so "Details:" is never empty.
+            error_message=$(echo "$response" | grep -o '"error":{[^}]*}' | cut -d':' -f2-)
+            echo "Error: The provided query is not valid. Details: ${error_message:-$response}"
             exit 6
         fi
+        echo "Query validated successfully."
     fi
 }
 
 # From ES https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#docs-reindex-from-multiple-sources
 # Indexing multiple sources
 
-validate_query
-
 start_reindex() {
     for index in "${SOURCE_INDEXES[@]}"; do
         DEST_INDEX="${index}-${DEST_SUFFIX}"
@@ -183,6 +190,7 @@ start_reindex() {
 check_es_alive
 check_source_index_exists
 check_dest_index_not_exists
+validate_query
 start_reindex
 
 echo "Reindexing script executed successfully."
diff --git a/doc/elasticsearch/elasticsearch_reindex.md b/doc/elasticsearch/elasticsearch_reindex.md index 941aabd9..83519212 100644 --- a/doc/elasticsearch/elasticsearch_reindex.md +++ b/doc/elasticsearch/elasticsearch_reindex.md @@ -1,7 +1,7 @@ ## Elasticsearch Reindexing Guide The _reindex API in Elasticsearch allows you to copy documents from one index to another. This can be useful when you need to change the mappings of an index, upgrade Elasticsearch versions, or simply migrate data. -This huide covers reindexing using two methods; +This guide covers reindexing using two methods: 1. Using the Kibana Dev Tools 2. Using `curl` and Elasticsearch `reindex` API as per the script [here](../../bin/run-elastic-reindex.sh) @@ -14,7 +14,7 @@ The Kibana Dev Tools provides an interactive environment to execute Elasticsearc 1. Open Kibana and navigate to Dev Tools > Console -2. Use the following `POST` request to reindex documents from the source index to the destination index +2. Use the following `POST` request to re-index documents from the source index to the destination index ``` POST _reindex @@ -34,7 +34,7 @@ POST _reindex *** Set the "op-type":"create" to avoid overwriting existing documents in the destination index. To allow overwriting use "op_type":"index" -3. Use the following GET request from the Kibana Dev Tools console to get the Reindexing status +3. Use the following GET request from the Kibana Dev Tools console to get the Re-indexing status ``` GET _tasks/ @@ -109,7 +109,7 @@ bin/run-elastic-reindex.sh -s mc_search-000003 mc_search-000004 -d reindexed -m #### Slicing The Reindex API supports Sliced scroll to parallelize the [reindexing process](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html#docs-reindex-slice), thereby improving efficiency. -We can perfom slicing Manually (providing the no.of slices for each request) or Automatically (let Elasticsearch chose the number of slices to use). 
+We can perform slicing manually (providing the number of slices for each request) or automatically (letting Elasticsearch choose the number of slices to use). ``` curl -s -X POST "$ES_HOST/_reindex?slices=auto&wait_for_completion=false" @@ -121,7 +121,7 @@ The Reindex API supports throttling during reindexing by setting the `requests_p ##### Rethrotting During Reindex -Based on the clusture monitoring stats, you can adjust the throttling dynamically using the _rethrottle API. This allows us to manage the load to our clusture. +Based on the cluster monitoring stats, you can adjust the throttling dynamically using the _rethrottle API. This allows us to manage the load on our cluster. ``` POST _reindex//_rethrottle?requests_per_second=10