From 258668b1fc65639ca80db48701907c919e17362d Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Mon, 21 Oct 2024 16:07:30 +0100 Subject: [PATCH 01/15] adding arabic language analyzer Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 109 ++++++++++++++++++ .../index.md} | 5 +- _analyzers/supported-analyzers/index.md | 4 +- 3 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 _analyzers/language-analyzers/arabic.md rename _analyzers/{language-analyzers.md => language-analyzers/index.md} (95%) diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md new file mode 100644 index 0000000000..194e42b66b --- /dev/null +++ b/_analyzers/language-analyzers/arabic.md @@ -0,0 +1,109 @@ +--- +layout: default +title: Arabic +parent: Language analyzers +nav_order: 10 +--- + +# Arabic analyzer + +The built-in `arabic` analyzer can be applied to a text field using the following command: + +```json +PUT /arabic-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "arabic" + } + } + } +} +``` +{% include copy-curl.html %} + +## Arabic analyzer internals + +The `arabic` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- decimal_digit (general) +- stop (arabic) +- normalization (arabic) +- keywords (arabic) +- stemmer (arabic) + +## Custom Arabic analyzer + +You can create custom Arabic analyzer using the following command: + +```json +PUT /arabic-index +{ + "settings": { + "analysis": { + "filter": { + "arabic_stop": { + "type": "stop", + "stopwords": "_arabic_" + }, + "arabic_stemmer": { + "type": "stemmer", + "language": "arabic" + }, + "arabic_normalization": { + "type": "arabic_normalization" + }, + "decimal_digit": { + "type": "decimal_digit" + } + }, + "analyzer": { + "arabic_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "arabic_normalization", + "decimal_digit", + "arabic_stop", + 
"arabic_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "arabic_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +If you want to prevent certain words from stemming, you can add a `keyword_marker` token filter to mark list of words as keywords and add it to list of filters in analyzer. + +```json +"arabic_stemmer": { + ... +}, +"arabic_keywords": { + "type": "keyword_marker", + "keywords": ["بتن"] +}, +"arabic_normalization": { + ... +}, +``` + + diff --git a/_analyzers/language-analyzers.md b/_analyzers/language-analyzers/index.md similarity index 95% rename from _analyzers/language-analyzers.md rename to _analyzers/language-analyzers/index.md index ca4ba320dd..9d5c634cd3 100644 --- a/_analyzers/language-analyzers.md +++ b/_analyzers/language-analyzers/index.md @@ -3,8 +3,9 @@ layout: default title: Language analyzers nav_order: 100 parent: Analyzers -redirect_from: - - /query-dsl/analyzers/language-analyzers/ +has_children: true +has_toc: false + --- # Language analyzers diff --git a/_analyzers/supported-analyzers/index.md b/_analyzers/supported-analyzers/index.md index af6ce6c3a6..682f20acac 100644 --- a/_analyzers/supported-analyzers/index.md +++ b/_analyzers/supported-analyzers/index.md @@ -24,9 +24,9 @@ Analyzer | Analysis performed | Analyzer output **Stop** | - Parses strings into tokens on any non-letter character
- Removes non-letter characters
- Removes stop words
- Converts tokens to lowercase | [`s`, `fun`, `contribute`, `brand`, `new`, `pr`, `opensearch`] **Keyword** (no-op) | - Outputs the entire string unchanged | [`It’s fun to contribute a brand-new PR or 2 to OpenSearch!`] **Pattern** | - Parses strings into tokens using regular expressions
- Supports converting strings to lowercase
- Supports removing stop words | [`it`, `s`, `fun`, `to`, `contribute`, `a`,`brand`, `new`, `pr`, `or`, `2`, `to`, `opensearch`] -[**Language**]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/) | Performs analysis specific to a certain language (for example, `english`). | [`fun`, `contribut`, `brand`, `new`, `pr`, `2`, `opensearch`] +[**Language**]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/index/) | Performs analysis specific to a certain language (for example, `english`). | [`fun`, `contribut`, `brand`, `new`, `pr`, `2`, `opensearch`] **Fingerprint** | - Parses strings on any non-letter character
- Normalizes characters by converting them to ASCII
- Converts tokens to lowercase
- Sorts, deduplicates, and concatenates tokens into a single token
- Supports removing stop words | [`2 a brand contribute fun it's new opensearch or pr to`]
Note that the apostrophe was converted to its ASCII counterpart. ## Language analyzers -OpenSearch supports analyzers for various languages. For more information, see [Language analyzers]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/). \ No newline at end of file +OpenSearch supports analyzers for various languages. For more information, see [Language analyzers]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/index/). \ No newline at end of file From 37b845a005044ca0e56a001b2d142548a381ecb3 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 21 Oct 2024 11:26:09 -0400 Subject: [PATCH 02/15] Add grandparent to arabic analyzer Signed-off-by: Fanit Kolchina --- _analyzers/language-analyzers/arabic.md | 1 + _analyzers/language-analyzers/index.md | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index 194e42b66b..81dcba269f 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -2,6 +2,7 @@ layout: default title: Arabic parent: Language analyzers +grand_parent: Analyzers nav_order: 10 --- diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 9d5c634cd3..3760a77be0 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -5,7 +5,6 @@ nav_order: 100 parent: Analyzers has_children: true has_toc: false - --- # Language analyzers From 436bd3ea304a2794811a551fe9f2e41852be0127 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Tue, 22 Oct 2024 14:22:40 +0100 Subject: [PATCH 03/15] adding more details Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 88 ++++++++++++++++++++---- _analyzers/language-analyzers/index.md | 89 ++++++++++++++++++++++++- 2 files changed, 164 insertions(+), 13 deletions(-) diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index 
81dcba269f..bc092aa03b 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -25,6 +25,25 @@ PUT /arabic-index ``` {% include copy-curl.html %} +You can also use `stem_exclusion` with any language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_english_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_english_analyzer":{ + "type":"arabic", + "stem_exclusion":["authority","authorization"] + } + } + } + } +} +``` +{% include copy-curl.html %} + ## Arabic analyzer internals The `arabic` analyzer is build using the following: @@ -90,21 +109,66 @@ PUT /arabic-index ``` {% include copy-curl.html %} -## Stem exclusion +## Generated tokens -If you want to prevent certain words from stemming, you can add a `keyword_marker` token filter to mark list of words as keywords and add it to list of filters in analyzer. +Use the following request to examine the tokens generated using the analyzer: ```json -"arabic_stemmer": { - ... -}, -"arabic_keywords": { - "type": "keyword_marker", - "keywords": ["بتن"] -}, -"arabic_normalization": { - ... -}, +POST /arabic-index/_analyze +{ + "field": "content", + "text": "الطلاب يدرسون في الجامعات العربية. أرقامهم ١٢٣٤٥٦." 
+} ``` +{% include copy-curl.html %} +The response contains the generated tokens: +```json +{ + "tokens": [ + { + "token": "طلاب", + "start_offset": 0, + "end_offset": 6, + "type": "", + "position": 0 + }, + { + "token": "يدرس", + "start_offset": 7, + "end_offset": 13, + "type": "", + "position": 1 + }, + { + "token": "جامع", + "start_offset": 17, + "end_offset": 25, + "type": "", + "position": 3 + }, + { + "token": "عرب", + "start_offset": 26, + "end_offset": 33, + "type": "", + "position": 4 + }, + { + "token": "ارقامهم", + "start_offset": 35, + "end_offset": 42, + "type": "", + "position": 5 + }, + { + "token": "123456", + "start_offset": 43, + "end_offset": 49, + "type": "", + "position": 6 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 3760a77be0..21d4bc08ad 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -41,4 +41,91 @@ PUT my-index } ``` - +## stem_exclusion + +The `stem_exclusion` feature can be applied to many language analyzers by providing a list of lowercase words that should be excluded from stemming. Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring they are not stemmed. 
+ +## Example stem_exclusion + +You can use the following command to configure `stem_exclusion`: + +```json +PUT index_with_stem_exclusion_english_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_english_analyzer":{ + "type":"english", + "stem_exclusion": ["manager", "management"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +Following languages support `stem_exclusion`: + +- arabic +- armenian +- basque, +- bengali +- bulgarian +- catalan +- czech +- dutch +- english +- finnish +- french +- galician +- german +- hindi +- hungarian +- indonesian +- irish +- italian +- latvian +- lithuanian +- norwegian +- portuguese +- romanian +- russian +- sorani +- spanish +- swedish +- turkish + + +## stem_exclusion with custom analyzer + +All language analyzers are made up from tokenizers and token filters specific to the particular language. If you want to implement a custom version of the language analyzer with `stem_exclusion`, you need to configure `keyword_marker` token filter and list the necessary words in `keywords` parameter, see the following example: + +```json +PUT index_with_keyword_marker_analyzer +{ + "settings": { + "analysis": { + "filter": { + "protected_keywords_filter": { + "type": "keyword_marker", + "keywords": ["Apple", "OpenSearch"] + } + }, + "analyzer": { + "custom_english_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "protected_keywords_filter", + "english_stemmer" + ] + } + } + } + } +} +``` +{% include copy-curl.html %} From 93308e4fb73d0d4b306546e20e2e76a2ab29e792 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Wed, 30 Oct 2024 11:45:37 +0000 Subject: [PATCH 04/15] adding armenian language analyzer Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 11 +- _analyzers/language-analyzers/armenian.md | 132 ++++++++++++++++++++++ 2 files changed, 139 insertions(+), 4 deletions(-) create mode 100644 _analyzers/language-analyzers/armenian.md diff 
--git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index bc092aa03b..7f18454a26 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -25,17 +25,19 @@ PUT /arabic-index ``` {% include copy-curl.html %} -You can also use `stem_exclusion` with any language analyzer using the following command: +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: ```json -PUT index_with_stem_exclusion_english_analyzer +PUT index_with_stem_exclusion_arabic { "settings": { "analysis": { "analyzer": { - "stem_exclusion_english_analyzer":{ + "stem_exclusion_arabic_analyzer":{ "type":"arabic", - "stem_exclusion":["authority","authorization"] + "stem_exclusion":["تكنولوجيا","سلطة "] } } } @@ -51,6 +53,7 @@ The `arabic` analyzer is build using the following: Tokenizer: `standard` Token Filters: +- lowercase (general) - decimal_digit (general) - stop (arabic) - normalization (arabic) diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md new file mode 100644 index 0000000000..9bdc316e2e --- /dev/null +++ b/_analyzers/language-analyzers/armenian.md @@ -0,0 +1,132 @@ +--- +layout: default +title: Armenian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 20 +--- + +# Armenian analyzer + +The built-in `armenian` analyzer can be applied to a text field using the following command: + +```json +PUT /arabic-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "armenian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_armenian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_armenian_analyzer": { + "type": "armenian", + "stem_exclusion": ["բարև", "խաղաղություն"] + } + } + } + } 
+}
+```
+{% include copy-curl.html %}
+
+## Armenian analyzer internals
+
+The `armenian` analyzer is built using the following:
+
+Tokenizer: `standard`
+
+Token Filters:
+- lowercase (general)
+- stop (armenian)
+- keywords (armenian)
+- stemmer (armenian)
+
+## Custom Armenian analyzer
+
+You can create a custom Armenian analyzer using the following command:
+
+```json
+PUT /armenian-index
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "armenian_stop": {
+          "type": "stop",
+          "stopwords": "_armenian_"
+        },
+        "armenian_stemmer": {
+          "type": "stemmer",
+          "language": "armenian"
+        }
+      },
+      "analyzer": {
+        "armenian_analyzer": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": [
+            "lowercase",
+            "armenian_stop",
+            "armenian_stemmer"
+          ]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "text",
+        "analyzer": "armenian_analyzer"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Generated tokens
+
+Use the following request to examine the tokens generated using the analyzer:
+
+```json
+GET index_with_stem_exclusion_armenian_analyzer/_analyze
+{
+  "analyzer": "stem_exclusion_armenian_analyzer",
+  "text": "բարև բոլորին, մենք խաղաղություն ենք ուզում և նոր օր ենք սկսել"
+}
+```
+{% include copy-curl.html %}
+
+The response contains the generated tokens:
+
+```json
+{
+  "tokens": [
+    {"token": "բարև","start_offset": 0,"end_offset": 4,"type": "","position": 0},
+    {"token": "բոլոր","start_offset": 5,"end_offset": 12,"type": "","position": 1},
+    {"token": "խաղաղություն","start_offset": 19,"end_offset": 31,"type": "","position": 3},
+    {"token": "ուզ","start_offset": 36,"end_offset": 42,"type": "","position": 5},
+    {"token": "նոր","start_offset": 45,"end_offset": 48,"type": "","position": 7},
+    {"token": "օր","start_offset": 49,"end_offset": 51,"type": "","position": 8},
+    {"token": "սկսել","start_offset": 56,"end_offset": 61,"type": "","position": 10}
+  ]
+}
+```
\ No newline at end of file

From d416dd4ed45ff0ca57c943a49a8934913a31991f Mon Sep 17 00:00:00 2001
From: Anton Rubin Date: Wed, 30 Oct 2024 12:25:36 +0000 Subject: [PATCH 05/15] adding basque bengali and brazilian language analyzers Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/basque.md | 132 ++++++++++++++++++++ _analyzers/language-analyzers/bengali.md | 137 +++++++++++++++++++++ _analyzers/language-analyzers/brazilian.md | 132 ++++++++++++++++++++ _analyzers/language-analyzers/index.md | 5 +- 4 files changed, 404 insertions(+), 2 deletions(-) create mode 100644 _analyzers/language-analyzers/basque.md create mode 100644 _analyzers/language-analyzers/bengali.md create mode 100644 _analyzers/language-analyzers/brazilian.md diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md new file mode 100644 index 0000000000..b48fc378fa --- /dev/null +++ b/_analyzers/language-analyzers/basque.md @@ -0,0 +1,132 @@ +--- +layout: default +title: Basque +parent: Language analyzers +grand_parent: Analyzers +nav_order: 30 +--- + +# Basque analyzer + +The built-in `basque` analyzer can be applied to a text field using the following command: + +```json +PUT /basque-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "basque" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_basque_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_basque_analyzer": { + "type": "basque", + "stem_exclusion": ["autoritate", "baldintza"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Basque analyzer internals + +The `basque` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase (general) +- stop (basque) +- keywords (basque) +- stemmer (basque) + +## Custom Basque analyzer + +You can create custom Basque analyzer using the following command: + +```json +PUT /basque-index +{ + 
"settings": { + "analysis": { + "filter": { + "basque_stop": { + "type": "stop", + "stopwords": "_basque_" + }, + "basque_stemmer": { + "type": "stemmer", + "language": "basque" + } + }, + "analyzer": { + "basque_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "basque_stop", + "basque_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "basque_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /basque-index/_analyze +{ + "field": "content", + "text": "Ikasleek euskal unibertsitateetan ikasten dute. Haien zenbakiak 123456 dira." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "ikasle","start_offset": 0,"end_offset": 8,"type": "","position": 0}, + {"token": "euskal","start_offset": 9,"end_offset": 15,"type": "","position": 1}, + {"token": "unibertsi","start_offset": 16,"end_offset": 33,"type": "","position": 2}, + {"token": "ikas","start_offset": 34,"end_offset": 41,"type": "","position": 3}, + {"token": "haien","start_offset": 48,"end_offset": 53,"type": "","position": 5}, + {"token": "zenba","start_offset": 54,"end_offset": 63,"type": "","position": 6}, + {"token": "123456","start_offset": 64,"end_offset": 70,"type": "","position": 7} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md new file mode 100644 index 0000000000..011082b068 --- /dev/null +++ b/_analyzers/language-analyzers/bengali.md @@ -0,0 +1,137 @@ +--- +layout: default +title: Bengali +parent: Language analyzers +grand_parent: Analyzers +nav_order: 40 +--- + +# Bengali analyzer + +The built-in `bengali` analyzer can be applied to a text field using the following command: + +```json +PUT /bengali-index +{ + 
"mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "bengali" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_bengali_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_bengali_analyzer": { + "type": "bengali", + "stem_exclusion": ["কর্তৃপক্ষ", "অনুমোদন"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Bengali analyzer internals + +The `bengali` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase (general) +- decimal_digit (general) +- indic_normalization +- normalization (bengali) +- stop (bengali) +- keywords (bengali) +- stemmer (bengali) + +## Custom Bengali analyzer + +You can create custom Bengali analyzer using the following command: + +```json +PUT /bengali-index +{ + "settings": { + "analysis": { + "filter": { + "bengali_stop": { + "type": "stop", + "stopwords": "_bengali_" + }, + "bengali_stemmer": { + "type": "stemmer", + "language": "bengali" + } + }, + "analyzer": { + "bengali_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "decimal_digit", + "indic_normalization", + "bengali_normalization", + "bengali_stop", + "bengali_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "bengali_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /bengali-index/_analyze +{ + "field": "content", + "text": "ছাত্ররা বিশ্ববিদ্যালয়ে পড়াশোনা করে। তাদের নম্বরগুলি ১২৩৪৫৬।" +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "ছাত্র","start_offset": 0,"end_offset": 7,"type": "","position": 0}, + {"token": 
"বিসসবিদালয়","start_offset": 8,"end_offset": 23,"type": "","position": 1}, + {"token": "পরাসোন","start_offset": 24,"end_offset": 32,"type": "","position": 2}, + {"token": "তা","start_offset": 38,"end_offset": 43,"type": "","position": 4}, + {"token": "নমমর","start_offset": 44,"end_offset": 53,"type": "","position": 5}, + {"token": "123456","start_offset": 54,"end_offset": 60,"type": "","position": 6} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md new file mode 100644 index 0000000000..073166d149 --- /dev/null +++ b/_analyzers/language-analyzers/brazilian.md @@ -0,0 +1,132 @@ +--- +layout: default +title: Brazilian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 50 +--- + +# Brazilian analyzer + +The built-in `brazilian` analyzer can be applied to a text field using the following command: + +```json +PUT /brazilian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "brazilian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_brazilian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_brazilian_analyzer": { + "type": "brazilian", + "stem_exclusion": ["autoridade", "aprovação"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Brazilian analyzer internals + +The `brazilian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase (general) +- stop (brazilian) +- keywords (brazilian) +- stemmer (brazilian) + +## Custom Brazilian analyzer + +You can create custom Brazilian analyzer using the following command: + +```json +PUT /brazilian-index +{ + "settings": { + "analysis": { + "filter": { + "brazilian_stop": { + "type": "stop", + "stopwords": "_brazilian_" + }, + "brazilian_stemmer": { + 
"type": "stemmer", + "language": "brazilian" + } + }, + "analyzer": { + "brazilian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "brazilian_stop", + "brazilian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "brazilian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /brazilian-index/_analyze +{ + "field": "content", + "text": "Estudantes estudam em universidades brasileiras. Seus números são 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "estudant","start_offset": 0,"end_offset": 10,"type": "","position": 0}, + {"token": "estud","start_offset": 11,"end_offset": 18,"type": "","position": 1}, + {"token": "univers","start_offset": 22,"end_offset": 35,"type": "","position": 3}, + {"token": "brasileir","start_offset": 36,"end_offset": 47,"type": "","position": 4}, + {"token": "numer","start_offset": 54,"end_offset": 61,"type": "","position": 6}, + {"token": "sao","start_offset": 62,"end_offset": 65,"type": "","position": 7}, + {"token": "123456","start_offset": 66,"end_offset": 72,"type": "","position": 8} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 21d4bc08ad..8b032e205b 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -69,9 +69,10 @@ PUT index_with_stem_exclusion_english_analyzer Following languages support `stem_exclusion`: - arabic -- armenian -- basque, +- armenian +- basque - bengali +- brazilian - bulgarian - catalan - czech From 2e4f01d18dca255fe0a48e3fb989c6c223dfb8ef Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Wed, 30 Oct 2024 15:34:35 +0000 Subject: [PATCH 06/15] adding bulgarian catalan 
and cjk language analyzers Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 4 +- _analyzers/language-analyzers/armenian.md | 2 +- _analyzers/language-analyzers/basque.md | 2 +- _analyzers/language-analyzers/bengali.md | 4 +- _analyzers/language-analyzers/brazilian.md | 2 +- _analyzers/language-analyzers/bulgarian.md | 132 +++++++++++++++++++ _analyzers/language-analyzers/catalan.md | 138 ++++++++++++++++++++ _analyzers/language-analyzers/cjk.md | 142 +++++++++++++++++++++ _analyzers/language-analyzers/index.md | 1 + 9 files changed, 420 insertions(+), 7 deletions(-) create mode 100644 _analyzers/language-analyzers/bulgarian.md create mode 100644 _analyzers/language-analyzers/catalan.md create mode 100644 _analyzers/language-analyzers/cjk.md diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index 7f18454a26..913414c8a3 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -53,8 +53,8 @@ The `arabic` analyzer is build using the following: Tokenizer: `standard` Token Filters: -- lowercase (general) -- decimal_digit (general) +- lowercase +- decimal_digit - stop (arabic) - normalization (arabic) - keywords (arabic) diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md index 9bdc316e2e..a5ce7d8526 100644 --- a/_analyzers/language-analyzers/armenian.md +++ b/_analyzers/language-analyzers/armenian.md @@ -53,7 +53,7 @@ The `armenian` analyzer is build using the following: Tokenizer: `standard` Token Filters: -- lowercase (general) +- lowercase - stop (armenian) - keywords (armenian) - stemmer (armenian) diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md index b48fc378fa..7eac4cde82 100644 --- a/_analyzers/language-analyzers/basque.md +++ b/_analyzers/language-analyzers/basque.md @@ -53,7 +53,7 @@ The `basque` analyzer is build using the following: Tokenizer: `standard` 
Token Filters: -- lowercase (general) +- lowercase - stop (basque) - keywords (basque) - stemmer (basque) diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md index 011082b068..d3df7f8417 100644 --- a/_analyzers/language-analyzers/bengali.md +++ b/_analyzers/language-analyzers/bengali.md @@ -53,8 +53,8 @@ The `bengali` analyzer is build using the following: Tokenizer: `standard` Token Filters: -- lowercase (general) -- decimal_digit (general) +- lowercase +- decimal_digit - indic_normalization - normalization (bengali) - stop (bengali) diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md index 073166d149..b3b9c7cdb8 100644 --- a/_analyzers/language-analyzers/brazilian.md +++ b/_analyzers/language-analyzers/brazilian.md @@ -53,7 +53,7 @@ The `brazilian` analyzer is build using the following: Tokenizer: `standard` Token Filters: -- lowercase (general) +- lowercase - stop (brazilian) - keywords (brazilian) - stemmer (brazilian) diff --git a/_analyzers/language-analyzers/bulgarian.md b/_analyzers/language-analyzers/bulgarian.md new file mode 100644 index 0000000000..1d74f66c49 --- /dev/null +++ b/_analyzers/language-analyzers/bulgarian.md @@ -0,0 +1,132 @@ +--- +layout: default +title: Bulgarian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 60 +--- + +# Bulgarian analyzer + +The built-in `bulgarian` analyzer can be applied to a text field using the following command: + +```json +PUT /bulgarian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "bulgarian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_bulgarian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_bulgarian_analyzer": { + "type": "bulgarian", + "stem_exclusion": 
["авторитет", "одобрение"]
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Bulgarian analyzer internals
+
+The `bulgarian` analyzer is built using the following:
+
+Tokenizer: `standard`
+
+Token Filters:
+- lowercase
+- stop (bulgarian)
+- keywords (bulgarian)
+- stemmer (bulgarian)
+
+## Custom Bulgarian analyzer
+
+You can create a custom Bulgarian analyzer using the following command:
+
+```json
+PUT /bulgarian-index
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "bulgarian_stop": {
+          "type": "stop",
+          "stopwords": "_bulgarian_"
+        },
+        "bulgarian_stemmer": {
+          "type": "stemmer",
+          "language": "bulgarian"
+        }
+      },
+      "analyzer": {
+        "bulgarian_analyzer": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": [
+            "lowercase",
+            "bulgarian_stop",
+            "bulgarian_stemmer"
+          ]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "text",
+        "analyzer": "bulgarian_analyzer"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Generated tokens
+
+Use the following request to examine the tokens generated using the analyzer:
+
+```json
+POST /bulgarian-index/_analyze
+{
+  "field": "content",
+  "text": "Студентите учат в българските университети. Техните номера са 123456."
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "студент","start_offset": 0,"end_offset": 10,"type": "","position": 0}, + {"token": "учат","start_offset": 11,"end_offset": 15,"type": "","position": 1}, + {"token": "българск","start_offset": 18,"end_offset": 29,"type": "","position": 3}, + {"token": "университят","start_offset": 30,"end_offset": 42,"type": "","position": 4}, + {"token": "техн","start_offset": 44,"end_offset": 51,"type": "","position": 5}, + {"token": "номер","start_offset": 52,"end_offset": 58,"type": "","position": 6}, + {"token": "123456","start_offset": 62,"end_offset": 68,"type": "","position": 8} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/catalan.md b/_analyzers/language-analyzers/catalan.md new file mode 100644 index 0000000000..bc072f8bd9 --- /dev/null +++ b/_analyzers/language-analyzers/catalan.md @@ -0,0 +1,138 @@ +--- +layout: default +title: Catalan +parent: Language analyzers +grand_parent: Analyzers +nav_order: 70 +--- + +# Catalan analyzer + +The built-in `catalan` analyzer can be applied to a text field using the following command: + +```json +PUT /catalan-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "catalan" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_catalan_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_catalan_analyzer": { + "type": "catalan", + "stem_exclusion": ["autoritat", "aprovació"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Catalan analyzer internals + +The `catalan` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- elision (catalan) +- lowercase +- stop (catalan) +- keywords (catalan) +- stemmer (catalan) + +## Custom 
Catalan analyzer + +You can create custom Catalan analyzer using the following command: + +```json +PUT /catalan-index +{ + "settings": { + "analysis": { + "filter": { + "catalan_stop": { + "type": "stop", + "stopwords": "_catalan_" + }, + "catalan_elision": { + "type": "elision", + "articles": [ "d", "l", "m", "n", "s", "t"], + "articles_case": true + }, + "catalan_stemmer": { + "type": "stemmer", + "language": "catalan" + } + }, + "analyzer": { + "catalan_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "catalan_elision", + "lowercase", + "catalan_stop", + "catalan_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "catalan_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /catalan-index/_analyze +{ + "field": "content", + "text": "Els estudiants estudien a les universitats catalanes. Els seus números són 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "estud","start_offset": 4,"end_offset": 14,"type": "","position": 1}, + {"token": "estud","start_offset": 15,"end_offset": 23,"type": "","position": 2}, + {"token": "univer","start_offset": 30,"end_offset": 42,"type": "","position": 5}, + {"token": "catalan","start_offset": 43,"end_offset": 52,"type": "","position": 6}, + {"token": "numer","start_offset": 63,"end_offset": 70,"type": "","position": 9}, + {"token": "123456","start_offset": 75,"end_offset": 81,"type": "","position": 11} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/cjk.md b/_analyzers/language-analyzers/cjk.md new file mode 100644 index 0000000000..111adb423b --- /dev/null +++ b/_analyzers/language-analyzers/cjk.md @@ -0,0 +1,142 @@ +--- +layout: default +title: CJK +parent: Language analyzers +grand_parent: Analyzers +nav_order: 80 +--- + +# CJK analyzer + +The built-in `cjk` analyzer can be applied to a text field using the following command: + +```json +PUT /cjk-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "cjk" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_cjk_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_cjk_analyzer": { + "type": "cjk", + "stem_exclusion": ["example", "words"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## CJK analyzer internals + +The `cjk` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- cjk_width +- lowercase +- cjk_bigram +- stop (similar to english) + +## Custom CJK analyzer + +You can create custom CJK analyzer using the following command: + +```json +PUT /cjk-index +{ + "settings": { + "analysis": { + "filter": { + "english_stop": 
{ + "type": "stop", + "stopwords": [ + "a", "and", "are", "as", "at", "be", "but", "by", "for", + "if", "in", "into", "is", "it", "no", "not", "of", "on", + "or", "s", "such", "t", "that", "the", "their", "then", + "there", "these", "they", "this", "to", "was", "will", + "with", "www" + ] + } + }, + "analyzer": { + "cjk_custom_analyzer": { + "tokenizer": "standard", + "filter": [ + "cjk_width", + "lowercase", + "cjk_bigram", + "english_stop" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "cjk_custom_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /cjk-index/_analyze +{ + "field": "content", + "text": "学生们在中国、日本和韩国的大学学习。123456" +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "学生","start_offset": 0,"end_offset": 2,"type": "","position": 0}, + {"token": "生们","start_offset": 1,"end_offset": 3,"type": "","position": 1}, + {"token": "们在","start_offset": 2,"end_offset": 4,"type": "","position": 2}, + {"token": "在中","start_offset": 3,"end_offset": 5,"type": "","position": 3}, + {"token": "中国","start_offset": 4,"end_offset": 6,"type": "","position": 4}, + {"token": "日本","start_offset": 7,"end_offset": 9,"type": "","position": 5}, + {"token": "本和","start_offset": 8,"end_offset": 10,"type": "","position": 6}, + {"token": "和韩","start_offset": 9,"end_offset": 11,"type": "","position": 7}, + {"token": "韩国","start_offset": 10,"end_offset": 12,"type": "","position": 8}, + {"token": "国的","start_offset": 11,"end_offset": 13,"type": "","position": 9}, + {"token": "的大","start_offset": 12,"end_offset": 14,"type": "","position": 10}, + {"token": "大学","start_offset": 13,"end_offset": 15,"type": "","position": 11}, + {"token": "学学","start_offset": 14,"end_offset": 16,"type": "","position": 12}, + {"token": "学习","start_offset": 
15,"end_offset": 17,"type": "","position": 13}, + {"token": "123456","start_offset": 18,"end_offset": 24,"type": "","position": 14} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 8b032e205b..2e15c32b86 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -75,6 +75,7 @@ Following languages support `stem_exclusion`: - brazilian - bulgarian - catalan +- cjk - czech - dutch - english From 8a1052de56413c01db71b024cfd20061febdd265 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Wed, 30 Oct 2024 16:40:40 +0000 Subject: [PATCH 07/15] adding czech,danish,dutch,english,estonian,finnish,french and galician analyzer docs Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 13 +- _analyzers/language-analyzers/armenian.md | 11 +- _analyzers/language-analyzers/basque.md | 11 +- _analyzers/language-analyzers/bengali.md | 13 +- _analyzers/language-analyzers/brazilian.md | 11 +- _analyzers/language-analyzers/bulgarian.md | 11 +- _analyzers/language-analyzers/catalan.md | 13 +- _analyzers/language-analyzers/cjk.md | 2 +- _analyzers/language-analyzers/czech.md | 172 +++++++++++++++++++++ _analyzers/language-analyzers/danish.md | 172 +++++++++++++++++++++ _analyzers/language-analyzers/dutch.md | 148 ++++++++++++++++++ _analyzers/language-analyzers/english.md | 143 +++++++++++++++++ _analyzers/language-analyzers/estonian.md | 139 +++++++++++++++++ _analyzers/language-analyzers/finnish.md | 137 ++++++++++++++++ _analyzers/language-analyzers/french.md | 148 ++++++++++++++++++ _analyzers/language-analyzers/galician.md | 138 +++++++++++++++++ _analyzers/language-analyzers/index.md | 1 + 17 files changed, 1258 insertions(+), 25 deletions(-) create mode 100644 _analyzers/language-analyzers/czech.md create mode 100644 _analyzers/language-analyzers/danish.md create mode 100644 _analyzers/language-analyzers/dutch.md create mode 100644 
_analyzers/language-analyzers/english.md create mode 100644 _analyzers/language-analyzers/estonian.md create mode 100644 _analyzers/language-analyzers/finnish.md create mode 100644 _analyzers/language-analyzers/french.md create mode 100644 _analyzers/language-analyzers/galician.md diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index 913414c8a3..b15d7ee58d 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -55,10 +55,10 @@ Tokenizer: `standard` Token Filters: - lowercase - decimal_digit -- stop (arabic) -- normalization (arabic) -- keywords (arabic) -- stemmer (arabic) +- stop (Arabic) +- normalization (Arabic) +- keywords +- stemmer (Arabic) ## Custom Arabic analyzer @@ -83,6 +83,10 @@ PUT /arabic-index }, "decimal_digit": { "type": "decimal_digit" + }, + "arabic_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -94,6 +98,7 @@ PUT /arabic-index "arabic_normalization", "decimal_digit", "arabic_stop", + "arabic_keywords", "arabic_stemmer" ] } diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md index a5ce7d8526..1324e39420 100644 --- a/_analyzers/language-analyzers/armenian.md +++ b/_analyzers/language-analyzers/armenian.md @@ -54,9 +54,9 @@ Tokenizer: `standard` Token Filters: - lowercase -- stop (armenian) -- keywords (armenian) -- stemmer (armenian) +- stop (Armenian) +- keywords +- stemmer (Armenian) ## Custom Armenian analyzer @@ -75,6 +75,10 @@ PUT /armenian-index "armenian_stemmer": { "type": "stemmer", "language": "armenian" + }, + "armenian_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -84,6 +88,7 @@ PUT /armenian-index "filter": [ "lowercase", "armenian_stop", + "armenian_keywords", "armenian_stemmer" ] } diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md index 7eac4cde82..bab4ffa0fe 100644 --- 
a/_analyzers/language-analyzers/basque.md +++ b/_analyzers/language-analyzers/basque.md @@ -54,9 +54,9 @@ Tokenizer: `standard` Token Filters: - lowercase -- stop (basque) -- keywords (basque) -- stemmer (basque) +- stop (Basque) +- keywords +- stemmer (Basque) ## Custom Basque analyzer @@ -75,6 +75,10 @@ PUT /basque-index "basque_stemmer": { "type": "stemmer", "language": "basque" + }, + "basque_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -84,6 +88,7 @@ PUT /basque-index "filter": [ "lowercase", "basque_stop", + "basque_keywords", "basque_stemmer" ] } diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md index d3df7f8417..72132e8e91 100644 --- a/_analyzers/language-analyzers/bengali.md +++ b/_analyzers/language-analyzers/bengali.md @@ -56,10 +56,10 @@ Token Filters: - lowercase - decimal_digit - indic_normalization -- normalization (bengali) -- stop (bengali) -- keywords (bengali) -- stemmer (bengali) +- normalization (Bengali) +- stop (Bengali) +- keywords +- stemmer (Bengali) ## Custom Bengali analyzer @@ -78,6 +78,10 @@ PUT /bengali-index "bengali_stemmer": { "type": "stemmer", "language": "bengali" + }, + "bengali_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -90,6 +94,7 @@ PUT /bengali-index "indic_normalization", "bengali_normalization", "bengali_stop", + "bengali_keywords", "bengali_stemmer" ] } diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md index b3b9c7cdb8..b905773bbb 100644 --- a/_analyzers/language-analyzers/brazilian.md +++ b/_analyzers/language-analyzers/brazilian.md @@ -54,9 +54,9 @@ Tokenizer: `standard` Token Filters: - lowercase -- stop (brazilian) -- keywords (brazilian) -- stemmer (brazilian) +- stop (Brazilian) +- keywords +- stemmer (Brazilian) ## Custom Brazilian analyzer @@ -75,6 +75,10 @@ PUT /brazilian-index "brazilian_stemmer": { "type": "stemmer", "language": "brazilian" + 
}, + "brazilian_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -84,6 +88,7 @@ PUT /brazilian-index "filter": [ "lowercase", "brazilian_stop", + "brazilian_keywords", "brazilian_stemmer" ] } diff --git a/_analyzers/language-analyzers/bulgarian.md b/_analyzers/language-analyzers/bulgarian.md index 1d74f66c49..d924a81afc 100644 --- a/_analyzers/language-analyzers/bulgarian.md +++ b/_analyzers/language-analyzers/bulgarian.md @@ -54,9 +54,9 @@ Tokenizer: `standard` Token Filters: - lowercase -- stop (bulgarian) -- keywords (bulgarian) -- stemmer (bulgarian) +- stop (Bulgarian) +- keywords +- stemmer (Bulgarian) ## Custom Bulgarian analyzer @@ -75,6 +75,10 @@ PUT /bulgarian-index "bulgarian_stemmer": { "type": "stemmer", "language": "bulgarian" + }, + "bulgarian_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -84,6 +88,7 @@ PUT /bulgarian-index "filter": [ "lowercase", "bulgarian_stop", + "bulgarian_keywords", "bulgarian_stemmer" ] } diff --git a/_analyzers/language-analyzers/catalan.md b/_analyzers/language-analyzers/catalan.md index bc072f8bd9..b1df91ce20 100644 --- a/_analyzers/language-analyzers/catalan.md +++ b/_analyzers/language-analyzers/catalan.md @@ -53,11 +53,11 @@ The `catalan` analyzer is build using the following: Tokenizer: `standard` Token Filters: -- elision (catalan) +- elision (Catalan) - lowercase -- stop (catalan) -- keywords (catalan) -- stemmer (catalan) +- stop (Catalan) +- keywords +- stemmer (Catalan) ## Custom Catalan analyzer @@ -81,6 +81,10 @@ PUT /catalan-index "catalan_stemmer": { "type": "stemmer", "language": "catalan" + }, + "catalan_keywords": { + "type": "keyword_marker", + "keywords": [] } }, "analyzer": { @@ -91,6 +95,7 @@ PUT /catalan-index "catalan_elision", "lowercase", "catalan_stop", + "catalan_keywords", "catalan_stemmer" ] } diff --git a/_analyzers/language-analyzers/cjk.md b/_analyzers/language-analyzers/cjk.md index 111adb423b..e66b222062 100644 --- 
a/_analyzers/language-analyzers/cjk.md +++ b/_analyzers/language-analyzers/cjk.md @@ -56,7 +56,7 @@ Token Filters: - cjk_width - lowercase - cjk_bigram -- stop (similar to english) +- stop (similar to English) ## Custom CJK analyzer diff --git a/_analyzers/language-analyzers/czech.md b/_analyzers/language-analyzers/czech.md new file mode 100644 index 0000000000..f0a2ac6482 --- /dev/null +++ b/_analyzers/language-analyzers/czech.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Czech +parent: Language analyzers +grand_parent: Analyzers +nav_order: 90 +--- + +# Czech analyzer + +The built-in `czech` analyzer can be applied to a text field using the following command: + +```json +PUT /czech-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "czech" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_czech_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_czech_analyzer": { + "type": "czech", + "stem_exclusion": ["autorita", "schválení"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Czech analyzer internals + +The `czech` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Czech) +- keyword +- stemmer (Czech) + +## Custom Czech analyzer + +You can create custom Czech analyzer using the following command: + +```json +PUT /czech-index +{ + "settings": { + "analysis": { + "filter": { + "czech_stop": { + "type": "stop", + "stopwords": "_czech_" + }, + "czech_stemmer": { + "type": "stemmer", + "language": "czech" + }, + "czech_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "czech_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "czech_stop", + "czech_keywords", + "czech_stemmer" + ] + } + } + } + }, + 
"mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "czech_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /czech-index/_analyze +{ + "field": "content", + "text": "Studenti studují na českých univerzitách. Jejich čísla jsou 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "student", + "start_offset": 0, + "end_offset": 8, + "type": "", + "position": 0 + }, + { + "token": "studuj", + "start_offset": 9, + "end_offset": 16, + "type": "", + "position": 1 + }, + { + "token": "česk", + "start_offset": 20, + "end_offset": 27, + "type": "", + "position": 3 + }, + { + "token": "univerzit", + "start_offset": 28, + "end_offset": 40, + "type": "", + "position": 4 + }, + { + "token": "čísl", + "start_offset": 49, + "end_offset": 54, + "type": "", + "position": 6 + }, + { + "token": "123456", + "start_offset": 60, + "end_offset": 66, + "type": "", + "position": 8 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/danish.md b/_analyzers/language-analyzers/danish.md new file mode 100644 index 0000000000..3f974d5e0f --- /dev/null +++ b/_analyzers/language-analyzers/danish.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Danish +parent: Language analyzers +grand_parent: Analyzers +nav_order: 100 +--- + +# Danish analyzer + +The built-in `danish` analyzer can be applied to a text field using the following command: + +```json +PUT /danish-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "danish" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_danish_analyzer +{ + "settings": { + "analysis": { + "analyzer": 
{ + "stem_exclusion_danish_analyzer": { + "type": "danish", + "stem_exclusion": ["autoritet", "godkendelse"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Danish analyzer internals + +The `danish` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Danish) +- keyword +- stemmer (Danish) + +## Custom Danish analyzer + +You can create custom Danish analyzer using the following command: + +```json +PUT /danish-index +{ + "settings": { + "analysis": { + "filter": { + "danish_stop": { + "type": "stop", + "stopwords": "_danish_" + }, + "danish_stemmer": { + "type": "stemmer", + "language": "danish" + }, + "danish_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "danish_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "danish_stop", + "danish_keywords", + "danish_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "danish_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /danish-index/_analyze +{ + "field": "content", + "text": "Studerende studerer på de danske universiteter. Deres numre er 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "stud", + "start_offset": 0, + "end_offset": 10, + "type": "", + "position": 0 + }, + { + "token": "stud", + "start_offset": 11, + "end_offset": 19, + "type": "", + "position": 1 + }, + { + "token": "dansk", + "start_offset": 26, + "end_offset": 32, + "type": "", + "position": 4 + }, + { + "token": "universitet", + "start_offset": 33, + "end_offset": 46, + "type": "", + "position": 5 + }, + { + "token": "numr", + "start_offset": 54, + "end_offset": 59, + "type": "", + "position": 7 + }, + { + "token": "123456", + "start_offset": 63, + "end_offset": 69, + "type": "", + "position": 9 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/dutch.md b/_analyzers/language-analyzers/dutch.md new file mode 100644 index 0000000000..e96c05d147 --- /dev/null +++ b/_analyzers/language-analyzers/dutch.md @@ -0,0 +1,148 @@ +--- +layout: default +title: Dutch +parent: Language analyzers +grand_parent: Analyzers +nav_order: 110 +--- + +# Dutch analyzer + +The built-in `dutch` analyzer can be applied to a text field using the following command: + +```json +PUT /dutch-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "dutch" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_dutch_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_dutch_analyzer": { + "type": "dutch", + "stem_exclusion": ["autoriteit", "goedkeuring"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Dutch analyzer internals + +The `dutch` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Dutch) +- keyword +- stemmer_override +- stemmer (Dutch) + +## Custom Dutch analyzer + +You 
can create custom Dutch analyzer using the following command: + +```json +PUT /dutch-index +{ + "settings": { + "analysis": { + "filter": { + "dutch_stop": { + "type": "stop", + "stopwords": "_dutch_" + }, + "dutch_stemmer": { + "type": "stemmer", + "language": "dutch" + }, + "dutch_keywords": { + "type": "keyword_marker", + "keywords": [] + }, + "dutch_override": { + "type": "stemmer_override", + "rules": [ + "fiets=>fiets", + "bromfiets=>bromfiets", + "ei=>eier", + "kind=>kinder" + ] + } + }, + "analyzer": { + "dutch_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "dutch_stop", + "dutch_keywords", + "dutch_override", + "dutch_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "dutch_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /dutch-index/_analyze +{ + "field": "content", + "text": "De studenten studeren in Nederland en bezoeken Amsterdam. Hun nummers zijn 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "student","start_offset": 3,"end_offset": 12,"type": "","position": 1}, + {"token": "studer","start_offset": 13,"end_offset": 21,"type": "","position": 2}, + {"token": "nederland","start_offset": 25,"end_offset": 34,"type": "","position": 4}, + {"token": "bezoek","start_offset": 38,"end_offset": 46,"type": "","position": 6}, + {"token": "amsterdam","start_offset": 47,"end_offset": 56,"type": "","position": 7}, + {"token": "nummer","start_offset": 62,"end_offset": 69,"type": "","position": 9}, + {"token": "123456","start_offset": 75,"end_offset": 81,"type": "","position": 11} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/english.md b/_analyzers/language-analyzers/english.md new file mode 100644 index 0000000000..4c3dff5bbc --- /dev/null +++ b/_analyzers/language-analyzers/english.md @@ -0,0 +1,143 @@ +--- +layout: default +title: English +parent: Language analyzers +grand_parent: Analyzers +nav_order: 120 +--- + +# English analyzer + +The built-in `english` analyzer can be applied to a text field using the following command: + +```json +PUT /english-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "english" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_english_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_english_analyzer": { + "type": "english", + "stem_exclusion": ["authority", "authorization"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## English analyzer internals + +The `english` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- stemmer (possessive_english) +- lowercase +- stop (English) +- keyword +- stemmer (English) + +## 
Custom English analyzer + +You can create custom English analyzer using the following command: + +```json +PUT /english-index +{ + "settings": { + "analysis": { + "filter": { + "english_stop": { + "type": "stop", + "stopwords": "_english_" + }, + "english_stemmer": { + "type": "stemmer", + "language": "english" + }, + "english_keywords": { + "type": "keyword_marker", + "keywords": [] + }, + "english_possessive_stemmer": { + "type": "stemmer", + "language": "possessive_english" + } + }, + "analyzer": { + "english_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "english_possessive_stemmer", + "lowercase", + "english_stop", + "english_keywords", + "english_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "english_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /english-index/_analyze +{ + "field": "content", + "text": "The students study in the USA and work at NASA. Their numbers are 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "student","start_offset": 4,"end_offset": 12,"type": "","position": 1}, + {"token": "studi","start_offset": 13,"end_offset": 18,"type": "","position": 2}, + {"token": "usa","start_offset": 26,"end_offset": 29,"type": "","position": 5}, + {"token": "work","start_offset": 34,"end_offset": 38,"type": "","position": 7}, + {"token": "nasa","start_offset": 42,"end_offset": 46,"type": "","position": 9}, + {"token": "number","start_offset": 54,"end_offset": 61,"type": "","position": 11}, + {"token": "123456","start_offset": 66,"end_offset": 72,"type": "","position": 13} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/estonian.md b/_analyzers/language-analyzers/estonian.md new file mode 100644 index 0000000000..6b5afa2271 --- /dev/null +++ b/_analyzers/language-analyzers/estonian.md @@ -0,0 +1,139 @@ +--- +layout: default +title: Estonian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 130 +--- + +# Estonian analyzer + +The built-in `estonian` analyzer can be applied to a text field using the following command: + +```json +PUT /estonian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "estonian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_estonian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_estonian_analyzer": { + "type": "estonian", + "stem_exclusion": ["autoriteet", "kinnitus"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Estonian analyzer internals + +The `estonian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Estonian) +- keyword +- stemmer (Estonian) + +## Custom Estonian analyzer + +You 
can create custom Estonian analyzer using the following command: + +```json +PUT /estonian-index +{ + "settings": { + "analysis": { + "filter": { + "estonian_stop": { + "type": "stop", + "stopwords": "_estonian_" + }, + "estonian_stemmer": { + "type": "stemmer", + "language": "estonian" + }, + "estonian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "estonian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "estonian_stop", + "estonian_keywords", + "estonian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "estonian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /estonian-index/_analyze +{ + "field": "content", + "text": "Õpilased õpivad Tallinnas ja Eesti ülikoolides. Nende numbrid on 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "õpilase","start_offset": 0,"end_offset": 8,"type": "","position": 0}, + {"token": "õpi","start_offset": 9,"end_offset": 15,"type": "","position": 1}, + {"token": "tallinna","start_offset": 16,"end_offset": 25,"type": "","position": 2}, + {"token": "eesti","start_offset": 29,"end_offset": 34,"type": "","position": 4}, + {"token": "ülikooli","start_offset": 35,"end_offset": 46,"type": "","position": 5}, + {"token": "nende","start_offset": 48,"end_offset": 53,"type": "","position": 6}, + {"token": "numbri","start_offset": 54,"end_offset": 61,"type": "","position": 7}, + {"token": "on","start_offset": 62,"end_offset": 64,"type": "","position": 8}, + {"token": "123456","start_offset": 65,"end_offset": 71,"type": "","position": 9} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/finnish.md b/_analyzers/language-analyzers/finnish.md new file mode 100644 
index 0000000000..ccc1534b2f --- /dev/null +++ b/_analyzers/language-analyzers/finnish.md @@ -0,0 +1,137 @@ +--- +layout: default +title: Finnish +parent: Language analyzers +grand_parent: Analyzers +nav_order: 140 +--- + +# Finnish analyzer + +The built-in `finnish` analyzer can be applied to a text field using the following command: + +```json +PUT /finnish-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "finnish" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_finnish_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_finnish_analyzer": { + "type": "finnish", + "stem_exclusion": ["valta", "hyväksyntä"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Finnish analyzer internals + +The `finnish` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Finnish) +- keyword +- stemmer (Finnish) + +## Custom Finnish analyzer + +You can create custom Finnish analyzer using the following command: + +```json +PUT /finnish-index +{ + "settings": { + "analysis": { + "filter": { + "finnish_stop": { + "type": "stop", + "stopwords": "_finnish_" + }, + "finnish_stemmer": { + "type": "stemmer", + "language": "finnish" + }, + "finnish_keywords": { + "type": "keyword_marker", + "keywords": ["Helsinki", "Suomi"] + } + }, + "analyzer": { + "finnish_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "finnish_stop", + "finnish_keywords", + "finnish_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "finnish_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST 
/finnish-index/_analyze +{ + "field": "content", + "text": "Opiskelijat opiskelevat Helsingissä ja Suomen yliopistoissa. Heidän numeronsa ovat 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "opiskelij","start_offset": 0,"end_offset": 11,"type": "","position": 0}, + {"token": "opiskelev","start_offset": 12,"end_offset": 23,"type": "","position": 1}, + {"token": "helsing","start_offset": 24,"end_offset": 35,"type": "","position": 2}, + {"token": "suome","start_offset": 39,"end_offset": 45,"type": "","position": 4}, + {"token": "yliopisto","start_offset": 46,"end_offset": 59,"type": "","position": 5}, + {"token": "numero","start_offset": 68,"end_offset": 77,"type": "","position": 7}, + {"token": "123456","start_offset": 83,"end_offset": 89,"type": "","position": 9} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/french.md b/_analyzers/language-analyzers/french.md new file mode 100644 index 0000000000..730a2066d4 --- /dev/null +++ b/_analyzers/language-analyzers/french.md @@ -0,0 +1,148 @@ +--- +layout: default +title: French +parent: Language analyzers +grand_parent: Analyzers +nav_order: 150 +--- + +# French analyzer + +The built-in `french` analyzer can be applied to a text field using the following command: + +```json +PUT /french-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "french" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_french_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_french_analyzer": { + "type": "french", + "stem_exclusion": ["autorité", "acceptation"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## French analyzer internals + +The `french` analyzer is build using the following: + 
+Tokenizer: `standard` + +Token Filters: +- elision (French) +- lowercase +- stop (French) +- keyword +- stemmer (French) + +## Custom French analyzer + +You can create custom French analyzer using the following command: + +```json +PUT /french-index +{ + "settings": { + "analysis": { + "filter": { + "french_stop": { + "type": "stop", + "stopwords": "_french_" + }, + "french_elision": { + "type": "elision", + "articles_case": true, + "articles": [ + "l", "m", "t", "qu", "n", "s", + "j", "d", "c", "jusqu", "quoiqu", + "lorsqu", "puisqu" + ] + }, + "french_stemmer": { + "type": "stemmer", + "language": "light_french" + }, + "french_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "french_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "french_elision", + "lowercase", + "french_stop", + "french_keywords", + "french_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "french_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /french-index/_analyze +{ + "field": "content", + "text": "Les étudiants étudient à Paris et dans les universités françaises. Leurs numéros sont 123456." 
+}
+```
+{% include copy-curl.html %}
+
+The response contains the generated tokens:
+
+```json
+{
+  "tokens": [
+    {"token": "etudiant","start_offset": 4,"end_offset": 13,"type": "","position": 1},
+    {"token": "etudient","start_offset": 14,"end_offset": 22,"type": "","position": 2},
+    {"token": "pari","start_offset": 25,"end_offset": 30,"type": "","position": 4},
+    {"token": "universit","start_offset": 43,"end_offset": 54,"type": "","position": 8},
+    {"token": "francais","start_offset": 55,"end_offset": 65,"type": "","position": 9},
+    {"token": "numero","start_offset": 73,"end_offset": 80,"type": "","position": 11},
+    {"token": "123456","start_offset": 86,"end_offset": 92,"type": "","position": 13}
+  ]
+}
+```
\ No newline at end of file
diff --git a/_analyzers/language-analyzers/galician.md b/_analyzers/language-analyzers/galician.md
new file mode 100644
index 0000000000..e0f833e13d
--- /dev/null
+++ b/_analyzers/language-analyzers/galician.md
@@ -0,0 +1,138 @@
+---
+layout: default
+title: Galician
+parent: Language analyzers
+grand_parent: Analyzers
+nav_order: 160
+---
+
+# Galician analyzer
+
+The built-in `galician` analyzer can be applied to a text field using the following command:
+
+```json
+PUT /galician-index
+{
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "text",
+        "analyzer": "galician"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Stem exclusion
+
+You can also use `stem_exclusion` with this language analyzer using the following command:
+
+```json
+PUT index_with_stem_exclusion_galician_analyzer
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "stem_exclusion_galician_analyzer": {
+          "type": "galician",
+          "stem_exclusion": ["autoridade", "aceptación"]
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Galician analyzer internals
+
+The `galician` analyzer is built using the following:
+
+Tokenizer: `standard`
+
+Token Filters:
+- lowercase
+- stop (Galician)
+- keyword
+- stemmer (Galician)
+
+## Custom Galician
analyzer + +You can create custom Galician analyzer using the following command: + +```json +PUT /galician-index +{ + "settings": { + "analysis": { + "filter": { + "galician_stop": { + "type": "stop", + "stopwords": "_galician_" + }, + "galician_stemmer": { + "type": "stemmer", + "language": "galician" + }, + "galician_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "galician_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "galician_stop", + "galician_keywords", + "galician_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "galician_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /galician-index/_analyze +{ + "field": "content", + "text": "Os estudantes estudan en Santiago e nas universidades galegas. Os seus números son 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "estud","start_offset": 3,"end_offset": 13,"type": "","position": 1}, + {"token": "estud","start_offset": 14,"end_offset": 21,"type": "","position": 2}, + {"token": "santiag","start_offset": 25,"end_offset": 33,"type": "","position": 4}, + {"token": "univers","start_offset": 40,"end_offset": 53,"type": "","position": 7}, + {"token": "galeg","start_offset": 54,"end_offset": 61,"type": "","position": 8}, + {"token": "numer","start_offset": 71,"end_offset": 78,"type": "","position": 11}, + {"token": "son","start_offset": 79,"end_offset": 82,"type": "","position": 12}, + {"token": "123456","start_offset": 83,"end_offset": 89,"type": "","position": 13} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 2e15c32b86..c69337f3a9 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -77,6 +77,7 @@ Following languages support `stem_exclusion`: - catalan - cjk - czech +- danish - dutch - english - finnish From 15c0f8c7c35a386c7350110c4533487c2ea9d86a Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Thu, 31 Oct 2024 14:21:33 +0000 Subject: [PATCH 08/15] adding german,greek,hindi,hungarian,indonesian,irish,italian,latvian,lithuanian,norwegian and persion laguage analyzer docs Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/german.md | 174 +++++++++++++++++++ _analyzers/language-analyzers/greek.md | 139 +++++++++++++++ _analyzers/language-analyzers/hindi.md | 178 ++++++++++++++++++++ _analyzers/language-analyzers/hungarian.md | 172 +++++++++++++++++++ _analyzers/language-analyzers/indonesian.md | 172 +++++++++++++++++++ _analyzers/language-analyzers/irish.md | 157 +++++++++++++++++ _analyzers/language-analyzers/italian.md | 148 ++++++++++++++++ _analyzers/language-analyzers/latvian.md | 148 ++++++++++++++++ 
_analyzers/language-analyzers/lithuanian.md | 136 +++++++++++++++ _analyzers/language-analyzers/norwegian.md | 137 +++++++++++++++ _analyzers/language-analyzers/persian.md | 142 ++++++++++++++++ 11 files changed, 1703 insertions(+) create mode 100644 _analyzers/language-analyzers/german.md create mode 100644 _analyzers/language-analyzers/greek.md create mode 100644 _analyzers/language-analyzers/hindi.md create mode 100644 _analyzers/language-analyzers/hungarian.md create mode 100644 _analyzers/language-analyzers/indonesian.md create mode 100644 _analyzers/language-analyzers/irish.md create mode 100644 _analyzers/language-analyzers/italian.md create mode 100644 _analyzers/language-analyzers/latvian.md create mode 100644 _analyzers/language-analyzers/lithuanian.md create mode 100644 _analyzers/language-analyzers/norwegian.md create mode 100644 _analyzers/language-analyzers/persian.md diff --git a/_analyzers/language-analyzers/german.md b/_analyzers/language-analyzers/german.md new file mode 100644 index 0000000000..3076fea57c --- /dev/null +++ b/_analyzers/language-analyzers/german.md @@ -0,0 +1,174 @@ +--- +layout: default +title: German +parent: Language analyzers +grand_parent: Analyzers +nav_order: 170 +--- + +# German analyzer + +The built-in `german` analyzer can be applied to a text field using the following command: + +```json +PUT /german-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "german" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_german_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_german_analyzer": { + "type": "german", + "stem_exclusion": ["Autorität", "Genehmigung"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## German analyzer internals + +The `german` analyzer is build using the following: + 
+Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (German) +- keyword +- normalization (German) +- stemmer (German) + +## Custom German analyzer + +You can create custom German analyzer using the following command: + +```json +PUT /german-index +{ + "settings": { + "analysis": { + "filter": { + "german_stop": { + "type": "stop", + "stopwords": "_german_" + }, + "german_stemmer": { + "type": "stemmer", + "language": "light_german" + }, + "german_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "german_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "german_stop", + "german_keywords", + "german_normalization", + "german_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "german_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /german-index/_analyze +{ + "field": "content", + "text": "Die Studenten studieren an den deutschen Universitäten. Ihre Nummern sind 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "student", + "start_offset": 4, + "end_offset": 13, + "type": "", + "position": 1 + }, + { + "token": "studi", + "start_offset": 14, + "end_offset": 23, + "type": "", + "position": 2 + }, + { + "token": "deutsch", + "start_offset": 31, + "end_offset": 40, + "type": "", + "position": 5 + }, + { + "token": "universitat", + "start_offset": 41, + "end_offset": 54, + "type": "", + "position": 6 + }, + { + "token": "numm", + "start_offset": 61, + "end_offset": 68, + "type": "", + "position": 8 + }, + { + "token": "123456", + "start_offset": 74, + "end_offset": 80, + "type": "", + "position": 10 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/greek.md b/_analyzers/language-analyzers/greek.md new file mode 100644 index 0000000000..01735581ca --- /dev/null +++ b/_analyzers/language-analyzers/greek.md @@ -0,0 +1,139 @@ +--- +layout: default +title: Greek +parent: Language analyzers +grand_parent: Analyzers +nav_order: 180 +--- + +# Greek analyzer + +The built-in `greek` analyzer can be applied to a text field using the following command: + +```json +PUT /greek-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "greek" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_greek_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_greek_analyzer": { + "type": "greek", + "stem_exclusion": ["αρχή", "έγκριση"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Greek analyzer internals + +The `greek` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Greek) +- keyword +- stemmer (Greek) + +## Custom Greek analyzer + +You can create custom 
Greek analyzer using the following command: + +```json +PUT /greek-index +{ + "settings": { + "analysis": { + "filter": { + "greek_stop": { + "type": "stop", + "stopwords": "_greek_" + }, + "greek_stemmer": { + "type": "stemmer", + "language": "greek" + }, + "greek_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "greek_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "greek_stop", + "greek_keywords", + "greek_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "greek_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /greek-index/_analyze +{ + "field": "content", + "text": "Οι φοιτητές σπουδάζουν στα ελληνικά πανεπιστήμια. Οι αριθμοί τους είναι 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "φοιτητές","start_offset": 3,"end_offset": 11,"type": "","position": 1}, + {"token": "σπουδάζ","start_offset": 12,"end_offset": 22,"type": "","position": 2}, + {"token": "στα","start_offset": 23,"end_offset": 26,"type": "","position": 3}, + {"token": "ελληνικά","start_offset": 27,"end_offset": 35,"type": "","position": 4}, + {"token": "πανεπιστήμ","start_offset": 36,"end_offset": 48,"type": "","position": 5}, + {"token": "αριθμοί","start_offset": 53,"end_offset": 60,"type": "","position": 7}, + {"token": "τους","start_offset": 61,"end_offset": 65,"type": "","position": 8}, + {"token": "είνα","start_offset": 66,"end_offset": 71,"type": "","position": 9}, + {"token": "123456","start_offset": 72,"end_offset": 78,"type": "","position": 10} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/hindi.md b/_analyzers/language-analyzers/hindi.md new file mode 100644 index 0000000000..b2812edd49 --- /dev/null 
+++ b/_analyzers/language-analyzers/hindi.md @@ -0,0 +1,178 @@ +--- +layout: default +title: Hindi +parent: Language analyzers +grand_parent: Analyzers +nav_order: 190 +--- + +# Hindi analyzer + +The built-in `hindi` analyzer can be applied to a text field using the following command: + +```json +PUT /hindi-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "hindi" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_hindi_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_hindi_analyzer": { + "type": "hindi", + "stem_exclusion": ["अधिकार", "अनुमोदन"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Hindi analyzer internals + +The `hindi` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- decimal_digit +- keyword +- normalization (indic) +- normalization (Hindi) +- stop (Hindi) +- stemmer (Hindi) + +## Custom Hindi analyzer + +You can create custom Hindi analyzer using the following command: + +```json +PUT /hindi-index +{ + "settings": { + "analysis": { + "filter": { + "hindi_stop": { + "type": "stop", + "stopwords": "_hindi_" + }, + "hindi_stemmer": { + "type": "stemmer", + "language": "hindi" + }, + "hindi_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "hindi_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "decimal_digit", + "hindi_keywords", + "indic_normalization", + "hindi_normalization", + "hindi_stop", + "hindi_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "hindi_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST 
/hindi-index/_analyze +{ + "field": "content", + "text": "छात्र भारतीय विश्वविद्यालयों में पढ़ते हैं। उनके नंबर १२३४५६ हैं।" +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "छातर", + "start_offset": 0, + "end_offset": 5, + "type": "", + "position": 0 + }, + { + "token": "भारतिय", + "start_offset": 6, + "end_offset": 12, + "type": "", + "position": 1 + }, + { + "token": "विशवविदयालय", + "start_offset": 13, + "end_offset": 28, + "type": "", + "position": 2 + }, + { + "token": "पढ", + "start_offset": 33, + "end_offset": 38, + "type": "", + "position": 4 + }, + { + "token": "नंबर", + "start_offset": 49, + "end_offset": 53, + "type": "", + "position": 7 + }, + { + "token": "123456", + "start_offset": 54, + "end_offset": 60, + "type": "", + "position": 8 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/hungarian.md b/_analyzers/language-analyzers/hungarian.md new file mode 100644 index 0000000000..7e32ead084 --- /dev/null +++ b/_analyzers/language-analyzers/hungarian.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Hungarian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 200 +--- + +# Hungarian analyzer + +The built-in `hungarian` analyzer can be applied to a text field using the following command: + +```json +PUT /hungarian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "hungarian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_hungarian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_hungarian_analyzer": { + "type": "hungarian", + "stem_exclusion": ["hatalom", "jóváhagyás"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Hungarian analyzer internals + +The `hungarian` analyzer is build using 
the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Hungarian) +- keyword +- stemmer (Hungarian) + +## Custom Hungarian analyzer + +You can create custom Hungarian analyzer using the following command: + +```json +PUT /hungarian-index +{ + "settings": { + "analysis": { + "filter": { + "hungarian_stop": { + "type": "stop", + "stopwords": "_hungarian_" + }, + "hungarian_stemmer": { + "type": "stemmer", + "language": "hungarian" + }, + "hungarian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "hungarian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "hungarian_stop", + "hungarian_keywords", + "hungarian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "hungarian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /hungarian-index/_analyze +{ + "field": "content", + "text": "A diákok a magyar egyetemeken tanulnak. A számaik 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "diák", + "start_offset": 2, + "end_offset": 8, + "type": "", + "position": 1 + }, + { + "token": "magyar", + "start_offset": 11, + "end_offset": 17, + "type": "", + "position": 3 + }, + { + "token": "egyetem", + "start_offset": 18, + "end_offset": 29, + "type": "", + "position": 4 + }, + { + "token": "tanul", + "start_offset": 30, + "end_offset": 38, + "type": "", + "position": 5 + }, + { + "token": "szám", + "start_offset": 42, + "end_offset": 49, + "type": "", + "position": 7 + }, + { + "token": "123456", + "start_offset": 50, + "end_offset": 56, + "type": "", + "position": 8 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/indonesian.md b/_analyzers/language-analyzers/indonesian.md new file mode 100644 index 0000000000..b4b567c588 --- /dev/null +++ b/_analyzers/language-analyzers/indonesian.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Indonesian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 210 +--- + +# Indonesian analyzer + +The built-in `indonesian` analyzer can be applied to a text field using the following command: + +```json +PUT /indonesian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "indonesian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_indonesian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_indonesian_analyzer": { + "type": "indonesian", + "stem_exclusion": ["otoritas", "persetujuan"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Indonesian analyzer internals + +The `indonesian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Indonesian) +- keyword +- 
stemmer (Indonesian)
+
+## Custom Indonesian analyzer
+
+You can create a custom Indonesian analyzer using the following command:
+
+```json
+PUT /indonesian-index
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "indonesian_stop": {
+          "type": "stop",
+          "stopwords": "_indonesian_"
+        },
+        "indonesian_stemmer": {
+          "type": "stemmer",
+          "language": "indonesian"
+        },
+        "indonesian_keywords": {
+          "type": "keyword_marker",
+          "keywords": []
+        }
+      },
+      "analyzer": {
+        "indonesian_analyzer": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": [
+            "lowercase",
+            "indonesian_stop",
+            "indonesian_keywords",
+            "indonesian_stemmer"
+          ]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "text",
+        "analyzer": "indonesian_analyzer"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Generated tokens
+
+Use the following request to examine the tokens generated using the analyzer:
+
+```json
+POST /indonesian-index/_analyze
+{
+  "field": "content",
+  "text": "Mahasiswa belajar di universitas Indonesia. Nomor mereka adalah 123456."
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "mahasiswa", + "start_offset": 0, + "end_offset": 9, + "type": "", + "position": 0 + }, + { + "token": "ajar", + "start_offset": 10, + "end_offset": 17, + "type": "", + "position": 1 + }, + { + "token": "universitas", + "start_offset": 21, + "end_offset": 32, + "type": "", + "position": 3 + }, + { + "token": "indonesia", + "start_offset": 33, + "end_offset": 42, + "type": "", + "position": 4 + }, + { + "token": "nomor", + "start_offset": 44, + "end_offset": 49, + "type": "", + "position": 5 + }, + { + "token": "123456", + "start_offset": 64, + "end_offset": 70, + "type": "", + "position": 8 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/irish.md b/_analyzers/language-analyzers/irish.md new file mode 100644 index 0000000000..03fde20c3c --- /dev/null +++ b/_analyzers/language-analyzers/irish.md @@ -0,0 +1,157 @@ +--- +layout: default +title: Irish +parent: Language analyzers +grand_parent: Analyzers +nav_order: 210 +--- + +# Irish analyzer + +The built-in `irish` analyzer can be applied to a text field using the following command: + +```json +PUT /irish-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "irish" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_irish_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_irish_analyzer": { + "type": "irish", + "stem_exclusion": ["údarás", "faomhadh"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Irish analyzer internals + +The `irish` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- hyphenation (Irish) +- elision (Irish) +- lowercase (Irish) +- stop (Irish) +- keyword +- stemmer (Irish) + 
+## Custom Irish analyzer + +You can create custom Irish analyzer using the following command: + +```json +PUT /irish-index +{ + "settings": { + "analysis": { + "filter": { + "irish_stop": { + "type": "stop", + "stopwords": "_irish_" + }, + "irish_elision": { + "type": "elision", + "articles": [ "d", "m", "b" ], + "articles_case": true + }, + "irish_hyphenation": { + "type": "stop", + "stopwords": [ "h", "n", "t" ], + "ignore_case": true + }, + "irish_lowercase": { + "type": "lowercase", + "language": "irish" + }, + "irish_stemmer": { + "type": "stemmer", + "language": "irish" + }, + "irish_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "irish_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "irish_hyphenation", + "irish_elision", + "irish_lowercase", + "irish_stop", + "irish_keywords", + "irish_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "irish_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /irish-index/_analyze +{ + "field": "content", + "text": "Tá mic léinn ag staidéar in ollscoileanna na hÉireann. Is iad a gcuid uimhreacha ná 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "tá","start_offset": 0,"end_offset": 2,"type": "","position": 0}, + {"token": "mic","start_offset": 3,"end_offset": 6,"type": "","position": 1}, + {"token": "léinn","start_offset": 7,"end_offset": 12,"type": "","position": 2}, + {"token": "staidéar","start_offset": 16,"end_offset": 24,"type": "","position": 4}, + {"token": "ollscoileanna","start_offset": 28,"end_offset": 41,"type": "","position": 6}, + {"token": "héireann","start_offset": 45,"end_offset": 53,"type": "","position": 8}, + {"token": "cuid","start_offset": 64,"end_offset": 69,"type": "","position": 12}, + {"token": "uimhreacha","start_offset": 70,"end_offset": 80,"type": "","position": 13}, + {"token": "123456","start_offset": 84,"end_offset": 90,"type": "","position": 15} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/italian.md b/_analyzers/language-analyzers/italian.md new file mode 100644 index 0000000000..636f58fcc8 --- /dev/null +++ b/_analyzers/language-analyzers/italian.md @@ -0,0 +1,148 @@ +--- +layout: default +title: Italian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 220 +--- + +# Italian analyzer + +The built-in `italian` analyzer can be applied to a text field using the following command: + +```json +PUT /italian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "italian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_italian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_italian_analyzer": { + "type": "italian", + "stem_exclusion": ["autorità", "approvazione"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Italian analyzer internals + +The `italian` analyzer is build 
using the following: + +Tokenizer: `standard` + +Token Filters: +- elision (Italian) +- lowercase +- stop (Italian) +- keyword +- stemmer (Italian) + +## Custom Italian analyzer + +You can create custom Italian analyzer using the following command: + +```json +PUT /italian-index +{ + "settings": { + "analysis": { + "filter": { + "italian_stop": { + "type": "stop", + "stopwords": "_italian_" + }, + "italian_elision": { + "type": "elision", + "articles": [ + "c", "l", "all", "dall", "dell", + "nell", "sull", "coll", "pell", + "gl", "agl", "dagl", "degl", "negl", + "sugl", "un", "m", "t", "s", "v", "d" + ], + "articles_case": true + }, + "italian_stemmer": { + "type": "stemmer", + "language": "light_italian" + }, + "italian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "italian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "italian_elision", + "lowercase", + "italian_stop", + "italian_keywords", + "italian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "italian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /italian-index/_analyze +{ + "field": "content", + "text": "Gli studenti studiano nelle università italiane. I loro numeri sono 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "student","start_offset": 4,"end_offset": 12,"type": "","position": 1}, + {"token": "studian","start_offset": 13,"end_offset": 21,"type": "","position": 2}, + {"token": "universit","start_offset": 28,"end_offset": 38,"type": "","position": 4}, + {"token": "italian","start_offset": 39,"end_offset": 47,"type": "","position": 5}, + {"token": "numer","start_offset": 56,"end_offset": 62,"type": "","position": 8}, + {"token": "123456","start_offset": 68,"end_offset": 74,"type": "","position": 10} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/latvian.md b/_analyzers/language-analyzers/latvian.md new file mode 100644 index 0000000000..ecdc4b2f51 --- /dev/null +++ b/_analyzers/language-analyzers/latvian.md @@ -0,0 +1,148 @@ +--- +layout: default +title: Latvian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 230 +--- + +# Latvian analyzer + +The built-in `latvian` analyzer can be applied to a text field using the following command: + +```json +PUT /latvian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "latvian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_latvian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_latvian_analyzer": { + "type": "latvian", + "stem_exclusion": ["autoritāte", "apstiprinājums"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Latvian analyzer internals + +The `latvian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Latvian) +- keyword +- stemmer (Latvian) + +## Custom Latvian analyzer + +You can create custom Latvian analyzer using the following command: + +```json +PUT 
/latvian-index
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "latvian_stop": {
+          "type": "stop",
+          "stopwords": "_latvian_"
+        },
+        "latvian_stemmer": {
+          "type": "stemmer",
+          "language": "latvian"
+        },
+        "latvian_keywords": {
+          "type": "keyword_marker",
+          "keywords": []
+        }
+      },
+      "analyzer": {
+        "latvian_analyzer": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "filter": [
+            "lowercase",
+            "latvian_stop",
+            "latvian_keywords",
+            "latvian_stemmer"
+          ]
+        }
+      }
+    }
+  },
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "text",
+        "analyzer": "latvian_analyzer"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Generated tokens
+
+Use the following request to examine the tokens generated using the analyzer:
+
+```json
+POST /latvian-index/_analyze
+{
+  "field": "content",
+  "text": "Studenti mācās Latvijas universitātēs. Viņu numuri ir 123456."
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "student","start_offset": 0,"end_offset": 8,"type": "","position": 0}, + {"token": "māc","start_offset": 9,"end_offset": 14,"type": "","position": 1}, + {"token": "latvij","start_offset": 15,"end_offset": 23,"type": "","position": 2}, + {"token": "universitāt","start_offset": 24,"end_offset": 37,"type": "","position": 3}, + {"token": "vin","start_offset": 39,"end_offset": 43,"type": "","position": 4}, + {"token": "numur","start_offset": 44,"end_offset": 50,"type": "","position": 5}, + {"token": "123456","start_offset": 54,"end_offset": 60,"type": "","position": 7} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/lithuanian.md b/_analyzers/language-analyzers/lithuanian.md new file mode 100644 index 0000000000..123e01139e --- /dev/null +++ b/_analyzers/language-analyzers/lithuanian.md @@ -0,0 +1,136 @@ +--- +layout: default +title: Lithuanian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 230 +--- + +# Lithuanian analyzer + +The built-in `lithuanian` analyzer can be applied to a text field using the following command: + +```json +PUT /lithuanian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "lithuanian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_lithuanian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_lithuanian_analyzer": { + "type": "lithuanian", + "stem_exclusion": ["autoritetas", "patvirtinimas"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Lithuanian analyzer internals + +The `lithuanian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Lithuanian) +- keyword +- stemmer (Lithuanian) + 
+## Custom Lithuanian analyzer + +You can create custom Lithuanian analyzer using the following command: + +```json +PUT /lithuanian-index +{ + "settings": { + "analysis": { + "filter": { + "lithuanian_stop": { + "type": "stop", + "stopwords": "_lithuanian_" + }, + "lithuanian_stemmer": { + "type": "stemmer", + "language": "lithuanian" + }, + "lithuanian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "lithuanian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "lithuanian_stop", + "lithuanian_keywords", + "lithuanian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "lithuanian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /lithuanian-index/_analyze +{ + "field": "content", + "text": "Studentai mokosi Lietuvos universitetuose. Jų numeriai yra 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "student","start_offset": 0,"end_offset": 9,"type": "","position": 0}, + {"token": "mok","start_offset": 10,"end_offset": 16,"type": "","position": 1}, + {"token": "lietuv","start_offset": 17,"end_offset": 25,"type": "","position": 2}, + {"token": "universitet","start_offset": 26,"end_offset": 41,"type": "","position": 3}, + {"token": "num","start_offset": 46,"end_offset": 54,"type": "","position": 5}, + {"token": "123456","start_offset": 59,"end_offset": 65,"type": "","position": 7} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/norwegian.md b/_analyzers/language-analyzers/norwegian.md new file mode 100644 index 0000000000..33d8e01f7f --- /dev/null +++ b/_analyzers/language-analyzers/norwegian.md @@ -0,0 +1,137 @@ +--- +layout: default +title: Norwegian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 240 +--- + +# Norwegian analyzer + +The built-in `norwegian` analyzer can be applied to a text field using the following command: + +```json +PUT /norwegian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "norwegian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_norwegian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_norwegian_analyzer": { + "type": "norwegian", + "stem_exclusion": ["autoritet", "godkjenning"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Norwegian analyzer internals + +The `norwegian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Norwegian) +- keyword +- stemmer (Norwegian) + +## Custom Norwegian analyzer + +You can create custom Norwegian analyzer using the following 
command: + +```json +PUT /norwegian-index +{ + "settings": { + "analysis": { + "filter": { + "norwegian_stop": { + "type": "stop", + "stopwords": "_norwegian_" + }, + "norwegian_stemmer": { + "type": "stemmer", + "language": "norwegian" + }, + "norwegian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "norwegian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "norwegian_stop", + "norwegian_keywords", + "norwegian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "norwegian_analyzer" + } + } + } +} + +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /norwegian-index/_analyze +{ + "field": "content", + "text": "Studentene studerer ved norske universiteter. Deres nummer er 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "student","start_offset": 0,"end_offset": 10,"type": "","position": 0}, + {"token": "studer","start_offset": 11,"end_offset": 19,"type": "","position": 1}, + {"token": "norsk","start_offset": 24,"end_offset": 30,"type": "","position": 3}, + {"token": "universitet","start_offset": 31,"end_offset": 44,"type": "","position": 4}, + {"token": "numm","start_offset": 52,"end_offset": 58,"type": "","position": 6}, + {"token": "123456","start_offset": 62,"end_offset": 68,"type": "","position": 8} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/persian.md b/_analyzers/language-analyzers/persian.md new file mode 100644 index 0000000000..5693b9e045 --- /dev/null +++ b/_analyzers/language-analyzers/persian.md @@ -0,0 +1,142 @@ +--- +layout: default +title: Persian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 250 +--- + +# Persian analyzer + +The built-in `persian` analyzer can be 
applied to a text field using the following command:
+
+```json
+PUT /persian-index
+{
+  "mappings": {
+    "properties": {
+      "content": {
+        "type": "text",
+        "analyzer": "persian"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Stem exclusion
+
+You can also use `stem_exclusion` with this language analyzer using the following command:
+
+```json
+PUT index_with_stem_exclusion_persian_analyzer
+{
+  "settings": {
+    "analysis": {
+      "analyzer": {
+        "stem_exclusion_persian_analyzer": {
+          "type": "persian",
+          "stem_exclusion": ["حکومت", "تأیید"]
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Persian analyzer internals
+
+The `persian` analyzer is build using the following:
+
+Tokenizer: `standard`
+
+Token Filters:
+- lowercase
+- decimal_digit
+- normalization (Arabic)
+- normalization (Persian)
+- keyword
+- stop (Persian)
+
+## Custom Persian analyzer
+
+You can create custom Persian analyzer using the following command:
+
+```json
+PUT /persian-index
+{
+  "settings": {
+    "analysis": {
+      "filter": {
+        "persian_stop": {
+          "type": "stop",
+          "stopwords": "_persian_"
+        },
+        "persian_keywords": {
+          "type": "keyword_marker",
+          "keywords": []
+        }
+      },
+      "char_filter": {
+        "null_width_replace_with_space": {
+          "type": "mapping",
+          "mappings": [ "\\u200C=>\\u0020"]
+        }
+      },
+      "analyzer": {
+        "persian_analyzer": {
+          "type": "custom",
+          "tokenizer": "standard",
+          "char_filter": [ "null_width_replace_with_space" ],
+          "filter": [
+            "lowercase",
+            "decimal_digit",
+            "arabic_normalization",
+            "persian_normalization",
+            "persian_stop"
+          ]
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Generated tokens
+
+Use the following request to examine the tokens generated using the analyzer:
+
+```json
+POST /persian-index/_analyze
+{
+  "field": "content",
+  "text": "دانشجویان در دانشگاه‌های ایرانی تحصیل می‌کنند. شماره‌های آن‌ها ۱۲۳۴۵۶ است."
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "دانشجويان","start_offset": 0,"end_offset": 9,"type": "","position": 0}, + {"token": "دانشگاه","start_offset": 13,"end_offset": 20,"type": "","position": 2}, + {"token": "ايراني","start_offset": 25,"end_offset": 31,"type": "","position": 4}, + {"token": "تحصيل","start_offset": 32,"end_offset": 37,"type": "","position": 5}, + {"token": "شماره","start_offset": 47,"end_offset": 52,"type": "","position": 8}, + {"token": "123456","start_offset": 63,"end_offset": 69,"type": "","position": 12} + ] +} +``` \ No newline at end of file From 2b2845f417918a7292707b5820c784037ff32f75 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Fri, 1 Nov 2024 10:50:01 +0000 Subject: [PATCH 09/15] adding portuguese,romanian,russian,sorani,spanish,swedish,thai and turkish language analyzer docs Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 2 +- _analyzers/language-analyzers/armenian.md | 2 +- _analyzers/language-analyzers/basque.md | 2 +- _analyzers/language-analyzers/bengali.md | 2 +- _analyzers/language-analyzers/brazilian.md | 2 +- _analyzers/language-analyzers/bulgarian.md | 2 +- _analyzers/language-analyzers/catalan.md | 2 +- _analyzers/language-analyzers/cjk.md | 2 +- _analyzers/language-analyzers/czech.md | 2 +- _analyzers/language-analyzers/danish.md | 2 +- _analyzers/language-analyzers/dutch.md | 2 +- _analyzers/language-analyzers/english.md | 2 +- _analyzers/language-analyzers/estonian.md | 2 +- _analyzers/language-analyzers/finnish.md | 2 +- _analyzers/language-analyzers/french.md | 2 +- _analyzers/language-analyzers/galician.md | 2 +- _analyzers/language-analyzers/german.md | 2 +- _analyzers/language-analyzers/greek.md | 2 +- _analyzers/language-analyzers/hindi.md | 2 +- _analyzers/language-analyzers/hungarian.md | 2 +- _analyzers/language-analyzers/indonesian.md | 2 +- _analyzers/language-analyzers/irish.md | 2 +- 
_analyzers/language-analyzers/italian.md | 2 +- _analyzers/language-analyzers/latvian.md | 2 +- _analyzers/language-analyzers/lithuanian.md | 2 +- _analyzers/language-analyzers/norwegian.md | 2 +- _analyzers/language-analyzers/persian.md | 4 +- _analyzers/language-analyzers/portuguese.md | 172 ++++++++++++++++++++ _analyzers/language-analyzers/romanian.md | 172 ++++++++++++++++++++ _analyzers/language-analyzers/russian.md | 172 ++++++++++++++++++++ _analyzers/language-analyzers/sorani.md | 168 +++++++++++++++++++ _analyzers/language-analyzers/spanish.md | 172 ++++++++++++++++++++ _analyzers/language-analyzers/swedish.md | 172 ++++++++++++++++++++ _analyzers/language-analyzers/thai.md | 132 +++++++++++++++ _analyzers/language-analyzers/turkish.md | 143 ++++++++++++++++ 35 files changed, 1332 insertions(+), 27 deletions(-) create mode 100644 _analyzers/language-analyzers/portuguese.md create mode 100644 _analyzers/language-analyzers/romanian.md create mode 100644 _analyzers/language-analyzers/russian.md create mode 100644 _analyzers/language-analyzers/sorani.md create mode 100644 _analyzers/language-analyzers/spanish.md create mode 100644 _analyzers/language-analyzers/swedish.md create mode 100644 _analyzers/language-analyzers/thai.md create mode 100644 _analyzers/language-analyzers/turkish.md diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index b15d7ee58d..2bbfb81140 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -52,7 +52,7 @@ The `arabic` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - decimal_digit - stop (Arabic) diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md index 1324e39420..9355a49d05 100644 --- a/_analyzers/language-analyzers/armenian.md +++ b/_analyzers/language-analyzers/armenian.md @@ -52,7 +52,7 @@ The `armenian` analyzer is build using the 
following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Armenian) - keywords diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md index bab4ffa0fe..ada0b95cf5 100644 --- a/_analyzers/language-analyzers/basque.md +++ b/_analyzers/language-analyzers/basque.md @@ -52,7 +52,7 @@ The `basque` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Basque) - keywords diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md index 72132e8e91..ec3f7f0ac5 100644 --- a/_analyzers/language-analyzers/bengali.md +++ b/_analyzers/language-analyzers/bengali.md @@ -52,7 +52,7 @@ The `bengali` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - decimal_digit - indic_normalization diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md index b905773bbb..3e6eb3f89d 100644 --- a/_analyzers/language-analyzers/brazilian.md +++ b/_analyzers/language-analyzers/brazilian.md @@ -52,7 +52,7 @@ The `brazilian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Brazilian) - keywords diff --git a/_analyzers/language-analyzers/bulgarian.md b/_analyzers/language-analyzers/bulgarian.md index d924a81afc..682430717f 100644 --- a/_analyzers/language-analyzers/bulgarian.md +++ b/_analyzers/language-analyzers/bulgarian.md @@ -52,7 +52,7 @@ The `bulgarian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Bulgarian) - keywords diff --git a/_analyzers/language-analyzers/catalan.md b/_analyzers/language-analyzers/catalan.md index b1df91ce20..7a2c2e690b 100644 --- a/_analyzers/language-analyzers/catalan.md +++ b/_analyzers/language-analyzers/catalan.md @@ -52,7 +52,7 @@ The `catalan` analyzer is build using the following: 
Tokenizer: `standard` -Token Filters: +Token filters: - elision (Catalan) - lowercase - stop (Catalan) diff --git a/_analyzers/language-analyzers/cjk.md b/_analyzers/language-analyzers/cjk.md index e66b222062..8547a3156f 100644 --- a/_analyzers/language-analyzers/cjk.md +++ b/_analyzers/language-analyzers/cjk.md @@ -52,7 +52,7 @@ The `cjk` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - cjk_width - lowercase - cjk_bigram diff --git a/_analyzers/language-analyzers/czech.md b/_analyzers/language-analyzers/czech.md index f0a2ac6482..b7725920e3 100644 --- a/_analyzers/language-analyzers/czech.md +++ b/_analyzers/language-analyzers/czech.md @@ -52,7 +52,7 @@ The `czech` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Czech) - keyword diff --git a/_analyzers/language-analyzers/danish.md b/_analyzers/language-analyzers/danish.md index 3f974d5e0f..652aedb3b7 100644 --- a/_analyzers/language-analyzers/danish.md +++ b/_analyzers/language-analyzers/danish.md @@ -52,7 +52,7 @@ The `danish` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Danish) - keyword diff --git a/_analyzers/language-analyzers/dutch.md b/_analyzers/language-analyzers/dutch.md index e96c05d147..5a4153702d 100644 --- a/_analyzers/language-analyzers/dutch.md +++ b/_analyzers/language-analyzers/dutch.md @@ -52,7 +52,7 @@ The `dutch` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Dutch) - keyword diff --git a/_analyzers/language-analyzers/english.md b/_analyzers/language-analyzers/english.md index 4c3dff5bbc..e266681030 100644 --- a/_analyzers/language-analyzers/english.md +++ b/_analyzers/language-analyzers/english.md @@ -52,7 +52,7 @@ The `english` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - stemmer (possessive_english) - 
lowercase - stop (English) diff --git a/_analyzers/language-analyzers/estonian.md b/_analyzers/language-analyzers/estonian.md index 6b5afa2271..d67c88d3b2 100644 --- a/_analyzers/language-analyzers/estonian.md +++ b/_analyzers/language-analyzers/estonian.md @@ -52,7 +52,7 @@ The `estonian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Estonian) - keyword diff --git a/_analyzers/language-analyzers/finnish.md b/_analyzers/language-analyzers/finnish.md index ccc1534b2f..73c4eade5e 100644 --- a/_analyzers/language-analyzers/finnish.md +++ b/_analyzers/language-analyzers/finnish.md @@ -52,7 +52,7 @@ The `finnish` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Finnish) - keyword diff --git a/_analyzers/language-analyzers/french.md b/_analyzers/language-analyzers/french.md index 730a2066d4..574be37ab5 100644 --- a/_analyzers/language-analyzers/french.md +++ b/_analyzers/language-analyzers/french.md @@ -52,7 +52,7 @@ The `french` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - elision (French) - lowercase - stop (French) diff --git a/_analyzers/language-analyzers/galician.md b/_analyzers/language-analyzers/galician.md index e0f833e13d..75c789f1c2 100644 --- a/_analyzers/language-analyzers/galician.md +++ b/_analyzers/language-analyzers/galician.md @@ -52,7 +52,7 @@ The `galician` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (French) - keyword diff --git a/_analyzers/language-analyzers/german.md b/_analyzers/language-analyzers/german.md index 3076fea57c..ed9bb19229 100644 --- a/_analyzers/language-analyzers/german.md +++ b/_analyzers/language-analyzers/german.md @@ -52,7 +52,7 @@ The `german` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (German) - keyword diff --git 
a/_analyzers/language-analyzers/greek.md b/_analyzers/language-analyzers/greek.md index 01735581ca..94b9e5dddb 100644 --- a/_analyzers/language-analyzers/greek.md +++ b/_analyzers/language-analyzers/greek.md @@ -52,7 +52,7 @@ The `greek` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Greek) - keyword diff --git a/_analyzers/language-analyzers/hindi.md b/_analyzers/language-analyzers/hindi.md index b2812edd49..14964bfa4d 100644 --- a/_analyzers/language-analyzers/hindi.md +++ b/_analyzers/language-analyzers/hindi.md @@ -52,7 +52,7 @@ The `hindi` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - decimal_digit - keyword diff --git a/_analyzers/language-analyzers/hungarian.md b/_analyzers/language-analyzers/hungarian.md index 7e32ead084..f1851edf95 100644 --- a/_analyzers/language-analyzers/hungarian.md +++ b/_analyzers/language-analyzers/hungarian.md @@ -52,7 +52,7 @@ The `hungarian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Hungarian) - keyword diff --git a/_analyzers/language-analyzers/indonesian.md b/_analyzers/language-analyzers/indonesian.md index b4b567c588..feeef6254e 100644 --- a/_analyzers/language-analyzers/indonesian.md +++ b/_analyzers/language-analyzers/indonesian.md @@ -52,7 +52,7 @@ The `indonesian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Indonesian) - keyword diff --git a/_analyzers/language-analyzers/irish.md b/_analyzers/language-analyzers/irish.md index 03fde20c3c..b914ba6b21 100644 --- a/_analyzers/language-analyzers/irish.md +++ b/_analyzers/language-analyzers/irish.md @@ -52,7 +52,7 @@ The `irish` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - hyphenation (Irish) - elision (Irish) - lowercase (Irish) diff --git 
a/_analyzers/language-analyzers/italian.md b/_analyzers/language-analyzers/italian.md index 636f58fcc8..11113635ca 100644 --- a/_analyzers/language-analyzers/italian.md +++ b/_analyzers/language-analyzers/italian.md @@ -52,7 +52,7 @@ The `italian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - elision (Italian) - lowercase - stop (Italian) diff --git a/_analyzers/language-analyzers/latvian.md b/_analyzers/language-analyzers/latvian.md index ecdc4b2f51..820cb252b8 100644 --- a/_analyzers/language-analyzers/latvian.md +++ b/_analyzers/language-analyzers/latvian.md @@ -52,7 +52,7 @@ The `latvian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Latvian) - keyword diff --git a/_analyzers/language-analyzers/lithuanian.md b/_analyzers/language-analyzers/lithuanian.md index 123e01139e..55a94c9c1e 100644 --- a/_analyzers/language-analyzers/lithuanian.md +++ b/_analyzers/language-analyzers/lithuanian.md @@ -52,7 +52,7 @@ The `lithuanian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Lithuanian) - keyword diff --git a/_analyzers/language-analyzers/norwegian.md b/_analyzers/language-analyzers/norwegian.md index 33d8e01f7f..92fbd9231e 100644 --- a/_analyzers/language-analyzers/norwegian.md +++ b/_analyzers/language-analyzers/norwegian.md @@ -52,7 +52,7 @@ The `norwegian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Token filters: - lowercase - stop (Norwegian) - keyword diff --git a/_analyzers/language-analyzers/persian.md b/_analyzers/language-analyzers/persian.md index 5693b9e045..57ea1ea796 100644 --- a/_analyzers/language-analyzers/persian.md +++ b/_analyzers/language-analyzers/persian.md @@ -52,7 +52,9 @@ The `persian` analyzer is build using the following: Tokenizer: `standard` -Token Filters: +Char filter: `mapping` + +Token filters: - lowercase - decimal_digit 
- normalization (Arabic) diff --git a/_analyzers/language-analyzers/portuguese.md b/_analyzers/language-analyzers/portuguese.md new file mode 100644 index 0000000000..eb7b959c0b --- /dev/null +++ b/_analyzers/language-analyzers/portuguese.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Portuguese +parent: Language analyzers +grand_parent: Analyzers +nav_order: 260 +--- + +# Portuguese analyzer + +The built-in `portuguese` analyzer can be applied to a text field using the following command: + +```json +PUT /portuguese-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "portuguese" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_portuguese_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_portuguese_analyzer": { + "type": "portuguese", + "stem_exclusion": ["autoridade", "aprovação"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Portuguese analyzer internals + +The `portuguese` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Portuguese) +- keyword +- stemmer (Portuguese) + +## Custom Portuguese analyzer + +You can create custom Portuguese analyzer using the following command: + +```json +PUT /portuguese-index +{ + "settings": { + "analysis": { + "filter": { + "portuguese_stop": { + "type": "stop", + "stopwords": "_portuguese_" + }, + "portuguese_stemmer": { + "type": "stemmer", + "language": "light_portuguese" + }, + "portuguese_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "portuguese_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "portuguese_stop", + "portuguese_keywords", + "portuguese_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + 
"analyzer": "portuguese_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /portuguese-index/_analyze +{ + "field": "content", + "text": "Os estudantes estudam nas universidades brasileiras. Seus números são 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "estudant", + "start_offset": 3, + "end_offset": 13, + "type": "", + "position": 1 + }, + { + "token": "estudam", + "start_offset": 14, + "end_offset": 21, + "type": "", + "position": 2 + }, + { + "token": "universidad", + "start_offset": 26, + "end_offset": 39, + "type": "", + "position": 4 + }, + { + "token": "brasileir", + "start_offset": 40, + "end_offset": 51, + "type": "", + "position": 5 + }, + { + "token": "numer", + "start_offset": 58, + "end_offset": 65, + "type": "", + "position": 7 + }, + { + "token": "123456", + "start_offset": 70, + "end_offset": 76, + "type": "", + "position": 9 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/romanian.md b/_analyzers/language-analyzers/romanian.md new file mode 100644 index 0000000000..9b5c909665 --- /dev/null +++ b/_analyzers/language-analyzers/romanian.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Romanian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 270 +--- + +# Romanian analyzer + +The built-in `romanian` analyzer can be applied to a text field using the following command: + +```json +PUT /romanian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "romanian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_romanian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + 
"stem_exclusion_romanian_analyzer": { + "type": "romanian", + "stem_exclusion": ["autoritate", "aprobat"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Romanian analyzer internals + +The `romanian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Romanian) +- keyword +- stemmer (Romanian) + +## Custom Romanian analyzer + +You can create custom Romanian analyzer using the following command: + +```json +PUT /romanian-index +{ + "settings": { + "analysis": { + "filter": { + "romanian_stop": { + "type": "stop", + "stopwords": "_romanian_" + }, + "romanian_stemmer": { + "type": "stemmer", + "language": "romanian" + }, + "romanian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "romanian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "romanian_stop", + "romanian_keywords", + "romanian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "romanian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /romanian-index/_analyze +{ + "field": "content", + "text": "Studenții învață la universitățile din România. Numerele lor sunt 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "studenț", + "start_offset": 0, + "end_offset": 9, + "type": "", + "position": 0 + }, + { + "token": "învaț", + "start_offset": 10, + "end_offset": 16, + "type": "", + "position": 1 + }, + { + "token": "universităț", + "start_offset": 20, + "end_offset": 34, + "type": "", + "position": 3 + }, + { + "token": "român", + "start_offset": 39, + "end_offset": 46, + "type": "", + "position": 5 + }, + { + "token": "numer", + "start_offset": 48, + "end_offset": 56, + "type": "", + "position": 6 + }, + { + "token": "123456", + "start_offset": 66, + "end_offset": 72, + "type": "", + "position": 9 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/russian.md b/_analyzers/language-analyzers/russian.md new file mode 100644 index 0000000000..9552bce9da --- /dev/null +++ b/_analyzers/language-analyzers/russian.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Russian +parent: Language analyzers +grand_parent: Analyzers +nav_order: 280 +--- + +# Russian analyzer + +The built-in `russian` analyzer can be applied to a text field using the following command: + +```json +PUT /russian-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "russian" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_russian_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_russian_analyzer": { + "type": "russian", + "stem_exclusion": ["авторитет", "одобрение"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Russian analyzer internals + +The `russian` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Russian) +- keyword +- stemmer (Russian) + +## Custom Russian 
analyzer + +You can create custom Russian analyzer using the following command: + +```json +PUT /russian-index +{ + "settings": { + "analysis": { + "filter": { + "russian_stop": { + "type": "stop", + "stopwords": "_russian_" + }, + "russian_stemmer": { + "type": "stemmer", + "language": "russian" + }, + "russian_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "russian_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "russian_stop", + "russian_keywords", + "russian_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "russian_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /russian-index/_analyze +{ + "field": "content", + "text": "Студенты учатся в университетах России. Их номера 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "студент", + "start_offset": 0, + "end_offset": 8, + "type": "", + "position": 0 + }, + { + "token": "учат", + "start_offset": 9, + "end_offset": 15, + "type": "", + "position": 1 + }, + { + "token": "университет", + "start_offset": 18, + "end_offset": 31, + "type": "", + "position": 3 + }, + { + "token": "росс", + "start_offset": 32, + "end_offset": 38, + "type": "", + "position": 4 + }, + { + "token": "номер", + "start_offset": 43, + "end_offset": 49, + "type": "", + "position": 6 + }, + { + "token": "123456", + "start_offset": 50, + "end_offset": 56, + "type": "", + "position": 7 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/sorani.md b/_analyzers/language-analyzers/sorani.md new file mode 100644 index 0000000000..df44ea5f27 --- /dev/null +++ b/_analyzers/language-analyzers/sorani.md @@ -0,0 +1,168 @@ +--- +layout: default +title: Sorani +parent: 
Language analyzers +grand_parent: Analyzers +nav_order: 290 +--- + +# Sorani analyzer + +The built-in `sorani` analyzer can be applied to a text field using the following command: + +```json +PUT /sorani-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "sorani" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_sorani_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_sorani_analyzer": { + "type": "sorani", + "stem_exclusion": ["مؤسسه", "اجازه"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Sorani analyzer internals + +The `sorani` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- normalization (Sorani) +- lowercase +- decimal_digit +- stop (Sorani) +- keyword +- stemmer (Sorani) + +## Custom Sorani analyzer + +You can create custom Sorani analyzer using the following command: + +```json +PUT /sorani-index +{ + "settings": { + "analysis": { + "filter": { + "sorani_stop": { + "type": "stop", + "stopwords": "_sorani_" + }, + "sorani_stemmer": { + "type": "stemmer", + "language": "sorani" + }, + "sorani_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "sorani_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "decimal_digit", + "sorani_stop", + "sorani_keywords", + "sorani_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "sorani_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /sorani-index/_analyze +{ + "field": "content", + "text": "خوێندنی فەرمی لە هەولێرەوە. ژمارەکان ١٢٣٤٥٦." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "خوێندن", + "start_offset": 0, + "end_offset": 7, + "type": "", + "position": 0 + }, + { + "token": "فەرم", + "start_offset": 8, + "end_offset": 13, + "type": "", + "position": 1 + }, + { + "token": "هەولێر", + "start_offset": 17, + "end_offset": 26, + "type": "", + "position": 3 + }, + { + "token": "ژمار", + "start_offset": 28, + "end_offset": 36, + "type": "", + "position": 4 + }, + { + "token": "123456", + "start_offset": 37, + "end_offset": 43, + "type": "", + "position": 5 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/spanish.md b/_analyzers/language-analyzers/spanish.md new file mode 100644 index 0000000000..98ded27b83 --- /dev/null +++ b/_analyzers/language-analyzers/spanish.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Spanish +parent: Language analyzers +grand_parent: Analyzers +nav_order: 300 +--- + +# Spanish analyzer + +The built-in `spanish` analyzer can be applied to a text field using the following command: + +```json +PUT /spanish-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "spanish" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_spanish_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_spanish_analyzer": { + "type": "spanish", + "stem_exclusion": ["autoridad", "aprobación"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Spanish analyzer internals + +The `spanish` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Spanish) +- keyword +- stemmer (Spanish) + +## Custom Spanish analyzer + +You can create custom Spanish analyzer using the following command: + +```json +PUT /spanish-index 
+{ + "settings": { + "analysis": { + "filter": { + "spanish_stop": { + "type": "stop", + "stopwords": "_spanish_" + }, + "spanish_stemmer": { + "type": "stemmer", + "language": "light_spanish" + }, + "spanish_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "spanish_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "spanish_stop", + "spanish_keywords", + "spanish_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "spanish_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /spanish-index/_analyze +{ + "field": "content", + "text": "Los estudiantes estudian en universidades españolas. Sus números son 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "estudiant", + "start_offset": 4, + "end_offset": 15, + "type": "", + "position": 1 + }, + { + "token": "estudian", + "start_offset": 16, + "end_offset": 24, + "type": "", + "position": 2 + }, + { + "token": "universidad", + "start_offset": 28, + "end_offset": 41, + "type": "", + "position": 4 + }, + { + "token": "español", + "start_offset": 42, + "end_offset": 51, + "type": "", + "position": 5 + }, + { + "token": "numer", + "start_offset": 57, + "end_offset": 64, + "type": "", + "position": 7 + }, + { + "token": "123456", + "start_offset": 69, + "end_offset": 75, + "type": "", + "position": 9 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/swedish.md b/_analyzers/language-analyzers/swedish.md new file mode 100644 index 0000000000..67decf2344 --- /dev/null +++ b/_analyzers/language-analyzers/swedish.md @@ -0,0 +1,172 @@ +--- +layout: default +title: Swedish +parent: Language analyzers +grand_parent: Analyzers +nav_order: 310 +--- + +# 
Swedish analyzer + +The built-in `swedish` analyzer can be applied to a text field using the following command: + +```json +PUT /swedish-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "swedish" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_swedish_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_swedish_analyzer": { + "type": "swedish", + "stem_exclusion": ["myndighet", "godkännande"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Swedish analyzer internals + +The `swedish` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- lowercase +- stop (Swedish) +- keyword +- stemmer (Swedish) + +## Custom Swedish analyzer + +You can create custom Swedish analyzer using the following command: + +```json +PUT /swedish-index +{ + "settings": { + "analysis": { + "filter": { + "swedish_stop": { + "type": "stop", + "stopwords": "_swedish_" + }, + "swedish_stemmer": { + "type": "stemmer", + "language": "swedish" + }, + "swedish_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "swedish_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "swedish_stop", + "swedish_keywords", + "swedish_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "swedish_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /swedish-index/_analyze +{ + "field": "content", + "text": "Studenter studerar vid svenska universitet. Deras nummer är 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + { + "token": "student", + "start_offset": 0, + "end_offset": 9, + "type": "", + "position": 0 + }, + { + "token": "studer", + "start_offset": 10, + "end_offset": 18, + "type": "", + "position": 1 + }, + { + "token": "svensk", + "start_offset": 23, + "end_offset": 30, + "type": "", + "position": 3 + }, + { + "token": "universitet", + "start_offset": 31, + "end_offset": 42, + "type": "", + "position": 4 + }, + { + "token": "numm", + "start_offset": 50, + "end_offset": 56, + "type": "", + "position": 6 + }, + { + "token": "123456", + "start_offset": 60, + "end_offset": 66, + "type": "", + "position": 8 + } + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/thai.md b/_analyzers/language-analyzers/thai.md new file mode 100644 index 0000000000..f251067dc0 --- /dev/null +++ b/_analyzers/language-analyzers/thai.md @@ -0,0 +1,132 @@ +--- +layout: default +title: Thai +parent: Language analyzers +grand_parent: Analyzers +nav_order: 320 +--- + +# Thai analyzer + +The built-in `thai` analyzer can be applied to a text field using the following command: + +```json +PUT /thai-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "thai" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_thai_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_thai_analyzer": { + "type": "thai", + "stem_exclusion": ["อำนาจ", "การอนุมัติ"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Thai analyzer internals + +The `thai` analyzer is build using the following: + +Tokenizer: `thai` + +Token Filters: +- lowercase +- decimal_digit +- stop (Thai) +- keyword + +## Custom Thai analyzer + +You can create custom Thai analyzer using the 
following command: + +```json +PUT /thai-index +{ + "settings": { + "analysis": { + "filter": { + "thai_stop": { + "type": "stop", + "stopwords": "_thai_" + }, + "thai_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "thai_analyzer": { + "tokenizer": "thai", + "filter": [ + "lowercase", + "decimal_digit", + "thai_stop", + "thai_keywords" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "thai_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /thai-index/_analyze +{ + "field": "content", + "text": "นักเรียนกำลังศึกษาอยู่ที่มหาวิทยาลัยไทย หมายเลข 123456." +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "นักเรียน","start_offset": 0,"end_offset": 8,"type": "word","position": 0}, + {"token": "กำลัง","start_offset": 8,"end_offset": 13,"type": "word","position": 1}, + {"token": "ศึกษา","start_offset": 13,"end_offset": 18,"type": "word","position": 2}, + {"token": "มหาวิทยาลัย","start_offset": 25,"end_offset": 36,"type": "word","position": 5}, + {"token": "ไทย","start_offset": 36,"end_offset": 39,"type": "word","position": 6}, + {"token": "หมายเลข","start_offset": 40,"end_offset": 47,"type": "word","position": 7}, + {"token": "123456","start_offset": 48,"end_offset": 54,"type": "word","position": 8} + ] +} +``` \ No newline at end of file diff --git a/_analyzers/language-analyzers/turkish.md b/_analyzers/language-analyzers/turkish.md new file mode 100644 index 0000000000..9255682322 --- /dev/null +++ b/_analyzers/language-analyzers/turkish.md @@ -0,0 +1,143 @@ +--- +layout: default +title: Turkish +parent: Language analyzers +grand_parent: Analyzers +nav_order: 330 +--- + +# Turkish analyzer + +The built-in `turkish` analyzer can be applied to a text field using the following 
command: + +```json +PUT /turkish-index +{ + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "turkish" + } + } + } +} +``` +{% include copy-curl.html %} + +## Stem exclusion + +You can also use `stem_exclusion` with this language analyzer using the following command: + +```json +PUT index_with_stem_exclusion_turkish_analyzer +{ + "settings": { + "analysis": { + "analyzer": { + "stem_exclusion_turkish_analyzer": { + "type": "turkish", + "stem_exclusion": ["otorite", "onay"] + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Turkish analyzer internals + +The `turkish` analyzer is build using the following: + +Tokenizer: `standard` + +Token Filters: +- apostrophe +- lowercase (Turkish) +- stop (Turkish) +- keyword +- stemmer (Turkish) + +## Custom Turkish analyzer + +You can create custom Turkish analyzer using the following command: + +```json +PUT /turkish-index +{ + "settings": { + "analysis": { + "filter": { + "turkish_stop": { + "type": "stop", + "stopwords": "_turkish_" + }, + "turkish_stemmer": { + "type": "stemmer", + "language": "turkish" + }, + "turkish_lowercase": { + "type": "lowercase", + "language": "turkish" + }, + "turkish_keywords": { + "type": "keyword_marker", + "keywords": [] + } + }, + "analyzer": { + "turkish_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "apostrophe", + "turkish_lowercase", + "turkish_stop", + "turkish_keywords", + "turkish_stemmer" + ] + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "turkish_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the analyzer: + +```json +POST /turkish-index/_analyze +{ + "field": "content", + "text": "Öğrenciler Türk üniversitelerinde öğrenim görüyor. Numara 123456." 
+} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "öğrenci","start_offset": 0,"end_offset": 10,"type": "","position": 0}, + {"token": "türk","start_offset": 11,"end_offset": 15,"type": "","position": 1}, + {"token": "üniversite","start_offset": 16,"end_offset": 33,"type": "","position": 2}, + {"token": "öğre","start_offset": 34,"end_offset": 41,"type": "","position": 3}, + {"token": "görüyor","start_offset": 42,"end_offset": 49,"type": "","position": 4}, + {"token": "numar","start_offset": 51,"end_offset": 57,"type": "","position": 5}, + {"token": "123456","start_offset": 58,"end_offset": 64,"type": "","position": 6} + ] +} +``` \ No newline at end of file From 3fc50b1498325c351f6c416654703970ecb6122b Mon Sep 17 00:00:00 2001 From: AntonEliatra Date: Thu, 7 Nov 2024 11:38:06 +0000 Subject: [PATCH 10/15] Apply suggestions from code review Co-authored-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Signed-off-by: AntonEliatra --- _analyzers/language-analyzers/arabic.md | 10 +++++----- _analyzers/language-analyzers/armenian.md | 2 +- _analyzers/language-analyzers/basque.md | 2 +- _analyzers/language-analyzers/bengali.md | 2 +- _analyzers/language-analyzers/brazilian.md | 2 +- _analyzers/language-analyzers/bulgarian.md | 2 +- _analyzers/language-analyzers/catalan.md | 2 +- _analyzers/language-analyzers/cjk.md | 2 +- _analyzers/language-analyzers/czech.md | 2 +- _analyzers/language-analyzers/danish.md | 2 +- _analyzers/language-analyzers/dutch.md | 2 +- _analyzers/language-analyzers/english.md | 2 +- _analyzers/language-analyzers/estonian.md | 2 +- _analyzers/language-analyzers/finnish.md | 2 +- _analyzers/language-analyzers/french.md | 2 +- _analyzers/language-analyzers/galician.md | 2 +- _analyzers/language-analyzers/german.md | 2 +- _analyzers/language-analyzers/greek.md | 2 +- _analyzers/language-analyzers/hindi.md | 2 +- _analyzers/language-analyzers/hungarian.md | 2 +- 
_analyzers/language-analyzers/index.md | 14 +++++++------- _analyzers/language-analyzers/indonesian.md | 2 +- _analyzers/language-analyzers/irish.md | 2 +- _analyzers/language-analyzers/italian.md | 2 +- _analyzers/language-analyzers/latvian.md | 2 +- _analyzers/language-analyzers/lithuanian.md | 2 +- _analyzers/language-analyzers/norwegian.md | 2 +- _analyzers/language-analyzers/persian.md | 2 +- _analyzers/language-analyzers/portuguese.md | 2 +- _analyzers/language-analyzers/romanian.md | 2 +- _analyzers/language-analyzers/russian.md | 2 +- _analyzers/language-analyzers/sorani.md | 2 +- _analyzers/language-analyzers/spanish.md | 2 +- _analyzers/language-analyzers/swedish.md | 2 +- _analyzers/language-analyzers/thai.md | 2 +- _analyzers/language-analyzers/turkish.md | 2 +- 36 files changed, 46 insertions(+), 46 deletions(-) diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index 2bbfb81140..64671d3b0c 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -27,7 +27,7 @@ PUT /arabic-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_arabic @@ -48,11 +48,11 @@ PUT index_with_stem_exclusion_arabic ## Arabic analyzer internals -The `arabic` analyzer is build using the following: +The `arabic` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: +- Token filters: - lowercase - decimal_digit - stop (Arabic) @@ -62,7 +62,7 @@ Token filters: ## Custom Arabic analyzer -You can create custom Arabic analyzer using the following command: +You can create a custom Arabic analyzer using the following command: ```json PUT /arabic-index diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md index 
9355a49d05..38810533e1 100644 --- a/_analyzers/language-analyzers/armenian.md +++ b/_analyzers/language-analyzers/armenian.md @@ -27,7 +27,7 @@ PUT /arabic-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_armenian_analyzer diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md index ada0b95cf5..47e71b43e3 100644 --- a/_analyzers/language-analyzers/basque.md +++ b/_analyzers/language-analyzers/basque.md @@ -27,7 +27,7 @@ PUT /basque-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_basque_analyzer diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md index ec3f7f0ac5..8107ce7dfe 100644 --- a/_analyzers/language-analyzers/bengali.md +++ b/_analyzers/language-analyzers/bengali.md @@ -27,7 +27,7 @@ PUT /bengali-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_bengali_analyzer diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md index 3e6eb3f89d..925e559ac0 100644 --- a/_analyzers/language-analyzers/brazilian.md +++ b/_analyzers/language-analyzers/brazilian.md @@ -27,7 +27,7 @@ PUT /brazilian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_brazilian_analyzer diff --git 
a/_analyzers/language-analyzers/bulgarian.md b/_analyzers/language-analyzers/bulgarian.md index 682430717f..496ac086b7 100644 --- a/_analyzers/language-analyzers/bulgarian.md +++ b/_analyzers/language-analyzers/bulgarian.md @@ -27,7 +27,7 @@ PUT /bulgarian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_bulgarian_analyzer diff --git a/_analyzers/language-analyzers/catalan.md b/_analyzers/language-analyzers/catalan.md index 7a2c2e690b..d6a9b1c8b2 100644 --- a/_analyzers/language-analyzers/catalan.md +++ b/_analyzers/language-analyzers/catalan.md @@ -27,7 +27,7 @@ PUT /catalan-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_catalan_analyzer diff --git a/_analyzers/language-analyzers/cjk.md b/_analyzers/language-analyzers/cjk.md index 8547a3156f..31dc917e99 100644 --- a/_analyzers/language-analyzers/cjk.md +++ b/_analyzers/language-analyzers/cjk.md @@ -27,7 +27,7 @@ PUT /cjk-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_cjk_analyzer diff --git a/_analyzers/language-analyzers/czech.md b/_analyzers/language-analyzers/czech.md index b7725920e3..3c1fbd9c9c 100644 --- a/_analyzers/language-analyzers/czech.md +++ b/_analyzers/language-analyzers/czech.md @@ -27,7 +27,7 @@ PUT /czech-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT 
index_with_stem_exclusion_czech_analyzer diff --git a/_analyzers/language-analyzers/danish.md b/_analyzers/language-analyzers/danish.md index 652aedb3b7..c9ccc0c01e 100644 --- a/_analyzers/language-analyzers/danish.md +++ b/_analyzers/language-analyzers/danish.md @@ -27,7 +27,7 @@ PUT /danish-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_danish_analyzer diff --git a/_analyzers/language-analyzers/dutch.md b/_analyzers/language-analyzers/dutch.md index 5a4153702d..90ce69fa09 100644 --- a/_analyzers/language-analyzers/dutch.md +++ b/_analyzers/language-analyzers/dutch.md @@ -27,7 +27,7 @@ PUT /dutch-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_dutch_analyzer diff --git a/_analyzers/language-analyzers/english.md b/_analyzers/language-analyzers/english.md index e266681030..fda095b912 100644 --- a/_analyzers/language-analyzers/english.md +++ b/_analyzers/language-analyzers/english.md @@ -27,7 +27,7 @@ PUT /english-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_english_analyzer diff --git a/_analyzers/language-analyzers/estonian.md b/_analyzers/language-analyzers/estonian.md index d67c88d3b2..01961c2d46 100644 --- a/_analyzers/language-analyzers/estonian.md +++ b/_analyzers/language-analyzers/estonian.md @@ -27,7 +27,7 @@ PUT /estonian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language 
analyzer using the following command: ```json PUT index_with_stem_exclusion_estonian_analyzer diff --git a/_analyzers/language-analyzers/finnish.md b/_analyzers/language-analyzers/finnish.md index 73c4eade5e..3ac753e5ea 100644 --- a/_analyzers/language-analyzers/finnish.md +++ b/_analyzers/language-analyzers/finnish.md @@ -27,7 +27,7 @@ PUT /finnish-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_finnish_analyzer diff --git a/_analyzers/language-analyzers/french.md b/_analyzers/language-analyzers/french.md index 574be37ab5..278bfbb333 100644 --- a/_analyzers/language-analyzers/french.md +++ b/_analyzers/language-analyzers/french.md @@ -27,7 +27,7 @@ PUT /french-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_french_analyzer diff --git a/_analyzers/language-analyzers/galician.md b/_analyzers/language-analyzers/galician.md index 75c789f1c2..515717bf3f 100644 --- a/_analyzers/language-analyzers/galician.md +++ b/_analyzers/language-analyzers/galician.md @@ -27,7 +27,7 @@ PUT /galician-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_galician_analyzer diff --git a/_analyzers/language-analyzers/german.md b/_analyzers/language-analyzers/german.md index ed9bb19229..1e679aca0a 100644 --- a/_analyzers/language-analyzers/german.md +++ b/_analyzers/language-analyzers/german.md @@ -27,7 +27,7 @@ PUT /german-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the 
following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_german_analyzer diff --git a/_analyzers/language-analyzers/greek.md b/_analyzers/language-analyzers/greek.md index 94b9e5dddb..4b44d7014c 100644 --- a/_analyzers/language-analyzers/greek.md +++ b/_analyzers/language-analyzers/greek.md @@ -27,7 +27,7 @@ PUT /greek-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_greek_analyzer diff --git a/_analyzers/language-analyzers/hindi.md b/_analyzers/language-analyzers/hindi.md index 14964bfa4d..b1fdabb2b6 100644 --- a/_analyzers/language-analyzers/hindi.md +++ b/_analyzers/language-analyzers/hindi.md @@ -27,7 +27,7 @@ PUT /hindi-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_hindi_analyzer diff --git a/_analyzers/language-analyzers/hungarian.md b/_analyzers/language-analyzers/hungarian.md index f1851edf95..83330eb708 100644 --- a/_analyzers/language-analyzers/hungarian.md +++ b/_analyzers/language-analyzers/hungarian.md @@ -27,7 +27,7 @@ PUT /hungarian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_hungarian_analyzer diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index c69337f3a9..639ff28502 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -41,13 +41,13 @@ PUT my-index } ``` -## stem_exclusion +## Stem exclusion -The 
`stem_exclusion` feature can be applied to many language analyzers by providing a list of lowercase words that should be excluded from stemming. Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring they are not stemmed. +You can apply stem exclusion to many language analyzers by providing a list of lowercase words that should be excluded from stemming. Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring they are not stemmed. -## Example stem_exclusion +## Stem exclusion example -You can use the following command to configure `stem_exclusion`: +Use the following request to configure `stem_exclusion`: ```json PUT index_with_stem_exclusion_english_analyzer @@ -66,7 +66,7 @@ PUT index_with_stem_exclusion_english_analyzer ``` {% include copy-curl.html %} -Following languages support `stem_exclusion`: +The following languages support stem exclusion: - arabic - armenian @@ -101,9 +101,9 @@ Following languages support `stem_exclusion`: - turkish -## stem_exclusion with custom analyzer +## Stem exclusion with custom analyzers -All language analyzers are made up from tokenizers and token filters specific to the particular language. If you want to implement a custom version of the language analyzer with `stem_exclusion`, you need to configure `keyword_marker` token filter and list the necessary words in `keywords` parameter, see the following example: +All language analyzers consist of tokenizers and token filters specific to the particular language. 
If you want to implement a custom version of the language analyzer with stem exclusion, you need to configure the `keyword_marker` token filter and list the words excluded from stemming in the `keywords` parameter: ```json PUT index_with_keyword_marker_analyzer diff --git a/_analyzers/language-analyzers/indonesian.md b/_analyzers/language-analyzers/indonesian.md index feeef6254e..73b551cd9a 100644 --- a/_analyzers/language-analyzers/indonesian.md +++ b/_analyzers/language-analyzers/indonesian.md @@ -27,7 +27,7 @@ PUT /indonesian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_indonesian_analyzer diff --git a/_analyzers/language-analyzers/irish.md b/_analyzers/language-analyzers/irish.md index b914ba6b21..b4e25e57c8 100644 --- a/_analyzers/language-analyzers/irish.md +++ b/_analyzers/language-analyzers/irish.md @@ -27,7 +27,7 @@ PUT /irish-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_irish_analyzer diff --git a/_analyzers/language-analyzers/italian.md b/_analyzers/language-analyzers/italian.md index 11113635ca..1fc1063efd 100644 --- a/_analyzers/language-analyzers/italian.md +++ b/_analyzers/language-analyzers/italian.md @@ -27,7 +27,7 @@ PUT /italian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_italian_analyzer diff --git a/_analyzers/language-analyzers/latvian.md b/_analyzers/language-analyzers/latvian.md index 820cb252b8..620f694c23 100644 --- a/_analyzers/language-analyzers/latvian.md +++ 
b/_analyzers/language-analyzers/latvian.md @@ -27,7 +27,7 @@ PUT /latvian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_latvian_analyzer diff --git a/_analyzers/language-analyzers/lithuanian.md b/_analyzers/language-analyzers/lithuanian.md index 55a94c9c1e..6d67dc2262 100644 --- a/_analyzers/language-analyzers/lithuanian.md +++ b/_analyzers/language-analyzers/lithuanian.md @@ -27,7 +27,7 @@ PUT /lithuanian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_lithuanian_analyzer diff --git a/_analyzers/language-analyzers/norwegian.md b/_analyzers/language-analyzers/norwegian.md index 92fbd9231e..5a00a27924 100644 --- a/_analyzers/language-analyzers/norwegian.md +++ b/_analyzers/language-analyzers/norwegian.md @@ -27,7 +27,7 @@ PUT /norwegian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_norwegian_analyzer diff --git a/_analyzers/language-analyzers/persian.md b/_analyzers/language-analyzers/persian.md index 57ea1ea796..1a335dd483 100644 --- a/_analyzers/language-analyzers/persian.md +++ b/_analyzers/language-analyzers/persian.md @@ -27,7 +27,7 @@ PUT /persian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_persian_analyzer diff --git a/_analyzers/language-analyzers/portuguese.md 
b/_analyzers/language-analyzers/portuguese.md index eb7b959c0b..301d043f7d 100644 --- a/_analyzers/language-analyzers/portuguese.md +++ b/_analyzers/language-analyzers/portuguese.md @@ -27,7 +27,7 @@ PUT /portuguese-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_portuguese_analyzer diff --git a/_analyzers/language-analyzers/romanian.md b/_analyzers/language-analyzers/romanian.md index 9b5c909665..6795d01a4d 100644 --- a/_analyzers/language-analyzers/romanian.md +++ b/_analyzers/language-analyzers/romanian.md @@ -27,7 +27,7 @@ PUT /romanian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_romanian_analyzer diff --git a/_analyzers/language-analyzers/russian.md b/_analyzers/language-analyzers/russian.md index 9552bce9da..3a305ee051 100644 --- a/_analyzers/language-analyzers/russian.md +++ b/_analyzers/language-analyzers/russian.md @@ -27,7 +27,7 @@ PUT /russian-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_russian_analyzer diff --git a/_analyzers/language-analyzers/sorani.md b/_analyzers/language-analyzers/sorani.md index df44ea5f27..760b7e46c6 100644 --- a/_analyzers/language-analyzers/sorani.md +++ b/_analyzers/language-analyzers/sorani.md @@ -27,7 +27,7 @@ PUT /sorani-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT 
index_with_stem_exclusion_sorani_analyzer diff --git a/_analyzers/language-analyzers/spanish.md b/_analyzers/language-analyzers/spanish.md index 98ded27b83..a20d0fa509 100644 --- a/_analyzers/language-analyzers/spanish.md +++ b/_analyzers/language-analyzers/spanish.md @@ -27,7 +27,7 @@ PUT /spanish-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_spanish_analyzer diff --git a/_analyzers/language-analyzers/swedish.md b/_analyzers/language-analyzers/swedish.md index 67decf2344..f70a0dbca1 100644 --- a/_analyzers/language-analyzers/swedish.md +++ b/_analyzers/language-analyzers/swedish.md @@ -27,7 +27,7 @@ PUT /swedish-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_swedish_analyzer diff --git a/_analyzers/language-analyzers/thai.md b/_analyzers/language-analyzers/thai.md index f251067dc0..78c3d1250d 100644 --- a/_analyzers/language-analyzers/thai.md +++ b/_analyzers/language-analyzers/thai.md @@ -27,7 +27,7 @@ PUT /thai-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language analyzer using the following command: ```json PUT index_with_stem_exclusion_thai_analyzer diff --git a/_analyzers/language-analyzers/turkish.md b/_analyzers/language-analyzers/turkish.md index 9255682322..14a6f8e9b6 100644 --- a/_analyzers/language-analyzers/turkish.md +++ b/_analyzers/language-analyzers/turkish.md @@ -27,7 +27,7 @@ PUT /turkish-index ## Stem exclusion -You can also use `stem_exclusion` with this language analyzer using the following command: +You can use `stem_exclusion` with this language 
analyzer using the following command: ```json PUT index_with_stem_exclusion_turkish_analyzer From 26fbb9b5a7f156e3eb0f403b6dd04e05fa4570fa Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Thu, 7 Nov 2024 12:34:36 +0000 Subject: [PATCH 11/15] updating as per pr review Signed-off-by: Anton Rubin --- _analyzers/language-analyzers/arabic.md | 12 ++-- _analyzers/language-analyzers/armenian.md | 14 ++-- _analyzers/language-analyzers/basque.md | 14 ++-- _analyzers/language-analyzers/bengali.md | 24 +++---- _analyzers/language-analyzers/brazilian.md | 14 ++-- _analyzers/language-analyzers/bulgarian.md | 14 ++-- _analyzers/language-analyzers/catalan.md | 16 ++--- _analyzers/language-analyzers/cjk.md | 14 ++-- _analyzers/language-analyzers/czech.md | 14 ++-- _analyzers/language-analyzers/danish.md | 14 ++-- _analyzers/language-analyzers/dutch.md | 16 ++--- _analyzers/language-analyzers/english.md | 16 ++--- _analyzers/language-analyzers/estonian.md | 14 ++-- _analyzers/language-analyzers/finnish.md | 14 ++-- _analyzers/language-analyzers/french.md | 16 ++--- _analyzers/language-analyzers/galician.md | 14 ++-- _analyzers/language-analyzers/german.md | 16 ++--- _analyzers/language-analyzers/greek.md | 14 ++-- _analyzers/language-analyzers/hindi.md | 24 +++---- _analyzers/language-analyzers/hungarian.md | 14 ++-- _analyzers/language-analyzers/index.md | 76 ++++++++++----------- _analyzers/language-analyzers/indonesian.md | 14 ++-- _analyzers/language-analyzers/irish.md | 18 ++--- _analyzers/language-analyzers/italian.md | 16 ++--- _analyzers/language-analyzers/latvian.md | 14 ++-- _analyzers/language-analyzers/lithuanian.md | 14 ++-- _analyzers/language-analyzers/norwegian.md | 14 ++-- _analyzers/language-analyzers/persian.md | 20 +++--- _analyzers/language-analyzers/portuguese.md | 14 ++-- _analyzers/language-analyzers/romanian.md | 14 ++-- _analyzers/language-analyzers/russian.md | 14 ++-- _analyzers/language-analyzers/sorani.md | 18 ++--- 
_analyzers/language-analyzers/spanish.md | 14 ++-- _analyzers/language-analyzers/swedish.md | 14 ++-- _analyzers/language-analyzers/thai.md | 14 ++-- _analyzers/language-analyzers/turkish.md | 16 ++--- 36 files changed, 306 insertions(+), 306 deletions(-) diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index 64671d3b0c..b6508827ff 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -53,12 +53,12 @@ The `arabic` analyzer is built using the following components: - Tokenizer: `standard` - Token filters: -- lowercase -- decimal_digit -- stop (Arabic) -- normalization (Arabic) -- keywords -- stemmer (Arabic) + - lowercase + - decimal_digit + - stop (Arabic) + - normalization (Arabic) + - keywords + - stemmer (Arabic) ## Custom Arabic analyzer diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md index 38810533e1..1338fd38ed 100644 --- a/_analyzers/language-analyzers/armenian.md +++ b/_analyzers/language-analyzers/armenian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_armenian_analyzer ## Armenian analyzer internals -The `armenian` analyzer is build using the following: +The `armenian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Armenian) -- keywords -- stemmer (Armenian) +- Token filters: + - lowercase + - stop (Armenian) + - keywords + - stemmer (Armenian) ## Custom Armenian analyzer diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md index 47e71b43e3..6613bc343b 100644 --- a/_analyzers/language-analyzers/basque.md +++ b/_analyzers/language-analyzers/basque.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_basque_analyzer ## Basque analyzer internals -The `basque` analyzer is build using the following: +The `basque` analyzer is built using the following components: -Tokenizer: `standard` 
+- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Basque) -- keywords -- stemmer (Basque) +- Token filters: + - lowercase + - stop (Basque) + - keywords + - stemmer (Basque) ## Custom Basque analyzer diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md index 8107ce7dfe..e1c53fd387 100644 --- a/_analyzers/language-analyzers/bengali.md +++ b/_analyzers/language-analyzers/bengali.md @@ -48,18 +48,18 @@ PUT index_with_stem_exclusion_bengali_analyzer ## Bengali analyzer internals -The `bengali` analyzer is build using the following: - -Tokenizer: `standard` - -Token filters: -- lowercase -- decimal_digit -- indic_normalization -- normalization (Bengali) -- stop (Bengali) -- keywords -- stemmer (Bengali) +The `bengali` analyzer is built using the following components: + +- Tokenizer: `standard` + +- Token filters: + - lowercase + - decimal_digit + - indic_normalization + - normalization (Bengali) + - stop (Bengali) + - keywords + - stemmer (Bengali) ## Custom Bengali analyzer diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md index 925e559ac0..eae04b03d4 100644 --- a/_analyzers/language-analyzers/brazilian.md +++ b/_analyzers/language-analyzers/brazilian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_brazilian_analyzer ## Brazilian analyzer internals -The `brazilian` analyzer is build using the following: +The `brazilian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Brazilian) -- keywords -- stemmer (Brazilian) +- Token filters: + - lowercase + - stop (Brazilian) + - keywords + - stemmer (Brazilian) ## Custom Brazilian analyzer diff --git a/_analyzers/language-analyzers/bulgarian.md b/_analyzers/language-analyzers/bulgarian.md index 496ac086b7..0ac726cba2 100644 --- a/_analyzers/language-analyzers/bulgarian.md +++ b/_analyzers/language-analyzers/bulgarian.md @@ -48,15 
+48,15 @@ PUT index_with_stem_exclusion_bulgarian_analyzer ## Bulgarian analyzer internals -The `bulgarian` analyzer is build using the following: +The `bulgarian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Bulgarian) -- keywords -- stemmer (Bulgarian) +- Token filters: + - lowercase + - stop (Bulgarian) + - keywords + - stemmer (Bulgarian) ## Custom Bulgarian analyzer diff --git a/_analyzers/language-analyzers/catalan.md b/_analyzers/language-analyzers/catalan.md index d6a9b1c8b2..4727aed9a2 100644 --- a/_analyzers/language-analyzers/catalan.md +++ b/_analyzers/language-analyzers/catalan.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_catalan_analyzer ## Catalan analyzer internals -The `catalan` analyzer is build using the following: +The `catalan` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- elision (Catalan) -- lowercase -- stop (Catalan) -- keywords -- stemmer (Catalan) +- Token filters: + - elision (Catalan) + - lowercase + - stop (Catalan) + - keywords + - stemmer (Catalan) ## Custom Catalan analyzer diff --git a/_analyzers/language-analyzers/cjk.md b/_analyzers/language-analyzers/cjk.md index 31dc917e99..3968113e6e 100644 --- a/_analyzers/language-analyzers/cjk.md +++ b/_analyzers/language-analyzers/cjk.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_cjk_analyzer ## CJK analyzer internals -The `cjk` analyzer is build using the following: +The `cjk` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- cjk_width -- lowercase -- cjk_bigram -- stop (similar to English) +- Token filters: + - cjk_width + - lowercase + - cjk_bigram + - stop (similar to English) ## Custom CJK analyzer diff --git a/_analyzers/language-analyzers/czech.md b/_analyzers/language-analyzers/czech.md index 3c1fbd9c9c..12381472a5 100644 --- 
a/_analyzers/language-analyzers/czech.md +++ b/_analyzers/language-analyzers/czech.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_czech_analyzer ## Czech analyzer internals -The `czech` analyzer is build using the following: +The `czech` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Czech) -- keyword -- stemmer (Czech) +- Token filters: + - lowercase + - stop (Czech) + - keyword + - stemmer (Czech) ## Custom Czech analyzer diff --git a/_analyzers/language-analyzers/danish.md b/_analyzers/language-analyzers/danish.md index c9ccc0c01e..7a5e53f11f 100644 --- a/_analyzers/language-analyzers/danish.md +++ b/_analyzers/language-analyzers/danish.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_danish_analyzer ## Danish analyzer internals -The `danish` analyzer is build using the following: +The `danish` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Danish) -- keyword -- stemmer (Danish) +- Token filters: + - lowercase + - stop (Danish) + - keyword + - stemmer (Danish) ## Custom Danish analyzer diff --git a/_analyzers/language-analyzers/dutch.md b/_analyzers/language-analyzers/dutch.md index 90ce69fa09..334a93f5b0 100644 --- a/_analyzers/language-analyzers/dutch.md +++ b/_analyzers/language-analyzers/dutch.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_dutch_analyzer ## Dutch analyzer internals -The `dutch` analyzer is build using the following: +The `dutch` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Dutch) -- keyword -- stemmer_override -- stemmer (Dutch) +- Token filters: + - lowercase + - stop (Dutch) + - keyword + - stemmer_override + - stemmer (Dutch) ## Custom Dutch analyzer diff --git a/_analyzers/language-analyzers/english.md b/_analyzers/language-analyzers/english.md index 
fda095b912..46a6a20961 100644 --- a/_analyzers/language-analyzers/english.md +++ b/_analyzers/language-analyzers/english.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_english_analyzer ## English analyzer internals -The `english` analyzer is build using the following: +The `english` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- stemmer (possessive_english) -- lowercase -- stop (English) -- keyword -- stemmer (English) +- Token filters: + - stemmer (possessive_english) + - lowercase + - stop (English) + - keyword + - stemmer (English) ## Custom English analyzer diff --git a/_analyzers/language-analyzers/estonian.md b/_analyzers/language-analyzers/estonian.md index 01961c2d46..49411ddf96 100644 --- a/_analyzers/language-analyzers/estonian.md +++ b/_analyzers/language-analyzers/estonian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_estonian_analyzer ## Estonian analyzer internals -The `estonian` analyzer is build using the following: +The `estonian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Estonian) -- keyword -- stemmer (Estonian) +- Token filters: + - lowercase + - stop (Estonian) + - keyword + - stemmer (Estonian) ## Custom Estonian analyzer diff --git a/_analyzers/language-analyzers/finnish.md b/_analyzers/language-analyzers/finnish.md index 3ac753e5ea..f39a53adf9 100644 --- a/_analyzers/language-analyzers/finnish.md +++ b/_analyzers/language-analyzers/finnish.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_finnish_analyzer ## Finnish analyzer internals -The `finnish` analyzer is build using the following: +The `finnish` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Finnish) -- keyword -- stemmer (Finnish) +- Token filters: + - lowercase + - stop (Finnish) + - keyword + - stemmer (Finnish) ## 
Custom Finnish analyzer diff --git a/_analyzers/language-analyzers/french.md b/_analyzers/language-analyzers/french.md index 278bfbb333..fd1c9e7687 100644 --- a/_analyzers/language-analyzers/french.md +++ b/_analyzers/language-analyzers/french.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_french_analyzer ## French analyzer internals -The `french` analyzer is build using the following: +The `french` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- elision (French) -- lowercase -- stop (French) -- keyword -- stemmer (French) +- Token filters: + - elision (French) + - lowercase + - stop (French) + - keyword + - stemmer (French) ## Custom French analyzer diff --git a/_analyzers/language-analyzers/galician.md b/_analyzers/language-analyzers/galician.md index 515717bf3f..d4fd176b87 100644 --- a/_analyzers/language-analyzers/galician.md +++ b/_analyzers/language-analyzers/galician.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_galician_analyzer ## Galician analyzer internals -The `galician` analyzer is build using the following: +The `galician` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (French) -- keyword -- stemmer (French) +- Token filters: + - lowercase + - stop (Galician) + - keyword + - stemmer (Galician) ## Custom Galician analyzer diff --git a/_analyzers/language-analyzers/german.md b/_analyzers/language-analyzers/german.md index 1e679aca0a..d6859381e9 100644 --- a/_analyzers/language-analyzers/german.md +++ b/_analyzers/language-analyzers/german.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_german_analyzer ## German analyzer internals -The `german` analyzer is build using the following: +The `german` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (German) -- keyword -- normalization (German) -- stemmer 
(German) +- Token filters: + - lowercase + - stop (German) + - keyword + - normalization (German) + - stemmer (German) ## Custom German analyzer diff --git a/_analyzers/language-analyzers/greek.md b/_analyzers/language-analyzers/greek.md index 4b44d7014c..fcc1be8c86 100644 --- a/_analyzers/language-analyzers/greek.md +++ b/_analyzers/language-analyzers/greek.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_greek_analyzer ## Greek analyzer internals -The `greek` analyzer is build using the following: +The `greek` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Greek) -- keyword -- stemmer (Greek) +- Token filters: + - lowercase + - stop (Greek) + - keyword + - stemmer (Greek) ## Custom Greek analyzer diff --git a/_analyzers/language-analyzers/hindi.md b/_analyzers/language-analyzers/hindi.md index b1fdabb2b6..d9920008b1 100644 --- a/_analyzers/language-analyzers/hindi.md +++ b/_analyzers/language-analyzers/hindi.md @@ -48,18 +48,18 @@ PUT index_with_stem_exclusion_hindi_analyzer ## Hindi analyzer internals -The `hindi` analyzer is build using the following: - -Tokenizer: `standard` - -Token filters: -- lowercase -- decimal_digit -- keyword -- normalization (indic) -- normalization (Hindi) -- stop (Hindi) -- stemmer (Hindi) +The `hindi` analyzer is built using the following components: + +- Tokenizer: `standard` + +- Token filters: + - lowercase + - decimal_digit + - keyword + - normalization (indic) + - normalization (Hindi) + - stop (Hindi) + - stemmer (Hindi) ## Custom Hindi analyzer diff --git a/_analyzers/language-analyzers/hungarian.md b/_analyzers/language-analyzers/hungarian.md index 83330eb708..601b5d3968 100644 --- a/_analyzers/language-analyzers/hungarian.md +++ b/_analyzers/language-analyzers/hungarian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_hungarian_analyzer ## Hungarian analyzer internals -The `hungarian` analyzer is build using the following: +The 
`hungarian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Hungarian) -- keyword -- stemmer (Hungarian) +- Token filters: + - lowercase + - stop (Hungarian) + - keyword + - stemmer (Hungarian) ## Custom Hungarian analyzer diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 639ff28502..afe9d82452 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -4,7 +4,9 @@ title: Language analyzers nav_order: 100 parent: Analyzers has_children: true -has_toc: false +has_toc: true +redirect_from: + - /query-dsl/analyzers/language-analyzers/ --- # Language analyzers @@ -20,14 +22,14 @@ To use the analyzer when you map an index, specify the value within your query. #### Example request -The following query specifies the `french` language analyzer for the index `my-index`: +The following query specifies index `my-index` with `content` field configured as multi-field and sub-field named `french` is configured with `french` language analyzer: ```json PUT my-index { "mappings": { "properties": { - "text": { + "content": { "type": "text", "fields": { "french": { @@ -40,10 +42,42 @@ PUT my-index } } ``` +{% include copy-curl.html %} + +Default `french` analyzer can also be configured for the entire index using the following query: + +```json +PUT my-index +{ + "settings": { + "analysis": { + "analyzer": { + "default": { + "type": "french" + } + } + } + }, + "mappings": { + "properties": { + "content": { + "type": "text" + }, + "title": { + "type": "text" + }, + "description": { + "type": "text" + } + } + } +} +``` +{% include copy-curl.html %} ## Stem exclusion -You can apply stem exclusion to many language analyzers by providing a list of lowercase words that should be excluded from stemming. 
Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring they are not stemmed. +You can apply stem exclusion to any language analyzer by providing a list of lowercase words that should be excluded from stemming. Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring they are not stemmed. ## Stem exclusion example @@ -66,40 +100,6 @@ PUT index_with_stem_exclusion_english_analyzer ``` {% include copy-curl.html %} -The following languages support stem exclusion: - -- arabic -- armenian -- basque -- bengali -- brazilian -- bulgarian -- catalan -- cjk -- czech -- danish -- dutch -- english -- finnish -- french -- galician -- german -- hindi -- hungarian -- indonesian -- irish -- italian -- latvian -- lithuanian -- norwegian -- portuguese -- romanian -- russian -- sorani -- spanish -- swedish -- turkish - ## Stem exclusion with custom analyzers diff --git a/_analyzers/language-analyzers/indonesian.md b/_analyzers/language-analyzers/indonesian.md index 73b551cd9a..920319082a 100644 --- a/_analyzers/language-analyzers/indonesian.md +++ b/_analyzers/language-analyzers/indonesian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_indonesian_analyzer ## Indonesian analyzer internals -The `indonesian` analyzer is build using the following: +The `indonesian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Indonesian) -- keyword -- stemmer (Indonesian) +- Token filters: + - lowercase + - stop (Indonesian) + - keyword + - stemmer (Indonesian) ## Custom Indonesian analyzer diff --git a/_analyzers/language-analyzers/irish.md b/_analyzers/language-analyzers/irish.md index b4e25e57c8..606a81a10e 100644 --- a/_analyzers/language-analyzers/irish.md +++ b/_analyzers/language-analyzers/irish.md @@ -48,17 +48,17 @@ PUT index_with_stem_exclusion_irish_analyzer ## Irish analyzer internals -The `irish` 
analyzer is build using the following: +The `irish` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- hyphenation (Irish) -- elision (Irish) -- lowercase (Irish) -- stop (Irish) -- keyword -- stemmer (Irish) +- Token filters: + - hyphenation (Irish) + - elision (Irish) + - lowercase (Irish) + - stop (Irish) + - keyword + - stemmer (Irish) ## Custom Irish analyzer diff --git a/_analyzers/language-analyzers/italian.md b/_analyzers/language-analyzers/italian.md index 1fc1063efd..6cf423fe67 100644 --- a/_analyzers/language-analyzers/italian.md +++ b/_analyzers/language-analyzers/italian.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_italian_analyzer ## Italian analyzer internals -The `italian` analyzer is build using the following: +The `italian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- elision (Italian) -- lowercase -- stop (Italian) -- keyword -- stemmer (Italian) +- Token filters: + - elision (Italian) + - lowercase + - stop (Italian) + - keyword + - stemmer (Italian) ## Custom Italian analyzer diff --git a/_analyzers/language-analyzers/latvian.md b/_analyzers/language-analyzers/latvian.md index 620f694c23..edb8c5e95e 100644 --- a/_analyzers/language-analyzers/latvian.md +++ b/_analyzers/language-analyzers/latvian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_latvian_analyzer ## Latvian analyzer internals -The `latvian` analyzer is build using the following: +The `latvian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Latvian) -- keyword -- stemmer (Latvian) +- Token filters: + - lowercase + - stop (Latvian) + - keyword + - stemmer (Latvian) ## Custom Latvian analyzer diff --git a/_analyzers/language-analyzers/lithuanian.md b/_analyzers/language-analyzers/lithuanian.md index 6d67dc2262..7f2da59101 100644 --- 
a/_analyzers/language-analyzers/lithuanian.md +++ b/_analyzers/language-analyzers/lithuanian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_lithuanian_analyzer ## Lithuanian analyzer internals -The `lithuanian` analyzer is build using the following: +The `lithuanian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Lithuanian) -- keyword -- stemmer (Lithuanian) +- Token filters: + - lowercase + - stop (Lithuanian) + - keyword + - stemmer (Lithuanian) ## Custom Lithuanian analyzer diff --git a/_analyzers/language-analyzers/norwegian.md b/_analyzers/language-analyzers/norwegian.md index 5a00a27924..171da8ad51 100644 --- a/_analyzers/language-analyzers/norwegian.md +++ b/_analyzers/language-analyzers/norwegian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_norwegian_analyzer ## Norwegian analyzer internals -The `norwegian` analyzer is build using the following: +The `norwegian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token filters: -- lowercase -- stop (Norwegian) -- keyword -- stemmer (Norwegian) +- Token filters: + - lowercase + - stop (Norwegian) + - keyword + - stemmer (Norwegian) ## Custom Norwegian analyzer diff --git a/_analyzers/language-analyzers/persian.md b/_analyzers/language-analyzers/persian.md index 1a335dd483..d6018ccaab 100644 --- a/_analyzers/language-analyzers/persian.md +++ b/_analyzers/language-analyzers/persian.md @@ -48,19 +48,19 @@ PUT index_with_stem_exclusion_persian_analyzer ## Persian analyzer internals -The `persian` analyzer is build using the following: +The `persian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Char filter: `mapping` +- Char filter: `mapping` -Token filters: -- lowercase -- decimal_digit -- normalization (Arabic) -- normalization (Persian) -- keyword -- stemmer (Norwegian) +- Token filters: + - lowercase + - 
decimal_digit + - normalization (Arabic) + - normalization (Persian) + - keyword + - stemmer (Persian) ## Custom Persian analyzer diff --git a/_analyzers/language-analyzers/portuguese.md b/_analyzers/language-analyzers/portuguese.md index 301d043f7d..9a752dc4c6 100644 --- a/_analyzers/language-analyzers/portuguese.md +++ b/_analyzers/language-analyzers/portuguese.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_portuguese_analyzer ## Portuguese analyzer internals -The `portuguese` analyzer is build using the following: +The `portuguese` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- lowercase -- stop (Portuguese) -- keyword -- stemmer (Portuguese) +- Token Filters: + - lowercase + - stop (Portuguese) + - keyword + - stemmer (Portuguese) ## Custom Portuguese analyzer diff --git a/_analyzers/language-analyzers/romanian.md b/_analyzers/language-analyzers/romanian.md index 6795d01a4d..bffe26288b 100644 --- a/_analyzers/language-analyzers/romanian.md +++ b/_analyzers/language-analyzers/romanian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_romanian_analyzer ## Romanian analyzer internals -The `romanian` analyzer is build using the following: +The `romanian` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- lowercase -- stop (Romanian) -- keyword -- stemmer (Romanian) +- Token Filters: + - lowercase + - stop (Romanian) + - keyword + - stemmer (Romanian) ## Custom Romanian analyzer diff --git a/_analyzers/language-analyzers/russian.md b/_analyzers/language-analyzers/russian.md index 3a305ee051..ac9ae0d72a 100644 --- a/_analyzers/language-analyzers/russian.md +++ b/_analyzers/language-analyzers/russian.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_russian_analyzer ## Russian analyzer internals -The `russian` analyzer is build using the following: +The `russian` analyzer is built using the following components: 
-Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- lowercase -- stop (Russian) -- keyword -- stemmer (Russian) +- Token Filters: + - lowercase + - stop (Russian) + - keyword + - stemmer (Russian) ## Custom Russian analyzer diff --git a/_analyzers/language-analyzers/sorani.md b/_analyzers/language-analyzers/sorani.md index 760b7e46c6..8c31c3ef1e 100644 --- a/_analyzers/language-analyzers/sorani.md +++ b/_analyzers/language-analyzers/sorani.md @@ -48,17 +48,17 @@ PUT index_with_stem_exclusion_sorani_analyzer ## Sorani analyzer internals -The `sorani` analyzer is build using the following: +The `sorani` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- normalization (Sorani) -- lowercase -- decimal_digit -- stop (Sorani) -- keyword -- stemmer (Sorani) +- Token Filters: + - normalization (Sorani) + - lowercase + - decimal_digit + - stop (Sorani) + - keyword + - stemmer (Sorani) ## Custom Sorani analyzer diff --git a/_analyzers/language-analyzers/spanish.md b/_analyzers/language-analyzers/spanish.md index a20d0fa509..3a1573d291 100644 --- a/_analyzers/language-analyzers/spanish.md +++ b/_analyzers/language-analyzers/spanish.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_spanish_analyzer ## Spanish analyzer internals -The `spanish` analyzer is build using the following: +The `spanish` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- lowercase -- stop (Spanish) -- keyword -- stemmer (Spanish) +- Token Filters: + - lowercase + - stop (Spanish) + - keyword + - stemmer (Spanish) ## Custom Spanish analyzer diff --git a/_analyzers/language-analyzers/swedish.md b/_analyzers/language-analyzers/swedish.md index f70a0dbca1..9aadc9bc60 100644 --- a/_analyzers/language-analyzers/swedish.md +++ b/_analyzers/language-analyzers/swedish.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_swedish_analyzer ## Swedish analyzer 
internals -The `swedish` analyzer is build using the following: +The `swedish` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- lowercase -- stop (Swedish) -- keyword -- stemmer (Swedish) +- Token Filters: + - lowercase + - stop (Swedish) + - keyword + - stemmer (Swedish) ## Custom Swedish analyzer diff --git a/_analyzers/language-analyzers/thai.md b/_analyzers/language-analyzers/thai.md index 78c3d1250d..5f33554cf7 100644 --- a/_analyzers/language-analyzers/thai.md +++ b/_analyzers/language-analyzers/thai.md @@ -48,15 +48,15 @@ PUT index_with_stem_exclusion_thai_analyzer ## Thai analyzer internals -The `thai` analyzer is build using the following: +The `thai` analyzer is built using the following components: -Tokenizer: `thai` +- Tokenizer: `thai` -Token Filters: -- lowercase -- decimal_digit -- stop (Thai) -- keyword +- Token Filters: + - lowercase + - decimal_digit + - stop (Thai) + - keyword ## Custom Thai analyzer diff --git a/_analyzers/language-analyzers/turkish.md b/_analyzers/language-analyzers/turkish.md index 14a6f8e9b6..9e9b31acbc 100644 --- a/_analyzers/language-analyzers/turkish.md +++ b/_analyzers/language-analyzers/turkish.md @@ -48,16 +48,16 @@ PUT index_with_stem_exclusion_turkish_analyzer ## Turkish analyzer internals -The `turkish` analyzer is build using the following: +The `turkish` analyzer is built using the following components: -Tokenizer: `standard` +- Tokenizer: `standard` -Token Filters: -- apostrophe -- lowercase (Turkish) -- stop (Turkish) -- keyword -- stemmer (Turkish) +- Token Filters: + - apostrophe + - lowercase (Turkish) + - stop (Turkish) + - keyword + - stemmer (Turkish) ## Custom Turkish analyzer From 5172a0d9f3254e9601a6a1f70aff13ab66af73c8 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Thu, 7 Nov 2024 12:46:20 +0000 Subject: [PATCH 12/15] fixing broken link Signed-off-by: Anton Rubin --- _analyzers/supported-analyzers/index.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/_analyzers/supported-analyzers/index.md b/_analyzers/supported-analyzers/index.md index fef0b4d34e..43e41b8d6a 100644 --- a/_analyzers/supported-analyzers/index.md +++ b/_analyzers/supported-analyzers/index.md @@ -29,7 +29,7 @@ Analyzer | Analysis performed | Analyzer output ## Language analyzers -OpenSearch supports multiple language analyzers. For more information, see [Language analyzers]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/). +OpenSearch supports multiple language analyzers. For more information, see [Language analyzers]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/index). ## Additional analyzers From 719ef66f6b5d0cfbac247dd9a0f79a6389f3b19a Mon Sep 17 00:00:00 2001 From: AntonEliatra Date: Tue, 12 Nov 2024 16:51:06 +0000 Subject: [PATCH 13/15] Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: AntonEliatra --- _analyzers/language-analyzers/arabic.md | 2 +- _analyzers/language-analyzers/armenian.md | 4 ++-- _analyzers/language-analyzers/basque.md | 4 ++-- _analyzers/language-analyzers/bengali.md | 4 ++-- _analyzers/language-analyzers/brazilian.md | 4 ++-- _analyzers/language-analyzers/bulgarian.md | 4 ++-- _analyzers/language-analyzers/catalan.md | 4 ++-- _analyzers/language-analyzers/cjk.md | 2 +- _analyzers/language-analyzers/czech.md | 2 +- _analyzers/language-analyzers/danish.md | 2 +- _analyzers/language-analyzers/dutch.md | 2 +- _analyzers/language-analyzers/english.md | 2 +- _analyzers/language-analyzers/estonian.md | 2 +- _analyzers/language-analyzers/finnish.md | 2 +- _analyzers/language-analyzers/french.md | 2 +- _analyzers/language-analyzers/galician.md | 2 +- _analyzers/language-analyzers/german.md | 2 +- _analyzers/language-analyzers/greek.md | 2 +- _analyzers/language-analyzers/hindi.md | 2 +- _analyzers/language-analyzers/hungarian.md | 2 +- _analyzers/language-analyzers/index.md | 12 ++++++------ _analyzers/language-analyzers/indonesian.md | 2 
+- _analyzers/language-analyzers/irish.md | 2 +- _analyzers/language-analyzers/italian.md | 2 +- _analyzers/language-analyzers/latvian.md | 2 +- _analyzers/language-analyzers/lithuanian.md | 2 +- _analyzers/language-analyzers/norwegian.md | 2 +- _analyzers/language-analyzers/persian.md | 2 +- _analyzers/language-analyzers/portuguese.md | 4 ++-- _analyzers/language-analyzers/romanian.md | 4 ++-- _analyzers/language-analyzers/russian.md | 4 ++-- _analyzers/language-analyzers/sorani.md | 4 ++-- _analyzers/language-analyzers/spanish.md | 4 ++-- _analyzers/language-analyzers/swedish.md | 4 ++-- _analyzers/language-analyzers/thai.md | 4 ++-- _analyzers/language-analyzers/turkish.md | 4 ++-- 36 files changed, 55 insertions(+), 55 deletions(-) diff --git a/_analyzers/language-analyzers/arabic.md b/_analyzers/language-analyzers/arabic.md index b6508827ff..e61c684cbb 100644 --- a/_analyzers/language-analyzers/arabic.md +++ b/_analyzers/language-analyzers/arabic.md @@ -57,7 +57,7 @@ The `arabic` analyzer is built using the following components: - decimal_digit - stop (Arabic) - normalization (Arabic) - - keywords + - keyword - stemmer (Arabic) ## Custom Arabic analyzer diff --git a/_analyzers/language-analyzers/armenian.md b/_analyzers/language-analyzers/armenian.md index 1338fd38ed..9bd0549c80 100644 --- a/_analyzers/language-analyzers/armenian.md +++ b/_analyzers/language-analyzers/armenian.md @@ -55,12 +55,12 @@ The `armenian` analyzer is built using the following components: - Token filters: - lowercase - stop (Armenian) - - keywords + - keyword - stemmer (Armenian) ## Custom Armenian analyzer -You can create custom Armenian analyzer using the following command: +You can create a custom Armenian analyzer using the following command: ```json PUT /armenian-index diff --git a/_analyzers/language-analyzers/basque.md b/_analyzers/language-analyzers/basque.md index 6613bc343b..e73510cc66 100644 --- a/_analyzers/language-analyzers/basque.md +++ 
b/_analyzers/language-analyzers/basque.md @@ -55,12 +55,12 @@ The `basque` analyzer is built using the following components: - Token filters: - lowercase - stop (Basque) - - keywords + - keyword - stemmer (Basque) ## Custom Basque analyzer -You can create custom Basque analyzer using the following command: +You can create a custom Basque analyzer using the following command: ```json PUT /basque-index diff --git a/_analyzers/language-analyzers/bengali.md b/_analyzers/language-analyzers/bengali.md index e1c53fd387..af913a01ef 100644 --- a/_analyzers/language-analyzers/bengali.md +++ b/_analyzers/language-analyzers/bengali.md @@ -58,12 +58,12 @@ The `bengali` analyzer is built using the following components: - indic_normalization - normalization (Bengali) - stop (Bengali) - - keywords + - keyword - stemmer (Bengali) ## Custom Bengali analyzer -You can create custom Bengali analyzer using the following command: +You can create a custom Bengali analyzer using the following command: ```json PUT /bengali-index diff --git a/_analyzers/language-analyzers/brazilian.md b/_analyzers/language-analyzers/brazilian.md index eae04b03d4..67db2b92bc 100644 --- a/_analyzers/language-analyzers/brazilian.md +++ b/_analyzers/language-analyzers/brazilian.md @@ -55,12 +55,12 @@ The `brazilian` analyzer is built using the following components: - Token filters: - lowercase - stop (Brazilian) - - keywords + - keyword - stemmer (Brazilian) ## Custom Brazilian analyzer -You can create custom Brazilian analyzer using the following command: +You can create a custom Brazilian analyzer using the following command: ```json PUT /brazilian-index diff --git a/_analyzers/language-analyzers/bulgarian.md b/_analyzers/language-analyzers/bulgarian.md index 0ac726cba2..42d5794e18 100644 --- a/_analyzers/language-analyzers/bulgarian.md +++ b/_analyzers/language-analyzers/bulgarian.md @@ -55,12 +55,12 @@ The `bulgarian` analyzer is built using the following components: - Token filters: - lowercase - stop 
(Bulgarian) - - keywords + - keyword - stemmer (Bulgarian) ## Custom Bulgarian analyzer -You can create custom Bulgarian analyzer using the following command: +You can create a custom Bulgarian analyzer using the following command: ```json PUT /bulgarian-index diff --git a/_analyzers/language-analyzers/catalan.md b/_analyzers/language-analyzers/catalan.md index 4727aed9a2..89762da094 100644 --- a/_analyzers/language-analyzers/catalan.md +++ b/_analyzers/language-analyzers/catalan.md @@ -56,12 +56,12 @@ The `catalan` analyzer is built using the following components: - elision (Catalan) - lowercase - stop (Catalan) - - keywords + - keyword - stemmer (Catalan) ## Custom Catalan analyzer -You can create custom Catalan analyzer using the following command: +You can create a custom Catalan analyzer using the following command: ```json PUT /catalan-index diff --git a/_analyzers/language-analyzers/cjk.md b/_analyzers/language-analyzers/cjk.md index 3968113e6e..aed7e6da22 100644 --- a/_analyzers/language-analyzers/cjk.md +++ b/_analyzers/language-analyzers/cjk.md @@ -60,7 +60,7 @@ The `cjk` analyzer is built using the following components: ## Custom CJK analyzer -You can create custom CJK analyzer using the following command: +You can create a custom CJK analyzer using the following command: ```json PUT /cjk-index diff --git a/_analyzers/language-analyzers/czech.md b/_analyzers/language-analyzers/czech.md index 12381472a5..c1778cd0f4 100644 --- a/_analyzers/language-analyzers/czech.md +++ b/_analyzers/language-analyzers/czech.md @@ -60,7 +60,7 @@ The `czech` analyzer is built using the following components: ## Custom Czech analyzer -You can create custom Czech analyzer using the following command: +You can create a custom Czech analyzer using the following command: ```json PUT /czech-index diff --git a/_analyzers/language-analyzers/danish.md b/_analyzers/language-analyzers/danish.md index 7a5e53f11f..b5ee1b0e97 100644 --- a/_analyzers/language-analyzers/danish.md +++ 
b/_analyzers/language-analyzers/danish.md @@ -60,7 +60,7 @@ The `danish` analyzer is built using the following components: ## Custom Danish analyzer -You can create custom Danish analyzer using the following command: +You can create a custom Danish analyzer using the following command: ```json PUT /danish-index diff --git a/_analyzers/language-analyzers/dutch.md b/_analyzers/language-analyzers/dutch.md index 334a93f5b0..0259707d78 100644 --- a/_analyzers/language-analyzers/dutch.md +++ b/_analyzers/language-analyzers/dutch.md @@ -61,7 +61,7 @@ The `dutch` analyzer is built using the following components: ## Custom Dutch analyzer -You can create custom Dutch analyzer using the following command: +You can create a custom Dutch analyzer using the following command: ```json PUT /dutch-index diff --git a/_analyzers/language-analyzers/english.md b/_analyzers/language-analyzers/english.md index 46a6a20961..2d0b600312 100644 --- a/_analyzers/language-analyzers/english.md +++ b/_analyzers/language-analyzers/english.md @@ -61,7 +61,7 @@ The `english` analyzer is built using the following components: ## Custom English analyzer -You can create custom English analyzer using the following command: +You can create a custom English analyzer using the following command: ```json PUT /english-index diff --git a/_analyzers/language-analyzers/estonian.md b/_analyzers/language-analyzers/estonian.md index 49411ddf96..a4cb664f18 100644 --- a/_analyzers/language-analyzers/estonian.md +++ b/_analyzers/language-analyzers/estonian.md @@ -60,7 +60,7 @@ The `estonian` analyzer is built using the following components: ## Custom Estonian analyzer -You can create custom Estonian analyzer using the following command: +You can create a custom Estonian analyzer using the following command: ```json PUT /estonian-index diff --git a/_analyzers/language-analyzers/finnish.md b/_analyzers/language-analyzers/finnish.md index f39a53adf9..6f559650d2 100644 --- a/_analyzers/language-analyzers/finnish.md +++ 
b/_analyzers/language-analyzers/finnish.md @@ -60,7 +60,7 @@ The `finnish` analyzer is built using the following components: ## Custom Finnish analyzer -You can create custom Finnish analyzer using the following command: +You can create a custom Finnish analyzer using the following command: ```json PUT /finnish-index diff --git a/_analyzers/language-analyzers/french.md b/_analyzers/language-analyzers/french.md index fd1c9e7687..64e7ab5415 100644 --- a/_analyzers/language-analyzers/french.md +++ b/_analyzers/language-analyzers/french.md @@ -61,7 +61,7 @@ The `french` analyzer is built using the following components: ## Custom French analyzer -You can create custom French analyzer using the following command: +You can create a custom French analyzer using the following command: ```json PUT /french-index diff --git a/_analyzers/language-analyzers/galician.md b/_analyzers/language-analyzers/galician.md index d4fd176b87..00338b23a7 100644 --- a/_analyzers/language-analyzers/galician.md +++ b/_analyzers/language-analyzers/galician.md @@ -60,7 +60,7 @@ The `galician` analyzer is built using the following components: ## Custom Galician analyzer -You can create custom Galician analyzer using the following command: +You can create a custom Galician analyzer using the following command: ```json PUT /galician-index diff --git a/_analyzers/language-analyzers/german.md b/_analyzers/language-analyzers/german.md index d6859381e9..4071ef5378 100644 --- a/_analyzers/language-analyzers/german.md +++ b/_analyzers/language-analyzers/german.md @@ -61,7 +61,7 @@ The `german` analyzer is built using the following components: ## Custom German analyzer -You can create custom German analyzer using the following command: +You can create a custom German analyzer using the following command: ```json PUT /german-index diff --git a/_analyzers/language-analyzers/greek.md b/_analyzers/language-analyzers/greek.md index fcc1be8c86..2446b1e2d6 100644 --- a/_analyzers/language-analyzers/greek.md +++ 
b/_analyzers/language-analyzers/greek.md @@ -60,7 +60,7 @@ The `greek` analyzer is built using the following components: ## Custom Greek analyzer -You can create custom Greek analyzer using the following command: +You can create a custom Greek analyzer using the following command: ```json PUT /greek-index diff --git a/_analyzers/language-analyzers/hindi.md b/_analyzers/language-analyzers/hindi.md index d9920008b1..93f2eea319 100644 --- a/_analyzers/language-analyzers/hindi.md +++ b/_analyzers/language-analyzers/hindi.md @@ -63,7 +63,7 @@ The `hindi` analyzer is built using the following components: ## Custom Hindi analyzer -You can create custom Hindi analyzer using the following command: +You can create a custom Hindi analyzer using the following command: ```json PUT /hindi-index diff --git a/_analyzers/language-analyzers/hungarian.md b/_analyzers/language-analyzers/hungarian.md index 601b5d3968..d115c5d29c 100644 --- a/_analyzers/language-analyzers/hungarian.md +++ b/_analyzers/language-analyzers/hungarian.md @@ -60,7 +60,7 @@ The `hungarian` analyzer is built using the following components: ## Custom Hungarian analyzer -You can create custom Hungarian analyzer using the following command: +You can create a custom Hungarian analyzer using the following command: ```json PUT /hungarian-index diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index afe9d82452..17c3cb613b 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -12,9 +12,9 @@ redirect_from: # Language analyzers OpenSearch supports the following language analyzers: -`arabic`, `armenian`, `basque`, `bengali`, `brazilian`, `bulgarian`, `catalan`, `czech`, `danish`, `dutch`, `english`, `estonian`, `finnish`, `french`, `galician`, `german`, `greek`, `hindi`, `hungarian`, `indonesian`, `irish`, `italian`, `latvian`, `lithuanian`, `norwegian`, `persian`, `portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, 
`turkish`, and `thai`. +`arabic`, `armenian`, `basque`, `bengali`, `brazilian`, `bulgarian`, `catalan`, `czech`, `danish`, `dutch`, `english`, `estonian`, `finnish`, `french`, `galician`, `german`, `greek`, `hindi`, `hungarian`, `indonesian`, `irish`, `italian`, `latvian`, `lithuanian`, `norwegian`, `persian`, `portuguese`, `romanian`, `russian`, `sorani`, `spanish`, `swedish`, `thai`, and `turkish`. -To use the analyzer when you map an index, specify the value within your query. For example, to map your index with the French language analyzer, specify the `french` value for the analyzer field: +To use an analyzer when you map an index, specify the value in your query. For example, to map your index with the French language analyzer, specify the `french` value in the analyzer field: ```json "analyzer": "french" @@ -22,7 +22,7 @@ To use the analyzer when you map an index, specify the value within your query. #### Example request -The following query specifies index `my-index` with `content` field configured as multi-field and sub-field named `french` is configured with `french` language analyzer: +The following query specifies an index `my-index` with the `content` field configured as multi-field, and a sub-field named `french` is configured with the `french` language analyzer: ```json PUT my-index @@ -44,7 +44,7 @@ PUT my-index ``` {% include copy-curl.html %} -Default `french` analyzer can also be configured for the entire index using the following query: +The default `french` analyzer can also be configured for the entire index using the following query: ```json PUT my-index @@ -77,7 +77,7 @@ PUT my-index ## Stem exclusion -You can apply stem exclusion to any language analyzer by providing a list of lowercase words that should be excluded from stemming. Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring they are not stemmed. 
+You can apply stem exclusion to any language analyzer by providing a list of lowercase words that should be excluded from stemming. Internally, OpenSearch uses the `keyword_marker` token filter to mark these words as keywords, ensuring that they are not stemmed. ## Stem exclusion example @@ -103,7 +103,7 @@ PUT index_with_stem_exclusion_english_analyzer ## Stem exclusion with custom analyzers -All language analyzers consist of tokenizers and token filters specific to the particular language. If you want to implement a custom version of the language analyzer with stem exclusion, you need to configure the `keyword_marker` token filter and list the words excluded from stemming in the `keywords` parameter: +All language analyzers consist of tokenizers and token filters specific to a particular language. If you want to implement a custom version of the language analyzer with stem exclusion, you need to configure the `keyword_marker` token filter and list the words excluded from stemming in the `keywords` parameter: ```json PUT index_with_keyword_marker_analyzer diff --git a/_analyzers/language-analyzers/indonesian.md b/_analyzers/language-analyzers/indonesian.md index 920319082a..5c3d430b3a 100644 --- a/_analyzers/language-analyzers/indonesian.md +++ b/_analyzers/language-analyzers/indonesian.md @@ -60,7 +60,7 @@ The `indonesian` analyzer is built using the following components: ## Custom Indonesian analyzer -You can create custom Indonesian analyzer using the following command: +You can create a custom Indonesian analyzer using the following command: ```json PUT /hungarian-index diff --git a/_analyzers/language-analyzers/irish.md b/_analyzers/language-analyzers/irish.md index 606a81a10e..3e1535d134 100644 --- a/_analyzers/language-analyzers/irish.md +++ b/_analyzers/language-analyzers/irish.md @@ -62,7 +62,7 @@ The `irish` analyzer is built using the following components: ## Custom Irish analyzer -You can create custom Irish analyzer using the following command: +You 
can create a custom Irish analyzer using the following command: ```json PUT /irish-index diff --git a/_analyzers/language-analyzers/italian.md b/_analyzers/language-analyzers/italian.md index 6cf423fe67..190056d63c 100644 --- a/_analyzers/language-analyzers/italian.md +++ b/_analyzers/language-analyzers/italian.md @@ -61,7 +61,7 @@ The `italian` analyzer is built using the following components: ## Custom Italian analyzer -You can create custom Italian analyzer using the following command: +You can create a custom Italian analyzer using the following command: ```json PUT /italian-index diff --git a/_analyzers/language-analyzers/latvian.md b/_analyzers/language-analyzers/latvian.md index edb8c5e95e..2301759763 100644 --- a/_analyzers/language-analyzers/latvian.md +++ b/_analyzers/language-analyzers/latvian.md @@ -60,7 +60,7 @@ The `latvian` analyzer is built using the following components: ## Custom Latvian analyzer -You can create custom Latvian analyzer using the following command: +You can create a custom Latvian analyzer using the following command: ```json PUT /italian-index diff --git a/_analyzers/language-analyzers/lithuanian.md b/_analyzers/language-analyzers/lithuanian.md index 7f2da59101..ca5966c54e 100644 --- a/_analyzers/language-analyzers/lithuanian.md +++ b/_analyzers/language-analyzers/lithuanian.md @@ -60,7 +60,7 @@ The `lithuanian` analyzer is built using the following components: ## Custom Lithuanian analyzer -You can create custom Lithuanian analyzer using the following command: +You can create a custom Lithuanian analyzer using the following command: ```json PUT /lithuanian-index diff --git a/_analyzers/language-analyzers/norwegian.md b/_analyzers/language-analyzers/norwegian.md index 171da8ad51..cfb04eebf3 100644 --- a/_analyzers/language-analyzers/norwegian.md +++ b/_analyzers/language-analyzers/norwegian.md @@ -60,7 +60,7 @@ The `norwegian` analyzer is built using the following components: ## Custom Norwegian analyzer -You can create custom 
Norwegian analyzer using the following command: +You can create a custom Norwegian analyzer using the following command: ```json PUT /norwegian-index diff --git a/_analyzers/language-analyzers/persian.md b/_analyzers/language-analyzers/persian.md index d6018ccaab..40b38656fd 100644 --- a/_analyzers/language-analyzers/persian.md +++ b/_analyzers/language-analyzers/persian.md @@ -64,7 +64,7 @@ The `persian` analyzer is built using the following components: ## Custom Persian analyzer -You can create custom Persian analyzer using the following command: +You can create a custom Persian analyzer using the following command: ```json PUT /persian-index diff --git a/_analyzers/language-analyzers/portuguese.md b/_analyzers/language-analyzers/portuguese.md index 9a752dc4c6..166ffa0010 100644 --- a/_analyzers/language-analyzers/portuguese.md +++ b/_analyzers/language-analyzers/portuguese.md @@ -52,7 +52,7 @@ The `portuguese` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - lowercase - stop (Portuguese) - keyword @@ -60,7 +60,7 @@ The `portuguese` analyzer is built using the following components: ## Custom Portuguese analyzer -You can create custom Portuguese analyzer using the following command: +You can create a custom Portuguese analyzer using the following command: ```json PUT /portuguese-index diff --git a/_analyzers/language-analyzers/romanian.md b/_analyzers/language-analyzers/romanian.md index bffe26288b..cad0953385 100644 --- a/_analyzers/language-analyzers/romanian.md +++ b/_analyzers/language-analyzers/romanian.md @@ -52,7 +52,7 @@ The `romanian` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - lowercase - stop (Romanian) - keyword @@ -60,7 +60,7 @@ The `romanian` analyzer is built using the following components: ## Custom Romanian analyzer -You can create custom Romanian analyzer using the following command: +You can create a custom Romanian 
analyzer using the following command: ```json PUT /romanian-index diff --git a/_analyzers/language-analyzers/russian.md b/_analyzers/language-analyzers/russian.md index ac9ae0d72a..bd57ba0b27 100644 --- a/_analyzers/language-analyzers/russian.md +++ b/_analyzers/language-analyzers/russian.md @@ -52,7 +52,7 @@ The `russian` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - lowercase - stop (Russian) - keyword @@ -60,7 +60,7 @@ The `russian` analyzer is built using the following components: ## Custom Russian analyzer -You can create custom Russian analyzer using the following command: +You can create a custom Russian analyzer using the following command: ```json PUT /russian-index diff --git a/_analyzers/language-analyzers/sorani.md b/_analyzers/language-analyzers/sorani.md index 8c31c3ef1e..f71d43c481 100644 --- a/_analyzers/language-analyzers/sorani.md +++ b/_analyzers/language-analyzers/sorani.md @@ -52,7 +52,7 @@ The `sorani` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - normalization (Sorani) - lowercase - decimal_digit @@ -62,7 +62,7 @@ The `sorani` analyzer is built using the following components: ## Custom Sorani analyzer -You can create custom Sorani analyzer using the following command: +You can create a custom Sorani analyzer using the following command: ```json PUT /sorani-index diff --git a/_analyzers/language-analyzers/spanish.md b/_analyzers/language-analyzers/spanish.md index 3a1573d291..8a0d8fad3c 100644 --- a/_analyzers/language-analyzers/spanish.md +++ b/_analyzers/language-analyzers/spanish.md @@ -52,7 +52,7 @@ The `spanish` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - lowercase - stop (Spanish) - keyword @@ -60,7 +60,7 @@ The `spanish` analyzer is built using the following components: ## Custom Spanish analyzer -You can create custom Spanish analyzer 
using the following command: +You can create a custom Spanish analyzer using the following command: ```json PUT /spanish-index diff --git a/_analyzers/language-analyzers/swedish.md b/_analyzers/language-analyzers/swedish.md index 9aadc9bc60..9da595f12e 100644 --- a/_analyzers/language-analyzers/swedish.md +++ b/_analyzers/language-analyzers/swedish.md @@ -52,7 +52,7 @@ The `swedish` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - lowercase - stop (Swedish) - keyword @@ -60,7 +60,7 @@ The `swedish` analyzer is built using the following components: ## Custom Swedish analyzer -You can create custom Swedish analyzer using the following command: +You can create a custom Swedish analyzer using the following command: ```json PUT /swedish-index diff --git a/_analyzers/language-analyzers/thai.md b/_analyzers/language-analyzers/thai.md index 5f33554cf7..e4daa1f0be 100644 --- a/_analyzers/language-analyzers/thai.md +++ b/_analyzers/language-analyzers/thai.md @@ -52,7 +52,7 @@ The `thai` analyzer is built using the following components: - Tokenizer: `thai` -- Token Filters: +- Token filters: - lowercase - decimal_digit - stop (Thai) @@ -60,7 +60,7 @@ The `thai` analyzer is built using the following components: ## Custom Thai analyzer -You can create custom Thai analyzer using the following command: +You can create a custom Thai analyzer using the following command: ```json PUT /thai-index diff --git a/_analyzers/language-analyzers/turkish.md b/_analyzers/language-analyzers/turkish.md index 9e9b31acbc..fb36c5413c 100644 --- a/_analyzers/language-analyzers/turkish.md +++ b/_analyzers/language-analyzers/turkish.md @@ -52,7 +52,7 @@ The `turkish` analyzer is built using the following components: - Tokenizer: `standard` -- Token Filters: +- Token filters: - apostrophe - lowercase (Turkish) - stop (Turkish) @@ -61,7 +61,7 @@ The `turkish` analyzer is built using the following components: ## Custom Turkish analyzer -You 
can create custom Turkish analyzer using the following command: +You can create a custom Turkish analyzer using the following command: ```json PUT /turkish-index From e29f6900e13eb6c16b57457e880647dbd4ffd58c Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:19:32 -0500 Subject: [PATCH 14/15] Update _analyzers/language-analyzers/index.md Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> From 3a429eb4968a8d44510f88784dbda5d5fee6e3a0 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 14 Nov 2024 16:25:03 -0500 Subject: [PATCH 15/15] Add redirect to index page Signed-off-by: Fanit Kolchina --- _analyzers/language-analyzers/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/_analyzers/language-analyzers/index.md b/_analyzers/language-analyzers/index.md index 17c3cb613b..89a4a42254 100644 --- a/_analyzers/language-analyzers/index.md +++ b/_analyzers/language-analyzers/index.md @@ -7,6 +7,7 @@ has_children: true has_toc: true redirect_from: - /query-dsl/analyzers/language-analyzers/ + - /analyzers/language-analyzers/ --- # Language analyzers