diff --git a/build.xml b/build.xml
index 24107ac14b..35c55928d1 100644
--- a/build.xml
+++ b/build.xml
@@ -241,14 +241,6 @@
             </dependencies>
             <path refid="cql3-grammar.classpath" classpath="runtime"/>
         </resolver:resolve>
-        <resolver:resolve>
-            <remoterepos refid="all"/>
-            <dependencies>
-                <dependency groupId="de.jflex" artifactId="jflex" version="1.9.1" />
-            </dependencies>
-            <path refid="jflex.classpath" classpath="runtime"/>
-        </resolver:resolve>
-
         <macrodef name="install">
             <attribute name="pomFile"/>
             <attribute name="file"/>
@@ -389,27 +381,6 @@
             <arg value="10"/> <!-- default is 60 -->
         </java>
     </target>
-
-    <target name="generate-cql-html" depends="resolver-init" description="Generate HTML from textile source">
-        <taskdef classpathref="wikitext.classpath" resource="wikitexttasks.properties"/>
-        <wikitext-to-html markupLanguage="Textile">
-            <fileset dir="${basedir}">
-                <include name="doc/cql3/*.textile"/>
-            </fileset>
-        </wikitext-to-html>
-    </target>
-
-    <!--
-        Generates Java sources for tokenization support from jflex
-        grammar files
-    -->
-    <target name="generate-jflex-java" description="Generate Java from jflex grammar">
-<!--        <taskdef classname="jflex.anttask.JFlexTask" name="jflex" classpath="${build.dir.lib}/jars/jflex-1.9.1.jar" />-->
-        <taskdef classname="jflex.anttask.JFlexTask" classpathref="jflex.classpath" name="jflex"/>
-        <jflex file="${build.src.java}/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerImpl.jflex"
-               destdir="${build.src.gen-java}/"/>
-    </target>
-
     <!--
        Fetch Maven Ant Tasks and Cassandra's dependencies
        These targets are intentionally free of dependencies so that they
@@ -588,9 +559,6 @@
                 </dependency>
                 <dependency groupId="joda-time" artifactId="joda-time" version="2.4"/>
                 <dependency groupId="com.carrotsearch" artifactId="hppc" version="0.5.4"/>
-                <dependency groupId="de.jflex" artifactId="jflex" version="1.9.1">
-                    <exclusion groupId="org.apache.ant" artifactId="ant"/>
-                </dependency>
                 <dependency groupId="com.github.rholder" artifactId="snowball-stemmer" version="1.3.0.581.1"/>
                 <dependency groupId="com.googlecode.concurrent-trees" artifactId="concurrent-trees" version="2.4.0"/>
                 <dependency groupId="com.github.ben-manes.caffeine" artifactId="caffeine" version="2.2.6"/>
@@ -785,8 +753,6 @@
             <dependency groupId="org.hdrhistogram" artifactId="HdrHistogram"/>
 
             <dependency groupId="com.github.luben" artifactId="zstd-jni"/>
-            <!-- sasi deps -->
-            <dependency groupId="de.jflex" artifactId="jflex"/>
             <dependency groupId="com.github.rholder" artifactId="snowball-stemmer"/>
             <dependency groupId="com.googlecode.concurrent-trees" artifactId="concurrent-trees"/>
 
@@ -909,7 +875,7 @@
             </classpath>
         </javac>
     </target>
-    <target depends="init,gen-cql3-grammar,generate-cql-html"
+    <target depends="init,gen-cql3-grammar"
             name="build-project">
         <echo message="${ant.project.name}: ${ant.file}"/>
         <antcall target="_build_multi_java"/>
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
deleted file mode 100644
index 05dfedc6c4..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer;
-
-import java.nio.CharBuffer;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-import com.google.common.annotations.Beta;
-import com.google.common.base.Preconditions;
-
-import org.apache.cassandra.db.marshal.AbstractType;
-import org.apache.cassandra.db.marshal.AsciiType;
-import org.apache.cassandra.db.marshal.UTF8Type;
-import org.apache.cassandra.utils.AbstractIterator;
-
-@Beta
-public class DelimiterAnalyzer extends AbstractAnalyzer
-{
-
-    private static final Map<AbstractType<?>, Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>, Charset>()
-    {{
-        put(UTF8Type.instance, StandardCharsets.UTF_8);
-        put(AsciiType.instance, StandardCharsets.US_ASCII);
-    }};
-
-    private char delimiter;
-    private Charset charset;
-    private Iterator<ByteBuffer> iter;
-
-    public DelimiterAnalyzer()
-    {
-    }
-
-    @Override
-    public ByteBuffer next()
-    {
-        return iter.next();
-    }
-
-    public void init(Map<String, String> options, AbstractType<?> validator)
-    {
-        DelimiterTokenizingOptions tokenizingOptions = DelimiterTokenizingOptions.buildFromMap(options);
-        delimiter = tokenizingOptions.getDelimiter();
-        charset = VALID_ANALYZABLE_TYPES.get(validator);
-    }
-
-    public boolean hasNext()
-    {
-        return iter.hasNext();
-    }
-
-    public void reset(ByteBuffer input)
-    {
-        Preconditions.checkNotNull(input);
-        final CharBuffer cb = charset.decode(input);
-
-        this.iter = new AbstractIterator<ByteBuffer>() {
-            protected ByteBuffer computeNext() {
-
-                if (!cb.hasRemaining())
-                    return endOfData();
-
-                CharBuffer readahead = cb.duplicate();
-                // loop until we see the next delimiter character, or reach end of data
-                boolean readaheadRemaining;
-                while ((readaheadRemaining = readahead.hasRemaining()) && readahead.get() != delimiter);
-
-                char[] chars = new char[readahead.position() - cb.position() - (readaheadRemaining ? 1 : 0)];
-                cb.get(chars);
-                Preconditions.checkState(!cb.hasRemaining() || cb.get() == delimiter);
-
-                return 0 < chars.length
-                        ? charset.encode(CharBuffer.wrap(chars))
-                        // blank partition keys not permitted, ref ConcurrentRadixTree.putIfAbsent(..)
-                        : computeNext();
-            }
-        };
-    }
-
-    @Override
-    public boolean isTokenizing()
-    {
-        return true;
-    }
-
-    @Override
-    public boolean isCompatibleWith(AbstractType<?> validator)
-    {
-        return VALID_ANALYZABLE_TYPES.containsKey(validator);
-    }
-}
\ No newline at end of file
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterTokenizingOptions.java b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterTokenizingOptions.java
deleted file mode 100644
index c2c8ef7d53..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterTokenizingOptions.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer;
-
-import java.util.Map;
-
-/** Simple tokenizer based on a specified delimiter (rather than whitespace).
- */
-public class DelimiterTokenizingOptions
-{
-    public static final String DELIMITER = "delimiter";
-
-    private final char delimiter;
-
-    private DelimiterTokenizingOptions(char delimiter)
-    {
-        this.delimiter = delimiter;
-    }
-
-    char getDelimiter()
-    {
-        return delimiter;
-    }
-
-    private static class OptionsBuilder
-    {
-        private char delimiter = ',';
-
-        public DelimiterTokenizingOptions build()
-        {
-            return new DelimiterTokenizingOptions(delimiter);
-        }
-    }
-
-    static DelimiterTokenizingOptions buildFromMap(Map<String, String> optionsMap)
-    {
-        OptionsBuilder optionsBuilder = new OptionsBuilder();
-
-        for (Map.Entry<String, String> entry : optionsMap.entrySet())
-        {
-            switch (entry.getKey())
-            {
-                case DELIMITER:
-                {
-                    String value = entry.getValue();
-                    if (1 != value.length())
-                        throw new IllegalArgumentException(String.format("Only single character delimiters supported, was %s", value));
-
-                    optionsBuilder.delimiter = entry.getValue().charAt(0);
-                    break;
-                }
-            }
-        }
-        return optionsBuilder.build();
-    }
-}
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/SUPPLEMENTARY.jflex-macro b/src/java/org/apache/cassandra/index/sasi/analyzer/SUPPLEMENTARY.jflex-macro
deleted file mode 100644
index f5bf68e254..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/SUPPLEMENTARY.jflex-macro
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Generated using ICU4J 52.1.0.0
-// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
-
-
-ALetterSupp = (
-	  ([\ud83b][\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB])
-	| ([\ud81a][\uDC00-\uDE38])
-	| ([\ud81b][\uDF00-\uDF44\uDF50\uDF93-\uDF9F])
-	| ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
-	| ([\ud80d][\uDC00-\uDC2E])
-	| ([\ud80c][\uDC00-\uDFFF])
-	| ([\ud809][\uDC00-\uDC62])
-	| ([\ud808][\uDC00-\uDF6E])
-	| ([\ud805][\uDE80-\uDEAA])
-	| ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF\uDCD0-\uDCE8\uDD03-\uDD26\uDD83-\uDDB2\uDDC1-\uDDC4])
-	| ([\ud801][\uDC00-\uDC9D])
-	| ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
-	| ([\ud803][\uDC00-\uDC48])
-	| ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
-)
-FormatSupp = (
-	  ([\ud804][\uDCBD])
-	| ([\ud834][\uDD73-\uDD7A])
-	| ([\udb40][\uDC01\uDC20-\uDC7F])
-)
-NumericSupp = (
-	  ([\ud805][\uDEC0-\uDEC9])
-	| ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9])
-	| ([\ud835][\uDFCE-\uDFFF])
-	| ([\ud801][\uDCA0-\uDCA9])
-)
-ExtendSupp = (
-	  ([\ud81b][\uDF51-\uDF7E\uDF8F-\uDF92])
-	| ([\ud805][\uDEAB-\uDEB7])
-	| ([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA\uDD00-\uDD02\uDD27-\uDD34\uDD80-\uDD82\uDDB3-\uDDC0])
-	| ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
-	| ([\ud800][\uDDFD])
-	| ([\udb40][\uDD00-\uDDEF])
-	| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
-)
-KatakanaSupp = (
-	  ([\ud82c][\uDC00])
-)
-MidLetterSupp = (
-	  []
-)
-MidNumSupp = (
-	  []
-)
-MidNumLetSupp = (
-	  []
-)
-ExtendNumLetSupp = (
-	  []
-)
-ExtendNumLetSupp = (
-	  []
-)
-ComplexContextSupp = (
-	  []
-)
-HanSupp = (
-	  ([\ud87e][\uDC00-\uDE1D])
-	| ([\ud86b][\uDC00-\uDFFF])
-	| ([\ud86a][\uDC00-\uDFFF])
-	| ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
-	| ([\ud868][\uDC00-\uDFFF])
-	| ([\ud86e][\uDC00-\uDC1D])
-	| ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
-	| ([\ud86c][\uDC00-\uDFFF])
-	| ([\ud863][\uDC00-\uDFFF])
-	| ([\ud862][\uDC00-\uDFFF])
-	| ([\ud861][\uDC00-\uDFFF])
-	| ([\ud860][\uDC00-\uDFFF])
-	| ([\ud867][\uDC00-\uDFFF])
-	| ([\ud866][\uDC00-\uDFFF])
-	| ([\ud865][\uDC00-\uDFFF])
-	| ([\ud864][\uDC00-\uDFFF])
-	| ([\ud858][\uDC00-\uDFFF])
-	| ([\ud859][\uDC00-\uDFFF])
-	| ([\ud85a][\uDC00-\uDFFF])
-	| ([\ud85b][\uDC00-\uDFFF])
-	| ([\ud85c][\uDC00-\uDFFF])
-	| ([\ud85d][\uDC00-\uDFFF])
-	| ([\ud85e][\uDC00-\uDFFF])
-	| ([\ud85f][\uDC00-\uDFFF])
-	| ([\ud850][\uDC00-\uDFFF])
-	| ([\ud851][\uDC00-\uDFFF])
-	| ([\ud852][\uDC00-\uDFFF])
-	| ([\ud853][\uDC00-\uDFFF])
-	| ([\ud854][\uDC00-\uDFFF])
-	| ([\ud855][\uDC00-\uDFFF])
-	| ([\ud856][\uDC00-\uDFFF])
-	| ([\ud857][\uDC00-\uDFFF])
-	| ([\ud849][\uDC00-\uDFFF])
-	| ([\ud848][\uDC00-\uDFFF])
-	| ([\ud84b][\uDC00-\uDFFF])
-	| ([\ud84a][\uDC00-\uDFFF])
-	| ([\ud84d][\uDC00-\uDFFF])
-	| ([\ud84c][\uDC00-\uDFFF])
-	| ([\ud84f][\uDC00-\uDFFF])
-	| ([\ud84e][\uDC00-\uDFFF])
-	| ([\ud841][\uDC00-\uDFFF])
-	| ([\ud840][\uDC00-\uDFFF])
-	| ([\ud843][\uDC00-\uDFFF])
-	| ([\ud842][\uDC00-\uDFFF])
-	| ([\ud845][\uDC00-\uDFFF])
-	| ([\ud844][\uDC00-\uDFFF])
-	| ([\ud847][\uDC00-\uDFFF])
-	| ([\ud846][\uDC00-\uDFFF])
-)
-HiraganaSupp = (
-	  ([\ud83c][\uDE00])
-	| ([\ud82c][\uDC01])
-)
-SingleQuoteSupp = (
-	  []
-)
-DoubleQuoteSupp = (
-	  []
-)
-HebrewLetterSupp = (
-	  []
-)
-RegionalIndicatorSupp = (
-	  ([\ud83c][\uDDE6-\uDDFF])
-)
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerImpl.jflex b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerImpl.jflex
deleted file mode 100644
index 86c645101d..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerImpl.jflex
+++ /dev/null
@@ -1,220 +0,0 @@
-package org.apache.cassandra.index.sasi.analyzer;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Arrays;
-
-/**
- * This class implements Word Break rules from the Unicode Text Segmentation 
- * algorithm, as specified in 
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>. 
- * <p/>
- * Tokens produced are of the following types:
- * <ul>
- *   <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
- *   <li>&lt;NUM&gt;: A number</li>
- *   <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
- *       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
- *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
- *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
- *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
- * </ul>
- */
-%%
-
-%unicode 6.3
-%integer
-%final
-%public
-%class StandardTokenizerImpl
-%implements StandardTokenizerInterface
-%function getNextToken
-%char
-%buffer 4096
-
-%include SUPPLEMENTARY.jflex-macro
-ALetter           = (\p{WB:ALetter}                                     | {ALetterSupp})
-Format            = (\p{WB:Format}                                      | {FormatSupp})
-Numeric           = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
-Extend            = (\p{WB:Extend}                                      | {ExtendSupp})
-Katakana          = (\p{WB:Katakana}                                    | {KatakanaSupp})
-MidLetter         = (\p{WB:MidLetter}                                   | {MidLetterSupp})
-MidNum            = (\p{WB:MidNum}                                      | {MidNumSupp})
-MidNumLet         = (\p{WB:MidNumLet}                                   | {MidNumLetSupp})
-ExtendNumLet      = (\p{WB:ExtendNumLet}                                | {ExtendNumLetSupp})
-ComplexContext    = (\p{LB:Complex_Context}                             | {ComplexContextSupp})
-Han               = (\p{Script:Han}                                     | {HanSupp})
-Hiragana          = (\p{Script:Hiragana}                                | {HiraganaSupp})
-SingleQuote       = (\p{WB:Single_Quote}                                | {SingleQuoteSupp})
-DoubleQuote       = (\p{WB:Double_Quote}                                | {DoubleQuoteSupp})
-HebrewLetter      = (\p{WB:Hebrew_Letter}                               | {HebrewLetterSupp})
-RegionalIndicator = (\p{WB:Regional_Indicator}                          | {RegionalIndicatorSupp})
-HebrewOrALetter   = ({HebrewLetter} | {ALetter})
-
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
-HebrewOrALetterEx   = {HebrewOrALetter}                                         ({Format} | {Extend})*
-NumericEx           = {Numeric}                                                 ({Format} | {Extend})*
-KatakanaEx          = {Katakana}                                                ({Format} | {Extend})* 
-MidLetterEx         = ({MidLetter} | {MidNumLet} | {SingleQuote})               ({Format} | {Extend})* 
-MidNumericEx        = ({MidNum} | {MidNumLet} | {SingleQuote})                  ({Format} | {Extend})*
-ExtendNumLetEx      = {ExtendNumLet}                                            ({Format} | {Extend})*
-HanEx               = {Han}                                                     ({Format} | {Extend})*
-HiraganaEx          = {Hiragana}                                                ({Format} | {Extend})*
-SingleQuoteEx       = {SingleQuote}                                             ({Format} | {Extend})*                                            
-DoubleQuoteEx       = {DoubleQuote}                                             ({Format} | {Extend})*
-HebrewLetterEx      = {HebrewLetter}                                            ({Format} | {Extend})*
-RegionalIndicatorEx = {RegionalIndicator}                                       ({Format} | {Extend})*
-
-
-%{
-  /** Alphanumeric sequences */
-  public static final int WORD_TYPE = StandardAnalyzer.TokenType.ALPHANUM.value;
-  
-  /** Numbers */
-  public static final int NUMERIC_TYPE = StandardAnalyzer.TokenType.NUM.value;
-  
-  /**
-   * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
-   * scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept 
-   * together as as a single token rather than broken up, because the logic
-   * required to break them at word boundaries is too complex for UAX#29.
-   * <p>
-   * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
-   */
-  public static final int SOUTH_EAST_ASIAN_TYPE = StandardAnalyzer.TokenType.SOUTHEAST_ASIAN.value;
-  
-  public static final int IDEOGRAPHIC_TYPE = StandardAnalyzer.TokenType.IDEOGRAPHIC.value;
-  
-  public static final int HIRAGANA_TYPE = StandardAnalyzer.TokenType.HIRAGANA.value;
-  
-  public static final int KATAKANA_TYPE = StandardAnalyzer.TokenType.KATAKANA.value;
-  
-  public static final int HANGUL_TYPE = StandardAnalyzer.TokenType.HANGUL.value;
-
-  public final long yychar()
-  {
-    return yychar;
-  }
-
-  public String getText()
-  {
-    return String.valueOf(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
-  }
-
-  public char[] getArray()
-  {
-    return Arrays.copyOfRange(zzBuffer, zzStartRead, zzMarkedPos);
-  }
-
-  public byte[] getBytes()
-  {
-    return getText().getBytes();
-  }
-
-%}
-
-%%
-
-// UAX#29 WB1.   sot   ÷
-//        WB2.     ÷   eot
-//
-<<EOF>> { return StandardAnalyzer.TokenType.EOF.value; }
-
-// UAX#29 WB8.   Numeric × Numeric
-//        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-//        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
-//
-{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* 
-  { return NUMERIC_TYPE; }
-
-// subset of the below for typing purposes only!
-{HangulEx}+
-  { return HANGUL_TYPE; }
-  
-{KatakanaEx}+
-  { return KATAKANA_TYPE; }
-
-// UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-//        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-//        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-//        WB7a.  Hebrew_Letter × Single_Quote
-//        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
-//        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
-//        WB9.   (ALetter | Hebrew_Letter) × Numeric
-//        WB10.  Numeric × (ALetter | Hebrew_Letter)
-//        WB13.  Katakana × Katakana
-//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
-//
-{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
-                     )+
-                   )
-({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                           )*
-                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}  {HebrewLetterEx}    )
-                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx}         )*
-                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  ) {HebrewOrALetterEx} )*
-                     )+
-                   )
-)*
-{ExtendNumLetEx}* 
-  { return WORD_TYPE; }
-
-
-// From UAX #29:
-//
-//    [C]haracters with the Line_Break property values of Contingent_Break (CB), 
-//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word 
-//    boundary property values based on criteria outside of the scope of this
-//    annex.  That means that satisfactory treatment of languages like Chinese
-//    or Thai requires special handling.
-// 
-// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
-// property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
-//
-// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
-// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
-// Lao, etc.) are kept together.  This grammar does the same below.
-//
-// See also the Unicode Line Breaking Algorithm:
-//
-//    http://www.unicode.org/reports/tr14/#SA
-//
-{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }
-
-// UAX#29 WB14.  Any ÷ Any
-//
-{HanEx} { return IDEOGRAPHIC_TYPE; }
-{HiraganaEx} { return HIRAGANA_TYPE; }
-
-
-// UAX#29 WB3.   CR × LF
-//        WB3a.  (Newline | CR | LF) ÷
-//        WB3b.  ÷ (Newline | CR | LF)
-//        WB13c. Regional_Indicator × Regional_Indicator
-//        WB14.  Any ÷ Any
-//
-{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
-  { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerInterface.java b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerInterface.java
deleted file mode 100644
index f8b6bf773e..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerInterface.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer;
-
-import java.io.IOException;
-import java.io.Reader;
-
-/**
- * Internal interface for supporting versioned grammars.
- */
-public interface StandardTokenizerInterface
-{
-
-    String getText();
-
-    char[] getArray();
-
-    byte[] getBytes();
-
-    /**
-     * Returns the current position.
-     */
-    long yychar();
-
-    /**
-     * Returns the length of the matched text region.
-     */
-    int yylength();
-
-    /**
-     * Resumes scanning until the next regular expression is matched,
-     * the end of input is encountered or an I/O-Error occurs.
-     *
-     * @return      the next token, {@link #YYEOF} on end of stream
-     * @exception   java.io.IOException  if any I/O-Error occurs
-     */
-    int getNextToken() throws IOException;
-
-    /**
-     * Resets the scanner to read from a new input stream.
-     * Does not close the old reader.
-     *
-     * All internal variables are reset, the old input stream
-     * <b>cannot</b> be reused (internal buffer is discarded and lost).
-     * Lexical state is set to <tt>ZZ_INITIAL</tt>.
-     *
-     * @param reader   the new input stream
-     */
-    void yyreset(Reader reader);
-}
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerOptions.java b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerOptions.java
deleted file mode 100644
index da44f0ad7b..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerOptions.java
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer;
-
-import java.util.Locale;
-import java.util.Map;
-
-/**
- * Various options for controlling tokenization and enabling
- * or disabling features
- */
-public class StandardTokenizerOptions
-{
-    public static final String TOKENIZATION_ENABLE_STEMMING = "tokenization_enable_stemming";
-    public static final String TOKENIZATION_SKIP_STOP_WORDS = "tokenization_skip_stop_words";
-    public static final String TOKENIZATION_LOCALE = "tokenization_locale";
-    public static final String TOKENIZATION_NORMALIZE_LOWERCASE = "tokenization_normalize_lowercase";
-    public static final String TOKENIZATION_NORMALIZE_UPPERCASE = "tokenization_normalize_uppercase";
-
-    public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-    public static final int DEFAULT_MIN_TOKEN_LENGTH = 0;
-
-    private boolean stemTerms;
-    private boolean ignoreStopTerms;
-    private Locale locale;
-    private boolean caseSensitive;
-    private boolean allTermsToUpperCase;
-    private boolean allTermsToLowerCase;
-    private int minTokenLength;
-    private int maxTokenLength;
-
-    public boolean shouldStemTerms()
-    {
-        return stemTerms;
-    }
-
-    public void setStemTerms(boolean stemTerms)
-    {
-        this.stemTerms = stemTerms;
-    }
-
-    public boolean shouldIgnoreStopTerms()
-    {
-        return ignoreStopTerms;
-    }
-
-    public void setIgnoreStopTerms(boolean ignoreStopTerms)
-    {
-        this.ignoreStopTerms = ignoreStopTerms;
-    }
-
-    public Locale getLocale()
-    {
-        return locale;
-    }
-
-    public void setLocale(Locale locale)
-    {
-        this.locale = locale;
-    }
-
-    public boolean isCaseSensitive()
-    {
-        return caseSensitive;
-    }
-
-    public void setCaseSensitive(boolean caseSensitive)
-    {
-        this.caseSensitive = caseSensitive;
-    }
-
-    public boolean shouldUpperCaseTerms()
-    {
-        return allTermsToUpperCase;
-    }
-
-    public void setAllTermsToUpperCase(boolean allTermsToUpperCase)
-    {
-        this.allTermsToUpperCase = allTermsToUpperCase;
-    }
-
-    public boolean shouldLowerCaseTerms()
-    {
-        return allTermsToLowerCase;
-    }
-
-    public void setAllTermsToLowerCase(boolean allTermsToLowerCase)
-    {
-        this.allTermsToLowerCase = allTermsToLowerCase;
-    }
-
-    public int getMinTokenLength()
-    {
-        return minTokenLength;
-    }
-
-    public void setMinTokenLength(int minTokenLength)
-    {
-        this.minTokenLength = minTokenLength;
-    }
-
-    public int getMaxTokenLength()
-    {
-        return maxTokenLength;
-    }
-
-    public void setMaxTokenLength(int maxTokenLength)
-    {
-        this.maxTokenLength = maxTokenLength;
-    }
-
-    public static class OptionsBuilder 
-    {
-        private boolean stemTerms;
-        private boolean ignoreStopTerms;
-        private Locale locale;
-        private boolean caseSensitive;
-        private boolean allTermsToUpperCase;
-        private boolean allTermsToLowerCase;
-        private int minTokenLength = DEFAULT_MIN_TOKEN_LENGTH;
-        private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-        public OptionsBuilder()
-        {
-        }
-
-        public OptionsBuilder stemTerms(boolean stemTerms)
-        {
-            this.stemTerms = stemTerms;
-            return this;
-        }
-
-        public OptionsBuilder ignoreStopTerms(boolean ignoreStopTerms)
-        {
-            this.ignoreStopTerms = ignoreStopTerms;
-            return this;
-        }
-
-        public OptionsBuilder useLocale(Locale locale)
-        {
-            this.locale = locale;
-            return this;
-        }
-
-        public OptionsBuilder caseSensitive(boolean caseSensitive)
-        {
-            this.caseSensitive = caseSensitive;
-            return this;
-        }
-
-        public OptionsBuilder alwaysUpperCaseTerms(boolean allTermsToUpperCase)
-        {
-            this.allTermsToUpperCase = allTermsToUpperCase;
-            return this;
-        }
-
-        public OptionsBuilder alwaysLowerCaseTerms(boolean allTermsToLowerCase)
-        {
-            this.allTermsToLowerCase = allTermsToLowerCase;
-            return this;
-        }
-
-        /**
-         * Set the min allowed token length.  Any token shorter
-         * than this is skipped.
-         */
-        public OptionsBuilder minTokenLength(int minTokenLength)
-        {
-            if (minTokenLength < 1)
-                throw new IllegalArgumentException("minTokenLength must be greater than zero");
-            this.minTokenLength = minTokenLength;
-            return this;
-        }
-
-        /**
-         * Set the max allowed token length.  Any token longer
-         * than this is skipped.
-         */
-        public OptionsBuilder maxTokenLength(int maxTokenLength)
-        {
-            if (maxTokenLength < 1)
-                throw new IllegalArgumentException("maxTokenLength must be greater than zero");
-            this.maxTokenLength = maxTokenLength;
-            return this;
-        }
-
-        public StandardTokenizerOptions build()
-        {
-            if(allTermsToLowerCase && allTermsToUpperCase)
-                throw new IllegalArgumentException("Options to normalize terms cannot be " +
-                        "both uppercase and lowercase at the same time");
-
-            StandardTokenizerOptions options = new StandardTokenizerOptions();
-            options.setIgnoreStopTerms(ignoreStopTerms);
-            options.setStemTerms(stemTerms);
-            options.setLocale(locale);
-            options.setCaseSensitive(caseSensitive);
-            options.setAllTermsToLowerCase(allTermsToLowerCase);
-            options.setAllTermsToUpperCase(allTermsToUpperCase);
-            options.setMinTokenLength(minTokenLength);
-            options.setMaxTokenLength(maxTokenLength);
-            return options;
-        }
-    }
-
-    public static StandardTokenizerOptions buildFromMap(Map<String, String> optionsMap)
-    {
-        OptionsBuilder optionsBuilder = new OptionsBuilder();
-
-        for (Map.Entry<String, String> entry : optionsMap.entrySet())
-        {
-            switch(entry.getKey())
-            {
-                case TOKENIZATION_ENABLE_STEMMING:
-                {
-                    boolean bool = Boolean.parseBoolean(entry.getValue());
-                    optionsBuilder = optionsBuilder.stemTerms(bool);
-                    break;
-                }
-                case TOKENIZATION_SKIP_STOP_WORDS:
-                {
-                    boolean bool = Boolean.parseBoolean(entry.getValue());
-                    optionsBuilder = optionsBuilder.ignoreStopTerms(bool);
-                    break;
-                }
-                case TOKENIZATION_LOCALE:
-                {
-                    Locale locale = new Locale(entry.getValue());
-                    optionsBuilder = optionsBuilder.useLocale(locale);
-                    break;
-                }
-                case TOKENIZATION_NORMALIZE_UPPERCASE:
-                {
-                    boolean bool = Boolean.parseBoolean(entry.getValue());
-                    optionsBuilder = optionsBuilder.alwaysUpperCaseTerms(bool);
-                    break;
-                }
-                case TOKENIZATION_NORMALIZE_LOWERCASE:
-                {
-                    boolean bool = Boolean.parseBoolean(entry.getValue());
-                    optionsBuilder = optionsBuilder.alwaysLowerCaseTerms(bool);
-                    break;
-                }
-                default:
-                {
-                }
-            }
-        }
-        return optionsBuilder.build();
-    }
-
-    public static StandardTokenizerOptions getDefaultOptions()
-    {
-        return new OptionsBuilder()
-                .ignoreStopTerms(true).alwaysLowerCaseTerms(true)
-                .stemTerms(false).useLocale(Locale.ENGLISH).build();
-    }
-}
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java b/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java
deleted file mode 100644
index ae232db21d..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmerFactory.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer.filter;
-
-import java.lang.reflect.Constructor;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-import org.tartarus.snowball.SnowballStemmer;
-import org.tartarus.snowball.ext.*;
-
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Returns a SnowballStemmer instance appropriate for
- * a given language
- */
-public class StemmerFactory
-{
-    private static final Logger logger = LoggerFactory.getLogger(StemmerFactory.class);
-    private static final LoadingCache<Class, Constructor<?>> STEMMER_CONSTRUCTOR_CACHE = CacheBuilder.newBuilder()
-            .build(new CacheLoader<Class, Constructor<?>>()
-            {
-                public Constructor<?> load(Class aClass) throws Exception
-                {
-                    try
-                    {
-                        return aClass.getConstructor();
-                    }
-                    catch (Exception e) 
-                    {
-                        logger.error("Failed to get stemmer constructor", e);
-                    }
-                    return null;
-                }
-            });
-
-    private static final Map<String, Class> SUPPORTED_LANGUAGES;
-
-    static
-    {
-        SUPPORTED_LANGUAGES = new HashMap<>();
-        SUPPORTED_LANGUAGES.put("de", germanStemmer.class);
-        SUPPORTED_LANGUAGES.put("da", danishStemmer.class);
-        SUPPORTED_LANGUAGES.put("es", spanishStemmer.class);
-        SUPPORTED_LANGUAGES.put("en", englishStemmer.class);
-        SUPPORTED_LANGUAGES.put("fl", finnishStemmer.class);
-        SUPPORTED_LANGUAGES.put("fr", frenchStemmer.class);
-        SUPPORTED_LANGUAGES.put("hu", hungarianStemmer.class);
-        SUPPORTED_LANGUAGES.put("it", italianStemmer.class);
-        SUPPORTED_LANGUAGES.put("nl", dutchStemmer.class);
-        SUPPORTED_LANGUAGES.put("no", norwegianStemmer.class);
-        SUPPORTED_LANGUAGES.put("pt", portugueseStemmer.class);
-        SUPPORTED_LANGUAGES.put("ro", romanianStemmer.class);
-        SUPPORTED_LANGUAGES.put("ru", russianStemmer.class);
-        SUPPORTED_LANGUAGES.put("sv", swedishStemmer.class);
-        SUPPORTED_LANGUAGES.put("tr", turkishStemmer.class);
-    }
-
-    public static SnowballStemmer getStemmer(Locale locale)
-    {
-        if (locale == null)
-            return null;
-
-        String rootLang = locale.getLanguage().substring(0, 2);
-        try
-        {
-            Class clazz = SUPPORTED_LANGUAGES.get(rootLang);
-            if(clazz == null)
-                return null;
-            Constructor<?> ctor = STEMMER_CONSTRUCTOR_CACHE.get(clazz);
-            return (SnowballStemmer) ctor.newInstance();
-        }
-        catch (Exception e)
-        {
-            logger.debug("Failed to create new SnowballStemmer instance " +
-                    "for language [{}]", locale.getLanguage(), e);
-        }
-        return null;
-    }
-}
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmingFilters.java b/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmingFilters.java
deleted file mode 100644
index cb840a8705..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StemmingFilters.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer.filter;
-
-import java.util.Locale;
-
-import org.tartarus.snowball.SnowballStemmer;
-
-/**
- * Filters for performing Stemming on tokens
- */
-public class StemmingFilters
-{
-    public static class DefaultStemmingFilter extends FilterPipelineTask<String, String>
-    {
-        private SnowballStemmer stemmer;
-
-        public DefaultStemmingFilter(Locale locale)
-        {
-            stemmer = StemmerFactory.getStemmer(locale);
-        }
-
-        public String process(String input) throws Exception
-        {
-            if (input == null || stemmer == null)
-                return input;
-            stemmer.setCurrent(input);
-            return (stemmer.stem()) ? stemmer.getCurrent() : input;
-        }
-    }
-}
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StopWordFactory.java b/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StopWordFactory.java
deleted file mode 100644
index 8ec02e0053..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StopWordFactory.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer.filter;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Set;
-import java.util.concurrent.ExecutionException;
-
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Provides a list of Stop Words for a given language
- */
-public class StopWordFactory
-{
-    private static final Logger logger = LoggerFactory.getLogger(StopWordFactory.class);
-
-    private static final String DEFAULT_RESOURCE_EXT = "_ST.txt";
-    private static final String DEFAULT_RESOURCE_PREFIX = StopWordFactory.class.getPackage()
-            .getName().replace(".", File.separator);
-    private static final Set<String> SUPPORTED_LANGUAGES = new HashSet<>(
-            Arrays.asList("ar","bg","cs","de","en","es","fi","fr","hi","hu","it",
-            "pl","pt","ro","ru","sv"));
-
-    private static final LoadingCache<String, Set<String>> STOP_WORDS_CACHE = CacheBuilder.newBuilder()
-            .build(new CacheLoader<String, Set<String>>()
-            {
-                public Set<String> load(String s)
-                {
-                    return getStopWordsFromResource(s);
-                }
-            });
-
-    public static Set<String> getStopWordsForLanguage(Locale locale)
-    {
-        if (locale == null)
-            return null;
-
-        String rootLang = locale.getLanguage().substring(0, 2);
-        try
-        {
-            return (!SUPPORTED_LANGUAGES.contains(rootLang)) ? null : STOP_WORDS_CACHE.get(rootLang);
-        }
-        catch (ExecutionException e)
-        {
-            logger.error("Failed to populate Stop Words Cache for language [{}]", locale.getLanguage(), e);
-            return null;
-        }
-    }
-
-    private static Set<String> getStopWordsFromResource(String language)
-    {
-        Set<String> stopWords = new HashSet<>();
-        String resourceName = DEFAULT_RESOURCE_PREFIX + File.separator + language + DEFAULT_RESOURCE_EXT;
-        try (InputStream is = StopWordFactory.class.getClassLoader().getResourceAsStream(resourceName);
-             BufferedReader r = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)))
-        {
-                String line;
-                while ((line = r.readLine()) != null)
-                {
-                    //skip comments (lines starting with # char)
-                    if(line.charAt(0) == '#')
-                        continue;
-                    stopWords.add(line.trim());
-                }
-        }
-        catch (Exception e)
-        {
-            logger.error("Failed to retrieve Stop Terms resource for language [{}]", language, e);
-        }
-        return stopWords;
-    }
-}
diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StopWordFilters.java b/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StopWordFilters.java
deleted file mode 100644
index 4ae849c1f4..0000000000
--- a/src/java/org/apache/cassandra/index/sasi/analyzer/filter/StopWordFilters.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer.filter;
-
-import java.util.Locale;
-import java.util.Set;
-
-/**
- * Filter implementations for input matching Stop Words
- */
-public class StopWordFilters
-{
-    public static class DefaultStopWordFilter extends FilterPipelineTask<String, String>
-    {
-        private Set<String> stopWords = null;
-
-        public DefaultStopWordFilter(Locale locale)
-        {
-            this.stopWords = StopWordFactory.getStopWordsForLanguage(locale);
-        }
-
-        public String process(String input) throws Exception
-        {
-            return (stopWords != null && stopWords.contains(input)) ? null : input;
-        }
-    }
-}
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index c82aafd99d..cb8c3f77d2 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -62,7 +62,6 @@
 import org.apache.cassandra.exceptions.ConfigurationException;
 import org.apache.cassandra.exceptions.InvalidRequestException;
 import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer;
-import org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer;
 import org.apache.cassandra.index.sasi.analyzer.NoOpAnalyzer;
 import org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer;
 import org.apache.cassandra.index.sasi.conf.ColumnIndex;
@@ -2538,9 +2537,7 @@ public void testAnalyzerValidation()
 
         new HashMap<Class<? extends AbstractAnalyzer>, List<String>>()
         {{
-            put(StandardAnalyzer.class, textColumns);
             put(NonTokenizingAnalyzer.class, textColumns);
-            put(DelimiterAnalyzer.class, textColumns);
             put(NoOpAnalyzer.class, allColumns);
         }}
         .forEach((analyzer, supportedColumns) -> {
diff --git a/test/unit/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzerTest.java b/test/unit/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzerTest.java
deleted file mode 100644
index f5f007f855..0000000000
--- a/test/unit/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzerTest.java
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer;
-
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.cassandra.config.ColumnDefinition;
-import org.apache.cassandra.db.marshal.Int32Type;
-import org.apache.cassandra.db.marshal.SetType;
-import org.apache.cassandra.db.marshal.UTF8Type;
-import org.apache.cassandra.exceptions.ConfigurationException;
-import org.apache.cassandra.utils.ByteBufferUtil;
-import org.apache.commons.io.IOUtils;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-
-public class DelimiterAnalyzerTest
-{
-
-    @Test
-    public void caseSensitiveAnalizer() throws Exception
-    {
-        DelimiterAnalyzer analyzer = new DelimiterAnalyzer();
-
-        analyzer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        String testString = "Nip it in the bud";
-        ByteBuffer toAnalyze = ByteBuffer.wrap(testString.getBytes());
-        analyzer.reset(toAnalyze);
-        StringBuilder output = new StringBuilder();
-        while (analyzer.hasNext())
-            output.append(ByteBufferUtil.string(analyzer.next()) + (analyzer.hasNext() ? ' ' : ""));
-
-        Assert.assertEquals(testString, output.toString());
-        Assert.assertFalse(testString.toLowerCase().equals(output.toString()));
-    }
-
-    @Test
-    public void testBlankEntries() throws Exception
-    {
-        DelimiterAnalyzer analyzer = new DelimiterAnalyzer();
-
-        analyzer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, ",");
-                }},
-            UTF8Type.instance);
-
-        String testString = ",Nip,,,,it,,,in,,the,bud,,,";
-        ByteBuffer toAnalyze = ByteBuffer.wrap(testString.getBytes());
-        analyzer.reset(toAnalyze);
-        StringBuilder output = new StringBuilder();
-        while (analyzer.hasNext())
-            output.append(ByteBufferUtil.string(analyzer.next()) + (analyzer.hasNext() ? ',' : ""));
-
-        Assert.assertEquals("Nip,it,in,the,bud", output.toString());
-        Assert.assertFalse(testString.toLowerCase().equals(output.toString()));
-    }
-
-    @Test(expected = ConfigurationException.class)
-    public void ensureIncompatibleInputOnCollectionTypeSkipped()
-    {
-        new DelimiterAnalyzer().validate(Collections.emptyMap(),
-                                         ColumnDefinition.regularDef("a", "b", "c", SetType.getInstance(UTF8Type.instance, true)));
-    }
-
-    @Test(expected = ConfigurationException.class)
-    public void ensureIncompatibleInputSkipped()
-    {
-        new DelimiterAnalyzer().validate(Collections.emptyMap(),
-                                         ColumnDefinition.regularDef("a", "b", "c", Int32Type.instance));
-    }
-
-    @Test
-    public void testTokenizationLoremIpsum() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/lorem_ipsum.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(bb);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(69, tokens.size());
-
-    }
-
-    @Test
-    public void testTokenizationJaJp1() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/ja_jp_1.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, "。");
-                }},
-            UTF8Type.instance);
-
-        tokenizer.reset(bb);
-        List<ByteBuffer> tokens = new ArrayList<>();
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(4, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationJaJp2() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/ja_jp_2.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, "。");
-                }},
-            UTF8Type.instance);
-
-        tokenizer.reset(bb);
-        List<ByteBuffer> tokens = new ArrayList<>();
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(2, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationRuRu1() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/ru_ru_1.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(bb);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(447, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationZnTw1() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/zn_tw_1.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(bb);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(403, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationAdventuresOfHuckFinn() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/adventures_of_huckleberry_finn_mark_twain.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(bb);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(104594, tokens.size());
-    }
-
-    @Test
-    public void testWorldCities() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/world_cities_a.csv")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, ",");
-                }},
-            UTF8Type.instance);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(bb);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(122265, tokens.size());
-    }
-
-    @Test
-    public void tokenizeDomainNamesAndUrls() throws Exception
-    {
-        ByteBuffer bb = ByteBuffer.wrap(IOUtils.toByteArray(
-                DelimiterAnalyzerTest.class.getClassLoader().getResourceAsStream("tokenization/top_visited_domains.txt")));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        tokenizer.reset(bb);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(12, tokens.size());
-    }
-
-    @Test
-    public void testReuseAndResetTokenizerInstance() throws Exception
-    {
-        List<ByteBuffer> bbToTokenize = new ArrayList<>();
-        bbToTokenize.add(ByteBuffer.wrap("Nip it in the bud".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("I couldn’t care less".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("One and the same".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("The squeaky wheel gets the grease.".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("The pen is mightier than the sword.".getBytes()));
-
-        DelimiterAnalyzer tokenizer = new DelimiterAnalyzer();
-
-        tokenizer.init(
-            new HashMap()
-                {{
-                    put(DelimiterTokenizingOptions.DELIMITER, " ");
-                }},
-            UTF8Type.instance);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        for (ByteBuffer bb : bbToTokenize)
-        {
-            tokenizer.reset(bb);
-            while (tokenizer.hasNext())
-                tokens.add(tokenizer.next());
-        }
-        assertEquals(26, tokens.size());
-    }
-
-}
diff --git a/test/unit/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzerTest.java b/test/unit/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzerTest.java
deleted file mode 100644
index 7a88a3dc9f..0000000000
--- a/test/unit/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzerTest.java
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.index.sasi.analyzer;
-
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Locale;
-
-import org.junit.Test;
-
-import org.apache.cassandra.serializers.UTF8Serializer;
-
-import static org.junit.Assert.assertEquals;
-
-public class StandardAnalyzerTest
-{
-    @Test
-    public void testTokenizationAscii() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/apache_license_header.txt");
-
-        StandardTokenizerOptions options = new StandardTokenizerOptions.OptionsBuilder()
-                .maxTokenLength(5).build();
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(options);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(is);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(67, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationLoremIpsum() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/lorem_ipsum.txt");
-
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(StandardTokenizerOptions.getDefaultOptions());
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(is);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(62, tokens.size());
-
-    }
-
-    @Test
-    public void testTokenizationJaJp1() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/ja_jp_1.txt");
-
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(StandardTokenizerOptions.getDefaultOptions());
-
-        tokenizer.reset(is);
-        List<ByteBuffer> tokens = new ArrayList<>();
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(210, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationJaJp2() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/ja_jp_2.txt");
-
-        StandardTokenizerOptions options = new StandardTokenizerOptions.OptionsBuilder().stemTerms(true)
-                .ignoreStopTerms(true).alwaysLowerCaseTerms(true).build();
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(options);
-
-        tokenizer.reset(is);
-        List<ByteBuffer> tokens = new ArrayList<>();
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(57, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationRuRu1() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/ru_ru_1.txt");
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(StandardTokenizerOptions.getDefaultOptions());
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(is);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(456, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationZnTw1() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/zn_tw_1.txt");
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(StandardTokenizerOptions.getDefaultOptions());
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(is);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(963, tokens.size());
-    }
-
-    @Test
-    public void testTokenizationAdventuresOfHuckFinn() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/adventures_of_huckleberry_finn_mark_twain.txt");
-
-        StandardTokenizerOptions options = new StandardTokenizerOptions.OptionsBuilder().stemTerms(true)
-                .ignoreStopTerms(true).useLocale(Locale.ENGLISH)
-                .alwaysLowerCaseTerms(true).build();
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(options);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        tokenizer.reset(is);
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(37739, tokens.size());
-    }
-
-    @Test
-    public void testSkipStopWordBeforeStemmingFrench() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-               .getResourceAsStream("tokenization/french_skip_stop_words_before_stemming.txt");
-
-        StandardTokenizerOptions options = new StandardTokenizerOptions.OptionsBuilder().stemTerms(true)
-                .ignoreStopTerms(true).useLocale(Locale.FRENCH)
-                .alwaysLowerCaseTerms(true).build();
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(options);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        List<String> words = new ArrayList<>();
-        tokenizer.reset(is);
-        while (tokenizer.hasNext())
-        {
-            final ByteBuffer nextToken = tokenizer.next();
-            tokens.add(nextToken);
-            words.add(UTF8Serializer.instance.deserialize(nextToken.duplicate()));
-        }
-
-        assertEquals(4, tokens.size());
-        assertEquals("dans", words.get(0));
-        assertEquals("plui", words.get(1));
-        assertEquals("chanson", words.get(2));
-        assertEquals("connu", words.get(3));
-    }
-
-    @Test
-    public void tokenizeDomainNamesAndUrls() throws Exception
-    {
-        InputStream is = StandardAnalyzerTest.class.getClassLoader()
-                .getResourceAsStream("tokenization/top_visited_domains.txt");
-
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(StandardTokenizerOptions.getDefaultOptions());
-        tokenizer.reset(is);
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        while (tokenizer.hasNext())
-            tokens.add(tokenizer.next());
-
-        assertEquals(15, tokens.size());
-    }
-
-    @Test
-    public void testReuseAndResetTokenizerInstance() throws Exception
-    {
-        List<ByteBuffer> bbToTokenize = new ArrayList<>();
-        bbToTokenize.add(ByteBuffer.wrap("Nip it in the bud".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("I couldn’t care less".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("One and the same".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("The squeaky wheel gets the grease.".getBytes()));
-        bbToTokenize.add(ByteBuffer.wrap("The pen is mightier than the sword.".getBytes()));
-
-        StandardAnalyzer tokenizer = new StandardAnalyzer();
-        tokenizer.init(StandardTokenizerOptions.getDefaultOptions());
-
-        List<ByteBuffer> tokens = new ArrayList<>();
-        for (ByteBuffer bb : bbToTokenize)
-        {
-            tokenizer.reset(bb);
-            while (tokenizer.hasNext())
-                tokens.add(tokenizer.next());
-        }
-        assertEquals(10, tokens.size());
-    }
-}