From 6bbea7c5cae29ecf63bdd3b6851e4be6b5f74181 Mon Sep 17 00:00:00 2001
From: Andrew Valencik <andrew.valencik@gmail.com>
Date: Thu, 20 Oct 2022 17:25:35 -0400
Subject: [PATCH 1/2] Add toSet helper, move scaladoc to abstract method

---
 .../textmogrify/lucene/AnalyzerBuilder.scala  | 66 +++++--------------
 1 file changed, 16 insertions(+), 50 deletions(-)

diff --git a/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala b/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala
index 7a6bd07..a5fc775 100644
--- a/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala
+++ b/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala
@@ -72,7 +72,15 @@ object Config {
 sealed abstract class AnalyzerBuilder private[lucene] (config: Config) {
   type Builder <: AnalyzerBuilder
 
+  private[lucene] def toSet(cs: CharArraySet): Set[String] =
+    cs.asScala.iterator.map(chars => new String(chars.asInstanceOf[Array[Char]])).toSet
+
+  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
+    * The returned set is immutable and is not used by the builder itself; the default StopFilter
+    * is built from the underlying Lucene `CharArraySet` instead.
+    */
   def defaultStopWords: Set[String]
+
   def withConfig(config: Config): Builder
 
   /** Adds a lowercasing stage to the analyzer pipeline */
@@ -194,12 +202,7 @@ final class EnglishAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): EnglishAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getEnglishStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getEnglishStopSet())
 
   /** Adds the Porter Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -231,12 +234,7 @@ final class FrenchAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): FrenchAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getFrenchStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getFrenchStopSet())
 
   /** Adds the FrenchLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -267,12 +265,7 @@ final class SpanishAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): SpanishAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getSpanishStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getSpanishStopSet())
 
   /** Adds the SpanishLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -303,12 +296,7 @@ final class ItalianAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): ItalianAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getItalianStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getItalianStopSet())
 
   /** Adds the ItalianLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -339,12 +327,7 @@ final class GermanAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): GermanAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getGermanStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getGermanStopSet())
 
   /** Adds the GermanLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -375,12 +358,7 @@ final class DutchAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): DutchAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getDutchStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getDutchStopSet())
 
   /** Adds the Dutch Snowball Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -413,12 +391,7 @@ final class PortugueseAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): PortugueseAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getPortugueseStopSet().asScala.map(ca => String.valueOf(ca.asInstanceOf[Array[Char]])).toSet
+  lazy val defaultStopWords: Set[String] = toSet(getPortugueseStopSet())
 
   /** Adds the PortugueseLight Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.
@@ -450,14 +423,7 @@ final class BrazilianPortugueseAnalyzerBuilder private[lucene] (
   def withConfig(newConfig: Config): BrazilianPortugueseAnalyzerBuilder =
     copy(newConfig = newConfig)
 
-  /** A convenience value for debugging or investigating, to inspect the Lucene default stop words.
-    * This set is immutable, and unused; it is the underlying Lucene `CharArraySet` that we use to
-    * build the default StopFilter
-    */
-  lazy val defaultStopWords: Set[String] =
-    getBrazilianPortugueseStopSet().asScala
-      .map(ca => String.valueOf(ca.asInstanceOf[Array[Char]]))
-      .toSet
+  lazy val defaultStopWords: Set[String] = toSet(getBrazilianPortugueseStopSet())
 
   /** Adds the Brazilian Stemmer to the end of the analyzer pipeline and enables lowercasing.
     * Stemming reduces words like `jumping` and `jumps` to their root word `jump`.

From 78b6978676496848c2589e00172c5a38d9f42c4a Mon Sep 17 00:00:00 2001
From: Andrew Valencik <andrew.valencik@gmail.com>
Date: Thu, 20 Oct 2022 17:26:33 -0400
Subject: [PATCH 2/2] Don't need lazy val in DefaultAnalyzerBuilder

---
 lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala b/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala
index a5fc775..4f7e69b 100644
--- a/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala
+++ b/lucene/src/main/scala/textmogrify/lucene/AnalyzerBuilder.scala
@@ -154,7 +154,7 @@ final class DefaultAnalyzerBuilder private[lucene] (config: Config)
     extends AnalyzerBuilder(config) { self =>
   type Builder = DefaultAnalyzerBuilder
 
-  lazy val defaultStopWords: Set[String] = Set.empty
+  val defaultStopWords: Set[String] = Set.empty
 
   def withConfig(newConfig: Config): DefaultAnalyzerBuilder =
     new DefaultAnalyzerBuilder(newConfig)