From b3ae1658347d2323d3cb4293c622a542fe70e090 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Tue, 6 Aug 2024 16:23:46 +0900 Subject: [PATCH] fix: update for migrating lucene 8.11.3 Signed-off-by: Hiroshi Miura --- .../languagemodel/LuceneSingleIndexLanguageModel.java | 10 +++++----- .../languagetool/dev/HomophoneOccurrenceDumper.java | 6 ++---- .../main/java/org/languagetool/dev/index/Searcher.java | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/languagetool-core/src/main/java/org/languagetool/languagemodel/LuceneSingleIndexLanguageModel.java b/languagetool-core/src/main/java/org/languagetool/languagemodel/LuceneSingleIndexLanguageModel.java index 0b0bf65fe165..77bf868ee913 100644 --- a/languagetool-core/src/main/java/org/languagetool/languagemodel/LuceneSingleIndexLanguageModel.java +++ b/languagetool-core/src/main/java/org/languagetool/languagemodel/LuceneSingleIndexLanguageModel.java @@ -146,10 +146,10 @@ public long getTotalTokenCount() { try { RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*")); TopDocs docs = luceneSearcher.searcher.search(query, 1000); // Integer.MAX_VALUE might cause OOE on wrong index - if (docs.totalHits == 0) { + if (docs.totalHits.value == 0) { throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory); - } else if (docs.totalHits > 1000) { - throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory); + } else if (docs.totalHits.value > 1000) { + throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits.value + " in " + luceneSearcher.directory); } else { long result = 0; for (ScoreDoc scoreDoc : docs.scoreDocs) { @@ -194,9 +194,9 @@ private long getCount(Term term, LuceneSearcher luceneSearcher) { long result = 0; try { TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000); - if (docs.totalHits > 2000) { + if (docs.totalHits.value > 2000) { throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " + - docs.totalHits + " matches in " + luceneSearcher.directory); + docs.totalHits.value + " matches in " + luceneSearcher.directory); } for (ScoreDoc scoreDoc : docs.scoreDocs) { String countStr = luceneSearcher.reader.document(scoreDoc.doc).get("count"); diff --git a/languagetool-standalone/src/main/java/org/languagetool/dev/HomophoneOccurrenceDumper.java b/languagetool-standalone/src/main/java/org/languagetool/dev/HomophoneOccurrenceDumper.java index 2b4d22c4c684..3fa2ff50743a 100644 --- a/languagetool-standalone/src/main/java/org/languagetool/dev/HomophoneOccurrenceDumper.java +++ b/languagetool-standalone/src/main/java/org/languagetool/dev/HomophoneOccurrenceDumper.java @@ -18,8 +18,7 @@ */ package org.languagetool.dev; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.MultiTerms; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; @@ -112,8 +111,7 @@ private void dumpOccurrences(Set tokens) throws IOException { private TermsEnum getIterator() throws IOException { LuceneSearcher luceneSearcher = getLuceneSearcher(3); - Fields fields = MultiFields.getFields(luceneSearcher.getReader()); - Terms terms = fields.terms("ngram"); + Terms terms = MultiTerms.getTerms(luceneSearcher.getReader(), "ngram"); return terms.iterator(); } diff --git a/languagetool-wikipedia/src/main/java/org/languagetool/dev/index/Searcher.java b/languagetool-wikipedia/src/main/java/org/languagetool/dev/index/Searcher.java index 8564e197a446..8bc871e3cd57 100644 --- a/languagetool-wikipedia/src/main/java/org/languagetool/dev/index/Searcher.java +++ b/languagetool-wikipedia/src/main/java/org/languagetool/dev/index/Searcher.java @@ -201,7 +201,7 @@ public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language language } private PossiblyLimitedTopDocs getTopDocs(Query query) throws IOException { - TopScoreDocCollector topCollector = TopScoreDocCollector.create(maxHits); + TopScoreDocCollector topCollector = TopScoreDocCollector.create(maxHits, Integer.MAX_VALUE); Counter clock = Counter.newCounter(true); int waitMillis = 1000; // TODO: if we interrupt the whole thread anyway, do we still need the TimeLimitingCollector?