diff --git a/src/converter/converter_test.cc b/src/converter/converter_test.cc index 7a74b0e7b..deb8040f9 100644 --- a/src/converter/converter_test.cc +++ b/src/converter/converter_test.cc @@ -2369,4 +2369,18 @@ TEST_F(ConverterTest, IntegrationWithUnicodeRewriter) { } } +TEST_F(ConverterTest, IntegrationWithSmallLetterRewriter) { + std::unique_ptr engine = + MockDataEngineFactory::Create().value(); + ConverterInterface *converter = engine->GetConverter(); + + { + Segments segments; + const ConversionRequest convreq = + ConversionRequestBuilder().SetKey("^123").Build(); + ASSERT_TRUE(converter->StartConversion(convreq, &segments)); + EXPECT_EQ(segments.conversion_segments_size(), 1); + EXPECT_TRUE(FindCandidateByValue("¹²³", segments.conversion_segment(0))); + } +} } // namespace mozc diff --git a/src/rewriter/BUILD.bazel b/src/rewriter/BUILD.bazel index 17dd78689..7a9e81459 100644 --- a/src/rewriter/BUILD.bazel +++ b/src/rewriter/BUILD.bazel @@ -356,6 +356,7 @@ mozc_cc_test( size = "small", srcs = ["small_letter_rewriter_test.cc"], deps = [ + ":rewriter_interface", ":small_letter_rewriter", "//base/strings:assign", "//converter:segments", diff --git a/src/rewriter/rewriter.cc b/src/rewriter/rewriter.cc index deb565b81..417e9f3ac 100644 --- a/src/rewriter/rewriter.cc +++ b/src/rewriter/rewriter.cc @@ -109,7 +109,7 @@ Rewriter::Rewriter(const engine::Modules &modules, AddRewriter(std::make_unique(pos_matcher)); AddRewriter(std::make_unique(pos_matcher)); AddRewriter(std::make_unique()); - AddRewriter(std::make_unique(&parent_converter)); + AddRewriter(std::make_unique()); if (absl::GetFlag(FLAGS_use_history_rewriter)) { AddRewriter( diff --git a/src/rewriter/small_letter_rewriter.cc b/src/rewriter/small_letter_rewriter.cc index 6ad4a5c4e..a607bb851 100644 --- a/src/rewriter/small_letter_rewriter.cc +++ b/src/rewriter/small_letter_rewriter.cc @@ -29,7 +29,10 @@ #include "rewriter/small_letter_rewriter.h" +#include #include +#include +#include #include #include @@ -39,7 +42,6 @@ #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "base/util.h" -#include "converter/converter_interface.h" #include "converter/segments.h" #include "protocol/commands.pb.h" #include "request/conversion_request.h" @@ -194,30 +196,6 @@ bool ConvertExpressions(const absl::string_view input, std::string *value) { return input != *value; } -// Resizes the segment size if not previously modified. Returns true if the -// segment size is 1 after resize. -bool EnsureSingleSegment(const ConversionRequest &request, Segments *segments, - const ConverterInterface *parent_converter, - const absl::string_view key) { - if (segments->conversion_segments_size() == 1) { - return true; - } - - if (segments->resized()) { - // The given segments are resized by user so don't modify anymore. - return false; - } - - const uint32_t resize_len = - Util::CharsLen(key) - - Util::CharsLen(segments->conversion_segment(0).key()); - if (!parent_converter->ResizeSegment(segments, request, 0, resize_len)) { - return false; - } - DCHECK_EQ(1, segments->conversion_segments_size()); - return true; -} - void AddCandidate(std::string key, std::string description, std::string value, int index, Segment *segment) { DCHECK(segment); @@ -238,13 +216,18 @@ void AddCandidate(std::string key, std::string description, std::string value, Segment::Candidate::NO_VARIANTS_EXPANSION); } -} // namespace +std::optional GetValue(absl::string_view key) { + std::string value; + if (!ConvertExpressions(key, &value)) { + return std::nullopt; + } -SmallLetterRewriter::SmallLetterRewriter( - const ConverterInterface *parent_converter) - : parent_converter_(parent_converter) { - DCHECK(parent_converter_); + if (value.empty()) { + return std::nullopt; + } + return value; } +} // namespace int SmallLetterRewriter::capability(const ConversionRequest &request) const { if (request.request().mixed_conversion()) { @@ -253,23 +236,41 @@ int SmallLetterRewriter::capability(const ConversionRequest &request) const { return RewriterInterface::CONVERSION; } -bool SmallLetterRewriter::Rewrite(const ConversionRequest &request, - Segments *segments) const { - std::string key; - for (const Segment &segment : segments->conversion_segments()) { - key += segment.key(); +std::optional +SmallLetterRewriter::CheckResizeSegmentsRequest( + const ConversionRequest &request, const Segments &segments) const { + if (segments.resized() || segments.conversion_segments_size() <= 1) { + return std::nullopt; } - std::string value; - if (!ConvertExpressions(key, &value)) { - return false; + absl::string_view key = request.key(); + const size_t key_len = Util::CharsLen(key); + if (key_len > std::numeric_limits::max()) { + return std::nullopt; } + const uint8_t segment_size = static_cast(key_len); - if (value.empty()) { + std::optional value = GetValue(key); + if (!value.has_value()) { + return std::nullopt; + } + + ResizeSegmentsRequest resize_request = { + .segment_index = 0, + .segment_sizes = {segment_size, 0, 0, 0, 0, 0, 0, 0}, + }; + return resize_request; +} + +bool SmallLetterRewriter::Rewrite(const ConversionRequest &request, + Segments *segments) const { + if (segments->conversion_segments_size() != 1) { return false; } - if (!EnsureSingleSegment(request, segments, parent_converter_, key)) { + absl::string_view key = request.key(); + std::optional value = GetValue(key); + if (!value.has_value()) { return false; } @@ -277,7 +278,8 @@ bool SmallLetterRewriter::Rewrite(const ConversionRequest &request, // Candidates from this function should not be on high position. -1 will // overwritten with the last index of candidates. - AddCandidate(std::move(key), "上下付き文字", std::move(value), -1, segment); + AddCandidate(std::string(key), "上下付き文字", std::move(value.value()), -1, + segment); return true; } diff --git a/src/rewriter/small_letter_rewriter.h b/src/rewriter/small_letter_rewriter.h index 8b6159233..edc4732f6 100644 --- a/src/rewriter/small_letter_rewriter.h +++ b/src/rewriter/small_letter_rewriter.h @@ -30,7 +30,7 @@ #ifndef MOZC_REWRITER_SMALL_LETTER_REWRITER_H_ #define MOZC_REWRITER_SMALL_LETTER_REWRITER_H_ -#include "converter/converter_interface.h" +#include #include "converter/segments.h" #include "request/conversion_request.h" #include "rewriter/rewriter_interface.h" @@ -40,17 +40,14 @@ namespace mozc { // A rewriter which converts text to superscripts and subscripts. class SmallLetterRewriter : public RewriterInterface { public: - explicit SmallLetterRewriter(const ConverterInterface *parent_converter); - SmallLetterRewriter(const SmallLetterRewriter &) = default; - SmallLetterRewriter &operator=(const SmallLetterRewriter &) = default; - int capability(const ConversionRequest &request) const override; + std::optional CheckResizeSegmentsRequest( + const ConversionRequest &request, + const Segments &segments) const override; + bool Rewrite(const ConversionRequest &request, Segments *segments) const override; - - private: - const ConverterInterface *parent_converter_; }; } // namespace mozc #endif // MOZC_REWRITER_SMALL_LETTER_REWRITER_H_ diff --git a/src/rewriter/small_letter_rewriter_test.cc b/src/rewriter/small_letter_rewriter_test.cc index 39767b0ed..0ca63bd18 100644 --- a/src/rewriter/small_letter_rewriter_test.cc +++ b/src/rewriter/small_letter_rewriter_test.cc @@ -29,8 +29,10 @@ #include "rewriter/small_letter_rewriter.h" +#include #include #include +#include #include "absl/strings/string_view.h" #include "base/strings/assign.h" @@ -40,6 +42,7 @@ #include "protocol/commands.pb.h" #include "protocol/config.pb.h" #include "request/conversion_request.h" +#include "rewriter/rewriter_interface.h" #include "testing/gunit.h" #include "testing/mozctest.h" @@ -88,8 +91,7 @@ class SmallLetterRewriterTest : public testing::TestWithTempUserProfile { TEST_F(SmallLetterRewriterTest, ScriptConversionTest) { Segments segments; - SmallLetterRewriter rewriter(engine_->GetConverter()); - const ConversionRequest request; + SmallLetterRewriter rewriter; struct InputOutputData { absl::string_view input; @@ -112,7 +114,7 @@ TEST_F(SmallLetterRewriterTest, ScriptConversionTest) { // Math Formula {"x^2+y^2=z^2", "x²+y²=z²"}, - // Chemical Forumula + // Chemical Formula {"Na_2CO_3", "Na₂CO₃"}, {"C_6H_12O_6", "C₆H₁₂O₆"}, {"(NH_4)_2CO_3", "(NH₄)₂CO₃"}, @@ -156,6 +158,11 @@ TEST_F(SmallLetterRewriterTest, ScriptConversionTest) { // Test behavior for each test cases in kInputOutputData. for (const InputOutputData &item : kInputOutputData) { InitSegments(item.input, item.input, &segments); + const ConversionRequest request = + ConversionRequestBuilder().SetKey(item.input).Build(); + std::optional resize_request = + rewriter.CheckResizeSegmentsRequest(request, segments); + EXPECT_FALSE(resize_request.has_value()); EXPECT_TRUE(rewriter.Rewrite(request, &segments)); EXPECT_TRUE(ContainCandidate(segments, item.output)); } @@ -163,51 +170,73 @@ TEST_F(SmallLetterRewriterTest, ScriptConversionTest) { // Mozc does not accept some superscript/subscript supported in Unicode for (const absl::string_view &item : kMozcUnsupportedInput) { InitSegments(item, item, &segments); + const ConversionRequest request = + ConversionRequestBuilder().SetKey(item).Build(); + std::optional resize_request = + rewriter.CheckResizeSegmentsRequest(request, segments); + EXPECT_FALSE(resize_request.has_value()); EXPECT_FALSE(rewriter.Rewrite(request, &segments)); } - // Invalid style input - InitSegments("^", "^", &segments); - EXPECT_FALSE(rewriter.Rewrite(request, &segments)); - - InitSegments("_", "_", &segments); - EXPECT_FALSE(rewriter.Rewrite(request, &segments)); - - InitSegments("12345", "12345", &segments); - EXPECT_FALSE(rewriter.Rewrite(request, &segments)); - - InitSegments("^^12345", "^^12345", &segments); - EXPECT_FALSE(rewriter.Rewrite(request, &segments)); + constexpr std::array kInvalidInput = {"^", "_", "12345", + "^^12345"}; + for (absl::string_view invalid_input : kInvalidInput) { + InitSegments("^", "^", &segments); + const ConversionRequest request = + ConversionRequestBuilder().SetKey(invalid_input).Build(); + std::optional resize_request = + rewriter.CheckResizeSegmentsRequest(request, segments); + EXPECT_FALSE(resize_request.has_value()); + EXPECT_FALSE(rewriter.Rewrite(request, &segments)); + } } TEST_F(SmallLetterRewriterTest, MultipleSegment) { Segments segments; - SmallLetterRewriter rewriter(engine_->GetConverter()); + SmallLetterRewriter rewriter; const ConversionRequest request; - // Multiple segments are combined. - InitSegments("^123", "^123", &segments); - AddSegment("45", "45", &segments); - AddSegment("6", "6", &segments); - EXPECT_TRUE(rewriter.Rewrite(request, &segments)); - EXPECT_EQ(segments.conversion_segments_size(), 1); - EXPECT_EQ(segments.conversion_segment(0).candidate(2).value, "¹²³⁴⁵⁶"); - - // If the segments is already resized, returns false. - InitSegments("^123", "^123", &segments); - AddSegment("^123", "^123", &segments); - segments.set_resized(true); - EXPECT_FALSE(rewriter.Rewrite(request, &segments)); - - // History segment has to be ignored. - // In this case 1st segment is HISTORY - // so this rewriting returns true. - InitSegments("^123", "^123", &segments); - AddSegment("^123", "^123", &segments); - segments.set_resized(true); - segments.mutable_segment(0)->set_segment_type(Segment::HISTORY); - EXPECT_TRUE(rewriter.Rewrite(request, &segments)); - EXPECT_EQ(segments.conversion_segment(0).candidate(1).value, "¹²³"); + { + // Multiple segments are combined. + InitSegments("^123", "^123", &segments); + AddSegment("45", "45", &segments); + AddSegment("6", "6", &segments); + const ConversionRequest request = + ConversionRequestBuilder().SetKey("^123456").Build(); + std::optional resize_request = + rewriter.CheckResizeSegmentsRequest(request, segments); + ASSERT_TRUE(resize_request.has_value()); + EXPECT_EQ(resize_request->segment_index, 0); + EXPECT_EQ(resize_request->segment_sizes[0], 7); + EXPECT_EQ(resize_request->segment_sizes[1], 0); + } + { + // If the segments is already resized, returns false. + InitSegments("^123", "^123", &segments); + AddSegment("^123", "^123", &segments); + segments.set_resized(true); + const ConversionRequest request = + ConversionRequestBuilder().SetKey("^123").Build(); + std::optional resize_request = + rewriter.CheckResizeSegmentsRequest(request, segments); + EXPECT_FALSE(resize_request.has_value()); + } + { + // History segment has to be ignored. + // In this case 1st segment is HISTORY + // so this rewriting returns true. + InitSegments("^123", "^123", &segments); + AddSegment("^123", "^123", &segments); + segments.set_resized(true); + segments.mutable_segment(0)->set_segment_type(Segment::HISTORY); + const ConversionRequest request = + ConversionRequestBuilder().SetKey("^123").Build(); + std::optional resize_request = + rewriter.CheckResizeSegmentsRequest(request, segments); + EXPECT_FALSE(resize_request.has_value()); + EXPECT_TRUE(rewriter.Rewrite(request, &segments)); + EXPECT_EQ(segments.conversion_segment(0).candidate(1).value, "¹²³"); + } } } // namespace