Skip to content

Commit

Permalink
Implement CheckResizeSegmentsRequest for SmallLetterRewriter.
Browse files Browse the repository at this point in the history
#codehealth

PiperOrigin-RevId: 712808009
  • Loading branch information
hiroyuki-komatsu committed Jan 7, 2025
1 parent 91729ed commit a080156
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 89 deletions.
14 changes: 14 additions & 0 deletions src/converter/converter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2369,4 +2369,18 @@ TEST_F(ConverterTest, IntegrationWithUnicodeRewriter) {
}
}

// Runs a conversion through the mock-data engine's converter and checks that
// the key "^123" ends up in a single conversion segment that offers the
// superscript candidate "¹²³".
TEST_F(ConverterTest, IntegrationWithSmallLetterRewriter) {
  std::unique_ptr<EngineInterface> mock_engine =
      MockDataEngineFactory::Create().value();
  ConverterInterface *converter = mock_engine->GetConverter();

  Segments segments;
  const ConversionRequest request =
      ConversionRequestBuilder().SetKey("^123").Build();

  // The conversion itself must succeed before the result can be inspected.
  ASSERT_TRUE(converter->StartConversion(request, &segments));
  EXPECT_EQ(segments.conversion_segments_size(), 1);
  EXPECT_TRUE(FindCandidateByValue("¹²³", segments.conversion_segment(0)));
}
} // namespace mozc
1 change: 1 addition & 0 deletions src/rewriter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ mozc_cc_test(
size = "small",
srcs = ["small_letter_rewriter_test.cc"],
deps = [
":rewriter_interface",
":small_letter_rewriter",
"//base/strings:assign",
"//converter:segments",
Expand Down
2 changes: 1 addition & 1 deletion src/rewriter/rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Rewriter::Rewriter(const engine::Modules &modules,
AddRewriter(std::make_unique<VariantsRewriter>(pos_matcher));
AddRewriter(std::make_unique<ZipcodeRewriter>(pos_matcher));
AddRewriter(std::make_unique<DiceRewriter>());
AddRewriter(std::make_unique<SmallLetterRewriter>(&parent_converter));
AddRewriter(std::make_unique<SmallLetterRewriter>());

if (absl::GetFlag(FLAGS_use_history_rewriter)) {
AddRewriter(
Expand Down
84 changes: 43 additions & 41 deletions src/rewriter/small_letter_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@

#include "rewriter/small_letter_rewriter.h"

#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>
#include <utility>

Expand All @@ -39,7 +42,6 @@
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "base/util.h"
#include "converter/converter_interface.h"
#include "converter/segments.h"
#include "protocol/commands.pb.h"
#include "request/conversion_request.h"
Expand Down Expand Up @@ -194,30 +196,6 @@ bool ConvertExpressions(const absl::string_view input, std::string *value) {
return input != *value;
}

// Makes sure that |segments| consists of a single conversion segment.
// Returns true right away when there is already exactly one conversion
// segment, and false when the segments were resized before or when
// ResizeSegment() fails. Otherwise the first conversion segment is resized by
// the difference in character count between |key| and that segment's key.
bool EnsureSingleSegment(const ConversionRequest &request, Segments *segments,
                         const ConverterInterface *parent_converter,
                         const absl::string_view key) {
  const bool already_single = segments->conversion_segments_size() == 1;
  if (already_single) {
    return true;
  }

  // Segments that were already resized by the user must be left untouched.
  if (segments->resized()) {
    return false;
  }

  const auto key_chars = Util::CharsLen(key);
  const auto head_chars =
      Util::CharsLen(segments->conversion_segment(0).key());
  const uint32_t delta = key_chars - head_chars;

  const bool resize_ok =
      parent_converter->ResizeSegment(segments, request, 0, delta);
  if (resize_ok) {
    // A successful resize is expected to leave exactly one segment.
    DCHECK_EQ(1, segments->conversion_segments_size());
  }
  return resize_ok;
}

void AddCandidate(std::string key, std::string description, std::string value,
int index, Segment *segment) {
DCHECK(segment);
Expand All @@ -238,13 +216,18 @@ void AddCandidate(std::string key, std::string description, std::string value,
Segment::Candidate::NO_VARIANTS_EXPANSION);
}

} // namespace
std::optional<std::string> GetValue(absl::string_view key) {
std::string value;
if (!ConvertExpressions(key, &value)) {
return std::nullopt;
}

SmallLetterRewriter::SmallLetterRewriter(
const ConverterInterface *parent_converter)
: parent_converter_(parent_converter) {
DCHECK(parent_converter_);
if (value.empty()) {
return std::nullopt;
}
return value;
}
} // namespace

int SmallLetterRewriter::capability(const ConversionRequest &request) const {
if (request.request().mixed_conversion()) {
Expand All @@ -253,31 +236,50 @@ int SmallLetterRewriter::capability(const ConversionRequest &request) const {
return RewriterInterface::CONVERSION;
}

bool SmallLetterRewriter::Rewrite(const ConversionRequest &request,
Segments *segments) const {
std::string key;
for (const Segment &segment : segments->conversion_segments()) {
key += segment.key();
std::optional<RewriterInterface::ResizeSegmentsRequest>
SmallLetterRewriter::CheckResizeSegmentsRequest(
const ConversionRequest &request, const Segments &segments) const {
if (segments.resized() || segments.conversion_segments_size() <= 1) {
return std::nullopt;
}

std::string value;
if (!ConvertExpressions(key, &value)) {
return false;
absl::string_view key = request.key();
const size_t key_len = Util::CharsLen(key);
if (key_len > std::numeric_limits<uint8_t>::max()) {
return std::nullopt;
}
const uint8_t segment_size = static_cast<uint8_t>(key_len);

if (value.empty()) {
std::optional<std::string> value = GetValue(key);
if (!value.has_value()) {
return std::nullopt;
}

ResizeSegmentsRequest resize_request = {
.segment_index = 0,
.segment_sizes = {segment_size, 0, 0, 0, 0, 0, 0, 0},
};
return resize_request;
}

bool SmallLetterRewriter::Rewrite(const ConversionRequest &request,
Segments *segments) const {
if (segments->conversion_segments_size() != 1) {
return false;
}

if (!EnsureSingleSegment(request, segments, parent_converter_, key)) {
absl::string_view key = request.key();
std::optional<std::string> value = GetValue(key);
if (!value.has_value()) {
return false;
}

Segment *segment = segments->mutable_conversion_segment(0);

// Candidates from this function should not be placed at a high position. -1
// will be overwritten with the last index of candidates.
AddCandidate(std::move(key), "上下付き文字", std::move(value), -1, segment);
AddCandidate(std::string(key), "上下付き文字", std::move(value.value()), -1,
segment);
return true;
}

Expand Down
13 changes: 5 additions & 8 deletions src/rewriter/small_letter_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#ifndef MOZC_REWRITER_SMALL_LETTER_REWRITER_H_
#define MOZC_REWRITER_SMALL_LETTER_REWRITER_H_

#include "converter/converter_interface.h"
#include <optional>
#include "converter/segments.h"
#include "request/conversion_request.h"
#include "rewriter/rewriter_interface.h"
Expand All @@ -40,17 +40,14 @@ namespace mozc {
// A rewriter which converts text to superscripts and subscripts.
class SmallLetterRewriter : public RewriterInterface {
public:
explicit SmallLetterRewriter(const ConverterInterface *parent_converter);
SmallLetterRewriter(const SmallLetterRewriter &) = default;
SmallLetterRewriter &operator=(const SmallLetterRewriter &) = default;

int capability(const ConversionRequest &request) const override;

std::optional<ResizeSegmentsRequest> CheckResizeSegmentsRequest(
const ConversionRequest &request,
const Segments &segments) const override;

bool Rewrite(const ConversionRequest &request,
Segments *segments) const override;

private:
const ConverterInterface *parent_converter_;
};
} // namespace mozc
#endif // MOZC_REWRITER_SMALL_LETTER_REWRITER_H_
107 changes: 68 additions & 39 deletions src/rewriter/small_letter_rewriter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@

#include "rewriter/small_letter_rewriter.h"

#include <array>
#include <cstddef>
#include <memory>
#include <optional>

#include "absl/strings/string_view.h"
#include "base/strings/assign.h"
Expand All @@ -40,6 +42,7 @@
#include "protocol/commands.pb.h"
#include "protocol/config.pb.h"
#include "request/conversion_request.h"
#include "rewriter/rewriter_interface.h"
#include "testing/gunit.h"
#include "testing/mozctest.h"

Expand Down Expand Up @@ -88,8 +91,7 @@ class SmallLetterRewriterTest : public testing::TestWithTempUserProfile {

TEST_F(SmallLetterRewriterTest, ScriptConversionTest) {
Segments segments;
SmallLetterRewriter rewriter(engine_->GetConverter());
const ConversionRequest request;
SmallLetterRewriter rewriter;

struct InputOutputData {
absl::string_view input;
Expand All @@ -112,7 +114,7 @@ TEST_F(SmallLetterRewriterTest, ScriptConversionTest) {
// Math Formula
{"x^2+y^2=z^2", "x²+y²=z²"},

// Chemical Forumula
// Chemical Formula
{"Na_2CO_3", "Na₂CO₃"},
{"C_6H_12O_6", "C₆H₁₂O₆"},
{"(NH_4)_2CO_3", "(NH₄)₂CO₃"},
Expand Down Expand Up @@ -156,58 +158,85 @@ TEST_F(SmallLetterRewriterTest, ScriptConversionTest) {
// Test behavior for each test case in kInputOutputData.
for (const InputOutputData &item : kInputOutputData) {
InitSegments(item.input, item.input, &segments);
const ConversionRequest request =
ConversionRequestBuilder().SetKey(item.input).Build();
std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
rewriter.CheckResizeSegmentsRequest(request, segments);
EXPECT_FALSE(resize_request.has_value());
EXPECT_TRUE(rewriter.Rewrite(request, &segments));
EXPECT_TRUE(ContainCandidate(segments, item.output));
}

// Mozc does not accept some superscripts/subscripts supported in Unicode.
for (const absl::string_view &item : kMozcUnsupportedInput) {
InitSegments(item, item, &segments);
const ConversionRequest request =
ConversionRequestBuilder().SetKey(item).Build();
std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
rewriter.CheckResizeSegmentsRequest(request, segments);
EXPECT_FALSE(resize_request.has_value());
EXPECT_FALSE(rewriter.Rewrite(request, &segments));
}

// Invalid style input
InitSegments("^", "^", &segments);
EXPECT_FALSE(rewriter.Rewrite(request, &segments));

InitSegments("_", "_", &segments);
EXPECT_FALSE(rewriter.Rewrite(request, &segments));

InitSegments("12345", "12345", &segments);
EXPECT_FALSE(rewriter.Rewrite(request, &segments));

InitSegments("^^12345", "^^12345", &segments);
EXPECT_FALSE(rewriter.Rewrite(request, &segments));
// Inputs that must not produce any superscript/subscript candidate: a lone
// marker, plain digits without a marker, and a doubled marker.
constexpr std::array<absl::string_view, 4> kInvalidInput = {"^", "_", "12345",
                                                            "^^12345"};
for (absl::string_view invalid_input : kInvalidInput) {
  // Build the segments from the input under test so that the segment key and
  // the request key stay consistent (previously every iteration used "^").
  InitSegments(invalid_input, invalid_input, &segments);
  const ConversionRequest request =
      ConversionRequestBuilder().SetKey(invalid_input).Build();
  // A single segment never needs resizing, so no request is expected.
  std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
      rewriter.CheckResizeSegmentsRequest(request, segments);
  EXPECT_FALSE(resize_request.has_value());
  EXPECT_FALSE(rewriter.Rewrite(request, &segments));
}
}

TEST_F(SmallLetterRewriterTest, MultipleSegment) {
Segments segments;
SmallLetterRewriter rewriter(engine_->GetConverter());
SmallLetterRewriter rewriter;
const ConversionRequest request;

// Multiple segments are combined.
InitSegments("^123", "^123", &segments);
AddSegment("45", "45", &segments);
AddSegment("6", "6", &segments);
EXPECT_TRUE(rewriter.Rewrite(request, &segments));
EXPECT_EQ(segments.conversion_segments_size(), 1);
EXPECT_EQ(segments.conversion_segment(0).candidate(2).value, "¹²³⁴⁵⁶");

// If the segments is already resized, returns false.
InitSegments("^123", "^123", &segments);
AddSegment("^123", "^123", &segments);
segments.set_resized(true);
EXPECT_FALSE(rewriter.Rewrite(request, &segments));

// History segment has to be ignored.
// In this case 1st segment is HISTORY
// so this rewriting returns true.
InitSegments("^123", "^123", &segments);
AddSegment("^123", "^123", &segments);
segments.set_resized(true);
segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
EXPECT_TRUE(rewriter.Rewrite(request, &segments));
EXPECT_EQ(segments.conversion_segment(0).candidate(1).value, "¹²³");
{
// Multiple segments are combined.
InitSegments("^123", "^123", &segments);
AddSegment("45", "45", &segments);
AddSegment("6", "6", &segments);
const ConversionRequest request =
ConversionRequestBuilder().SetKey("^123456").Build();
std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
rewriter.CheckResizeSegmentsRequest(request, segments);
ASSERT_TRUE(resize_request.has_value());
EXPECT_EQ(resize_request->segment_index, 0);
EXPECT_EQ(resize_request->segment_sizes[0], 7);
EXPECT_EQ(resize_request->segment_sizes[1], 0);
}
{
// If the segments are already resized, no resize request is returned.
InitSegments("^123", "^123", &segments);
AddSegment("^123", "^123", &segments);
segments.set_resized(true);
const ConversionRequest request =
ConversionRequestBuilder().SetKey("^123").Build();
std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
rewriter.CheckResizeSegmentsRequest(request, segments);
EXPECT_FALSE(resize_request.has_value());
}
{
// History segment has to be ignored.
// In this case 1st segment is HISTORY
// so this rewriting returns true.
InitSegments("^123", "^123", &segments);
AddSegment("^123", "^123", &segments);
segments.set_resized(true);
segments.mutable_segment(0)->set_segment_type(Segment::HISTORY);
const ConversionRequest request =
ConversionRequestBuilder().SetKey("^123").Build();
std::optional<RewriterInterface::ResizeSegmentsRequest> resize_request =
rewriter.CheckResizeSegmentsRequest(request, segments);
EXPECT_FALSE(resize_request.has_value());
EXPECT_TRUE(rewriter.Rewrite(request, &segments));
EXPECT_EQ(segments.conversion_segment(0).candidate(1).value, "¹²³");
}
}

} // namespace
Expand Down

0 comments on commit a080156

Please sign in to comment.