Skip to content

Commit

Permalink
Implement CheckResizeSegmentsRequest for UserBoundaryHistoryRewriter.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 712809555
  • Loading branch information
hiroyuki-komatsu committed Jan 7, 2025
1 parent a080156 commit 9e51d33
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 154 deletions.
2 changes: 1 addition & 1 deletion src/rewriter/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,11 @@ mozc_cc_test(
size = "small",
srcs = ["user_boundary_history_rewriter_test.cc"],
deps = [
":rewriter_interface",
":user_boundary_history_rewriter",
"//base:file_util",
"//base:system_util",
"//config:config_handler",
"//converter:converter_mock",
"//converter:segments",
"//protocol:config_cc_proto",
"//request:conversion_request",
Expand Down
3 changes: 1 addition & 2 deletions src/rewriter/rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ Rewriter::Rewriter(const engine::Modules &modules,
AddRewriter(std::make_unique<SmallLetterRewriter>());

if (absl::GetFlag(FLAGS_use_history_rewriter)) {
AddRewriter(
std::make_unique<UserBoundaryHistoryRewriter>(&parent_converter));
AddRewriter(std::make_unique<UserBoundaryHistoryRewriter>());
AddRewriter(
std::make_unique<UserSegmentHistoryRewriter>(&pos_matcher, pos_group));
}
Expand Down
110 changes: 50 additions & 60 deletions src/rewriter/user_boundary_history_rewriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
#include "base/file_util.h"
#include "base/util.h"
#include "base/vlog.h"
#include "converter/converter_interface.h"
#include "converter/segments.h"
#include "protocol/config.pb.h"
#include "request/conversion_request.h"
#include "rewriter/rewriter_interface.h"
#include "storage/lru_storage.h"
#include "usage_stats/usage_stats.h"

Expand Down Expand Up @@ -166,10 +166,7 @@ class SegmentsKey {

} // namespace

UserBoundaryHistoryRewriter::UserBoundaryHistoryRewriter(
const ConverterInterface *parent_converter)
: parent_converter_(parent_converter) {
DCHECK(parent_converter_);
UserBoundaryHistoryRewriter::UserBoundaryHistoryRewriter() {
Reload();
}

Expand Down Expand Up @@ -213,78 +210,45 @@ void UserBoundaryHistoryRewriter::Finish(const ConversionRequest &request,
}
}

bool UserBoundaryHistoryRewriter::Rewrite(const ConversionRequest &request,
Segments *segments) const {
std::optional<RewriterInterface::ResizeSegmentsRequest>
UserBoundaryHistoryRewriter::CheckResizeSegmentsRequest(
const ConversionRequest &request, const Segments &segments) const {
if (segments.resized()) {
return std::nullopt;
}

if (request.config().incognito_mode()) {
MOZC_VLOG(2) << "incognito mode";
return false;
return std::nullopt;
}

if (request.config().history_learning_level() == config::Config::NO_HISTORY) {
MOZC_VLOG(2) << "history_learning_level is NO_HISTORY";
return false;
return std::nullopt;
}

if (!request.enable_user_history_for_conversion()) {
MOZC_VLOG(2) << "user history for conversion is disabled";
return false;
return std::nullopt;
}

if (request.skip_slow_rewriters()) {
return false;
}

if (!segments->resized()) {
return Resize(request, *segments);
}

return false;
}

// Asks the backing storage to delete the elements that have been untouched
// for 62 days (as named by the storage method called below).
//
// Always returns true.
bool UserBoundaryHistoryRewriter::Sync() {
  storage_.DeleteElementsUntouchedFor62Days();
  return true;
}

bool UserBoundaryHistoryRewriter::Reload() {
const std::string filename = ConfigFileStream::GetFileName(kFileName);
if (!storage_.OpenOrCreate(filename.c_str(), kValueSize, kLruSize,
kSeedValue)) {
LOG(WARNING) << "cannot initialize UserBoundaryHistoryRewriter";
storage_.Clear();
return false;
}

constexpr absl::string_view kFileSuffix = ".merge_pending";
const std::string merge_pending_file = absl::StrCat(filename, kFileSuffix);

// merge pending file does not always exist.
if (absl::Status s = FileUtil::FileExists(merge_pending_file); s.ok()) {
storage_.Merge(merge_pending_file.c_str());
FileUtil::UnlinkOrLogError(merge_pending_file);
} else if (!absl::IsNotFound(s)) {
LOG(ERROR) << "Cannot check if " << merge_pending_file << " exists: " << s;
return std::nullopt;
}

return true;
}

bool UserBoundaryHistoryRewriter::Resize(
const ConversionRequest &request, Segments &segments) const {
const size_t target_segments_size = segments.conversion_segments_size();

// No effective segments found
if (target_segments_size == 0) {
return false;
return std::nullopt;
}

std::optional<const SegmentsKey> segments_key = SegmentsKey::Create(segments);
if (!segments_key) {
MOZC_VLOG(2) << "too long segment";
return false;
return std::nullopt;
}

bool result = false;
for (size_t seg_idx = 0; seg_idx < target_segments_size; ++seg_idx) {
constexpr int kMaxKeysSize = 5;
const int keys_size =
Expand Down Expand Up @@ -312,18 +276,44 @@ bool UserBoundaryHistoryRewriter::Resize(
MOZC_VLOG(2) << "ResizeSegment key: " << key << " segments: [" << seg_idx
<< ", " << seg_size << "] "
<< "resize: [" << absl::StrJoin(updated_array, " ") << "]";
if (parent_converter_->ResizeSegments(&segments, request, seg_idx,
updated_array)) {
result = true;
} else {
LOG(WARNING) << "ResizeSegment failed for key: " << key;
}
seg_idx += seg_size - 1; // -1 as the main loop will add +1.
break;

const ResizeSegmentsRequest resize_request = {
.segment_index = seg_idx,
.segment_sizes = std::move(updated_array),
};
return resize_request;
}
}

return result;
return std::nullopt;
}

// Asks the backing storage to delete the elements that have been untouched
// for 62 days (as named by the storage method called below).
//
// Always returns true.
bool UserBoundaryHistoryRewriter::Sync() {
  storage_.DeleteElementsUntouchedFor62Days();
  return true;
}

bool UserBoundaryHistoryRewriter::Reload() {
const std::string filename = ConfigFileStream::GetFileName(kFileName);
if (!storage_.OpenOrCreate(filename.c_str(), kValueSize, kLruSize,
kSeedValue)) {
LOG(WARNING) << "cannot initialize UserBoundaryHistoryRewriter";
storage_.Clear();
return false;
}

constexpr absl::string_view kFileSuffix = ".merge_pending";
const std::string merge_pending_file = absl::StrCat(filename, kFileSuffix);

// merge pending file does not always exist.
if (absl::Status s = FileUtil::FileExists(merge_pending_file); s.ok()) {
storage_.Merge(merge_pending_file.c_str());
FileUtil::UnlinkOrLogError(merge_pending_file);
} else if (!absl::IsNotFound(s)) {
LOG(ERROR) << "Cannot check if " << merge_pending_file << " exists: " << s;
}

return true;
}

bool UserBoundaryHistoryRewriter::Insert(const ConversionRequest &request,
Expand Down
12 changes: 6 additions & 6 deletions src/rewriter/user_boundary_history_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#ifndef MOZC_REWRITER_USER_BOUNDARY_HISTORY_REWRITER_H_
#define MOZC_REWRITER_USER_BOUNDARY_HISTORY_REWRITER_H_

#include "converter/converter_interface.h"
#include <optional>
#include "converter/segments.h"
#include "request/conversion_request.h"
#include "rewriter/rewriter_interface.h"
Expand All @@ -40,22 +40,22 @@ namespace mozc {

class UserBoundaryHistoryRewriter : public RewriterInterface {
public:
explicit UserBoundaryHistoryRewriter(
const ConverterInterface *parent_converter);
UserBoundaryHistoryRewriter();

std::optional<ResizeSegmentsRequest> CheckResizeSegmentsRequest(
const ConversionRequest &request,
const Segments &segments) const override;
bool Rewrite(const ConversionRequest &request,
Segments *segments) const override;
Segments *segments) const override { return false; }

void Finish(const ConversionRequest &request, Segments *segments) override;
bool Sync() override;
bool Reload() override;
void Clear() override;

private:
bool Resize(const ConversionRequest &request, Segments &segments) const;
bool Insert(const ConversionRequest &request, Segments &segments);

const ConverterInterface *parent_converter_;
storage::LruStorage storage_;
};

Expand Down
Loading

0 comments on commit 9e51d33

Please sign in to comment.