diff --git a/docs/tglogutil-compaction-man.md b/docs/tglogutil-compaction-man.md new file mode 100644 index 00000000..9b39c4ac --- /dev/null +++ b/docs/tglogutil-compaction-man.md @@ -0,0 +1,47 @@ +# `tglogutil-compaction` - トランザクションログを再編し、ディスク容量を削減する + +`tglogutil compaction` コマンドは Tsurugi トランザクションログを再編し、ディスク容量を削減する + +## SYNOPSIS + +``` +$ tglogutil compaction [options] +``` + +## DESCRIPTION + +`` で指定されたトランザクションログを再編する. +Specify the location set in the `log_location` parameter in the `[datastore]` section of the configuration file of Tsurugi server (`tsurugi.ini`). + +Options: +* `--thread-num=` + * Number (default `1`) of concurrent processing thread of reading log files +* `--epoch=` + * Upper limit epoch number to be accepted as valid data (default is the value recorded in the transaction log directory) +* `--force=` + * 実行開始前のプロンプトを出さない (default `false`) +* `--dry-run=` (**未実装**) + * トランザクションログファイルを変更しない. 作業ディレクトリに再編したデータを作成するが, その生成物を採用せず破棄する (default `false`) +* `--verbose=` + * verbose mode (default `false`) +* `--make-backup=` + * 作業ディレクトリ上で再編したデータを作成したあと, 元の dblogdir の内容のバックアップを残すかどうか. `false` の場合には元のディレクトリの内容は消去される (default `false`) + +## EXIT STATUS + +* 0: No errors + * Compaction process completed successfully +* 16: Error + * 一時ディレクトリの削除に失敗した (処理を継続) +* 64 or more: Unable to handle + * `dblogdir` does not exist + * `dblogdir` is inaccessible + * `dblogdir` has file format error + * Specified a directory that is not the transaction log directory + * Specified a transaction log directory of unsupported format version + * `epoch` file does not exist + * files in `dblogdir` are damaged + +## PRECAUTIONS FOR USE + +The compaction process involves rewriting data, so it is recommended to back up the entire directory before using this tool. diff --git a/docs/tglogutil_ja.md b/docs/tglogutil_ja.md index fdb695d1..ebcb945e 100644 --- a/docs/tglogutil_ja.md +++ b/docs/tglogutil_ja.md @@ -2,6 +2,8 @@ * 2024-04-10 ban * for 1.0.0-BETA4 +* 2024-06-21 ban + * for 1.0.0-BETA5, compaction サブコマンド ## この文書について @@ -26,6 +28,7 @@ ## 機能一覧 (サブコマンド) * repair: 永続化データディレクトリの内容を修復する. +* compaction: 永続化データディレクトリの内容を再編し冗長な情報を削除する. ### サブコマンド repair (自動修復) @@ -63,3 +66,7 @@ $ tglogutil repair [options] 使用上の注意 * データの書き換えを伴うため, ディレクトリのバックアップを取っておくことが望ましい. + +### サブコマンド compaction + + diff --git a/src/limestone/datastore_snapshot.cpp b/src/limestone/datastore_snapshot.cpp index 6d63765e..22f65af8 100644 --- a/src/limestone/datastore_snapshot.cpp +++ b/src/limestone/datastore_snapshot.cpp @@ -30,8 +30,7 @@ #include "log_entry.h" #include "sortdb_wrapper.h" -namespace limestone::api { -using namespace limestone::internal; +namespace limestone::internal { constexpr std::size_t write_version_size = sizeof(epoch_id_type) + sizeof(std::uint64_t); static_assert(write_version_size == 16); @@ -53,59 +52,57 @@ static int comp_twisted_key(const std::string_view& a, const std::string_view& b return std::memcmp(b.data(), a.data(), write_version_size); } -void datastore::create_snapshot() { - auto& from_dir = location_; +[[maybe_unused]] +static void insert_entry_or_update_to_max(sortdb_wrapper* sortdb, log_entry& e) { + bool need_write = true; + // skip older entry than already inserted + std::string value; + if (sortdb->get(e.key_sid(), &value)) { + write_version_type write_version; + e.write_version(write_version); + if (write_version < write_version_type(value.substr(1))) { + need_write = false; + } + } + if (need_write) { + std::string db_value; + db_value.append(1, static_cast(e.type())); + db_value.append(e.value_etc()); + sortdb->put(e.key_sid(), db_value); + } +} + +[[maybe_unused]] +static void insert_twisted_entry(sortdb_wrapper* sortdb, log_entry& e) { + // key_sid: storage_id[8] key[*], value_etc: epoch[8]LE minor_version[8]LE value[*], type: type[1] + // db_key: epoch[8]BE minor_version[8]BE storage_id[8] key[*], db_value: type[1] value[*] + std::string db_key(write_version_size + e.key_sid().size(), '\0'); + store_bswap64_value(&db_key[0], &e.value_etc()[0]); // NOLINT(readability-container-data-pointer) + store_bswap64_value(&db_key[8], &e.value_etc()[8]); + std::memcpy(&db_key[write_version_size], e.key_sid().data(), e.key_sid().size()); + std::string db_value(1, static_cast(e.type())); + db_value.append(e.value_etc().substr(write_version_size)); + sortdb->put(db_key, db_value); +} + +static std::pair> create_sortdb_from_wals(const boost::filesystem::path& from_dir, int num_worker) { #if defined SORT_METHOD_PUT_ONLY auto sortdb = std::make_unique(from_dir, comp_twisted_key); #else auto sortdb = std::make_unique(from_dir); #endif - dblog_scan logscan{location_}; + dblog_scan logscan{from_dir}; epoch_id_type ld_epoch = logscan.last_durable_epoch_in_dir(); - [[maybe_unused]] - auto insert_entry_or_update_to_max = [&sortdb](log_entry& e){ - bool need_write = true; - - // skip older entry than already inserted - std::string value; - if (sortdb->get(e.key_sid(), &value)) { - write_version_type write_version; - e.write_version(write_version); - if (write_version < write_version_type(value.substr(1))) { - need_write = false; - } - } - - if (need_write) { - std::string db_value; - db_value.append(1, static_cast(e.type())); - db_value.append(e.value_etc()); - sortdb->put(e.key_sid(), db_value); - } - }; - [[maybe_unused]] - auto insert_twisted_entry = [&sortdb](log_entry& e){ - // key_sid: storage_id[8] key[*], value_etc: epoch[8]LE minor_version[8]LE value[*], type: type[1] - // db_key: epoch[8]BE minor_version[8]BE storage_id[8] key[*], db_value: type[1] value[*] - std::string db_key(write_version_size + e.key_sid().size(), '\0'); - store_bswap64_value(&db_key[0], &e.value_etc()[0]); // NOLINT(readability-container-data-pointer) - store_bswap64_value(&db_key[8], &e.value_etc()[8]); - std::memcpy(&db_key[write_version_size], e.key_sid().data(), e.key_sid().size()); - std::string db_value(1, static_cast(e.type())); - db_value.append(e.value_etc().substr(write_version_size)); - sortdb->put(db_key, db_value); - }; #if defined SORT_METHOD_PUT_ONLY - auto add_entry = insert_twisted_entry; + auto add_entry = [&sortdb](log_entry& e){insert_twisted_entry(sortdb.get(), e);}; bool works_with_multi_thread = true; #else - auto add_entry = insert_entry_or_update_to_max; + auto add_entry = [&sortdb](log_entry& e){insert_entry_or_update_to_max(sortdb.get(), e);}; bool works_with_multi_thread = false; #endif - int num_worker = recover_max_parallelism_; if (!works_with_multi_thread && num_worker > 1) { LOG(INFO) << "/:limestone:config:datastore this sort method does not work correctly with multi-thread, so force the number of recover process thread = 1"; num_worker = 1; @@ -113,38 +110,20 @@ void datastore::create_snapshot() { logscan.set_thread_num(num_worker); try { epoch_id_type max_appeared_epoch = logscan.scan_pwal_files_throws(ld_epoch, add_entry); - epoch_id_switched_.store(max_appeared_epoch); - epoch_id_informed_.store(max_appeared_epoch); + return {max_appeared_epoch, std::move(sortdb)}; } catch (std::runtime_error& e) { VLOG_LP(log_info) << "failed to scan pwal files: " << e.what(); LOG(ERROR) << "/:limestone recover process failed. (cause: corruption detected in transaction log data directory), " << "see https://github.com/project-tsurugi/tsurugidb/blob/master/docs/troubleshooting-guide.md"; - LOG(ERROR) << "/:limestone dblogdir (transaction log directory): " << location_; + LOG(ERROR) << "/:limestone dblogdir (transaction log directory): " << from_dir; throw std::runtime_error("dblogdir is corrupted"); } +} - boost::filesystem::path sub_dir = location_ / boost::filesystem::path(std::string(snapshot::subdirectory_name_)); - boost::system::error_code error; - const bool result_check = boost::filesystem::exists(sub_dir, error); - if (!result_check || error) { - const bool result_mkdir = boost::filesystem::create_directory(sub_dir, error); - if (!result_mkdir || error) { - LOG_LP(ERROR) << "fail to create directory"; - throw std::runtime_error("I/O error"); - } - } - - boost::filesystem::path snapshot_file = sub_dir / boost::filesystem::path(std::string(snapshot::file_name_)); - VLOG_LP(log_info) << "generating snapshot file: " << snapshot_file; - FILE* ostrm = fopen(snapshot_file.c_str(), "w"); // NOLINT(*-owning-memory) - if (!ostrm) { - LOG_LP(ERROR) << "cannot create snapshot file (" << snapshot_file << ")"; - throw std::runtime_error("I/O error"); - } - setvbuf(ostrm, nullptr, _IOFBF, 128L * 1024L); // NOLINT, NB. glibc may ignore size when _IOFBF and buffer=NULL +static void sortdb_foreach(sortdb_wrapper *sortdb, std::function write_snapshot_entry) { static_assert(sizeof(log_entry::entry_type) == 1); #if defined SORT_METHOD_PUT_ONLY - sortdb->each([&ostrm, last_key = std::string{}](std::string_view db_key, std::string_view db_value) mutable { + sortdb->each([write_snapshot_entry, last_key = std::string{}](const std::string_view db_key, const std::string_view db_value) mutable { // using the first entry in GROUP BY (original-)key // NB: max versions comes first (by the custom-comparator) std::string_view key(db_key.data() + write_version_size, db_key.size() - write_version_size); @@ -160,7 +139,7 @@ void datastore::create_snapshot() { store_bswap64_value(&value[0], &db_key[0]); store_bswap64_value(&value[8], &db_key[8]); std::memcpy(&value[write_version_size], &db_value[1], db_value.size() - 1); - log_entry::write(ostrm, key, value); + write_snapshot_entry(key, value); break; } case log_entry::entry_type::remove_entry: @@ -171,12 +150,11 @@ void datastore::create_snapshot() { } }); #else - sortdb->each([&ostrm](std::string_view db_key, std::string_view db_value) { + sortdb->each([&write_snapshot_entry](const std::string_view db_key, const std::string_view db_value) { auto entry_type = static_cast(db_value[0]); - db_value.remove_prefix(1); switch (entry_type) { case log_entry::entry_type::normal_entry: - log_entry::write(ostrm, db_key, db_value); + write_snapshot_entry(db_key, db_value.substr(1)); break; case log_entry::entry_type::remove_entry: break; // skip @@ -186,6 +164,82 @@ void datastore::create_snapshot() { } }); #endif +} + +void create_comapct_pwal(const boost::filesystem::path& from_dir, boost::filesystem::path& to_dir, int num_worker) { + auto [max_appeared_epoch, sortdb] = create_sortdb_from_wals(from_dir, num_worker); + + boost::system::error_code error; + const bool result_check = boost::filesystem::exists(to_dir, error); + if (!result_check || error) { + const bool result_mkdir = boost::filesystem::create_directory(to_dir, error); + if (!result_mkdir || error) { + LOG_LP(ERROR) << "fail to create directory " << to_dir; + throw std::runtime_error("I/O error"); + } + } + + boost::filesystem::path snapshot_file = to_dir / boost::filesystem::path("pwal_0000.compacted"); + VLOG_LP(log_info) << "generating compacted pwal file: " << snapshot_file; + FILE* ostrm = fopen(snapshot_file.c_str(), "w"); // NOLINT(*-owning-memory) + if (!ostrm) { + LOG_LP(ERROR) << "cannot create snapshot file (" << snapshot_file << ")"; + throw std::runtime_error("I/O error"); + } + setvbuf(ostrm, nullptr, _IOFBF, 128L * 1024L); // NOLINT, NB. glibc may ignore size when _IOFBF and buffer=NULL + bool rewind = true; // TODO: change by flag + epoch_id_type epoch = rewind ? 0 : max_appeared_epoch; + log_entry::begin_session(ostrm, epoch); + auto write_snapshot_entry = [&ostrm, &rewind](std::string_view key_stid, std::string_view value_etc) { + if (rewind) { + static std::string value{}; + value = value_etc; + std::memset(value.data(), 0, 16); + log_entry::write(ostrm, key_stid, value); + } else { + log_entry::write(ostrm, key_stid, value_etc); + } + }; + sortdb_foreach(sortdb.get(), write_snapshot_entry); + //log_entry::end_session(ostrm, epoch); + if (fclose(ostrm) != 0) { // NOLINT(*-owning-memory) + LOG_LP(ERROR) << "cannot close snapshot file (" << snapshot_file << "), errno = " << errno; + throw std::runtime_error("I/O error"); + } +} + +} + +namespace limestone::api { +using namespace limestone::internal; + +void datastore::create_snapshot() { + const auto& from_dir = location_; + auto [max_appeared_epoch, sortdb] = create_sortdb_from_wals(from_dir, recover_max_parallelism_); + epoch_id_switched_.store(max_appeared_epoch); + epoch_id_informed_.store(max_appeared_epoch); + + boost::filesystem::path sub_dir = location_ / boost::filesystem::path(std::string(snapshot::subdirectory_name_)); + boost::system::error_code error; + const bool result_check = boost::filesystem::exists(sub_dir, error); + if (!result_check || error) { + const bool result_mkdir = boost::filesystem::create_directory(sub_dir, error); + if (!result_mkdir || error) { + LOG_LP(ERROR) << "fail to create directory"; + throw std::runtime_error("I/O error"); + } + } + + boost::filesystem::path snapshot_file = sub_dir / boost::filesystem::path(std::string(snapshot::file_name_)); + VLOG_LP(log_info) << "generating snapshot file: " << snapshot_file; + FILE* ostrm = fopen(snapshot_file.c_str(), "w"); // NOLINT(*-owning-memory) + if (!ostrm) { + LOG_LP(ERROR) << "cannot create snapshot file (" << snapshot_file << ")"; + throw std::runtime_error("I/O error"); + } + setvbuf(ostrm, nullptr, _IOFBF, 128L * 1024L); // NOLINT, NB. glibc may ignore size when _IOFBF and buffer=NULL + auto write_snapshot_entry = [&ostrm](std::string_view key, std::string_view value){log_entry::write(ostrm, key, value);}; + sortdb_foreach(sortdb.get(), write_snapshot_entry); if (fclose(ostrm) != 0) { // NOLINT(*-owning-memory) LOG_LP(ERROR) << "cannot close snapshot file (" << snapshot_file << "), errno = " << errno; throw std::runtime_error("I/O error"); diff --git a/src/limestone/dblog_scan.h b/src/limestone/dblog_scan.h index ef377807..c92c8d5c 100644 --- a/src/limestone/dblog_scan.h +++ b/src/limestone/dblog_scan.h @@ -86,6 +86,7 @@ class dblog_scan { explicit dblog_scan(const boost::filesystem::path& logdir) : dblogdir_(logdir) { } explicit dblog_scan(boost::filesystem::path&& logdir) : dblogdir_(std::move(logdir)) { } + const boost::filesystem::path& get_dblogdir() { return dblogdir_; } void set_thread_num(int thread_num) noexcept { thread_num_ = thread_num; } void set_fail_fast(bool fail_fast) noexcept { fail_fast_ = fail_fast; } void detach_wal_files(bool skip_empty_files = true); diff --git a/src/limestone/dblogutil/dblogutil.cpp b/src/limestone/dblogutil/dblogutil.cpp index 7b7314b5..d95459f3 100644 --- a/src/limestone/dblogutil/dblogutil.cpp +++ b/src/limestone/dblogutil/dblogutil.cpp @@ -15,6 +15,7 @@ */ #include +#include // NOLINT(*-deprecated-headers): does not provide std::mkdtemp #include #include #include "logging_helper.h" @@ -32,8 +33,18 @@ DEFINE_int32(thread_num, 1, "specify thread num of scanning wal file"); DEFINE_bool(cut, false, "repair by cutting for error-truncate and error-broken"); DEFINE_string(rotate, "all", "rotate files"); DEFINE_string(output_format, "human-readable", "format of output (human-readable/machine-readable)"); +DEFINE_bool(h, false, "display help message"); +DEFINE_string(working_dir, "", "working directory"); +DEFINE_bool(verbose, false, "verbose"); +DEFINE_bool(dry_run, false, "dry run"); +DEFINE_bool(force, false, "(subcommand compaction) skip start prompt"); +DEFINE_bool(make_backup, false, "(subcommand compaction) make backup of target dblogdir"); -enum subcommand { cmd_inspect, cmd_repair }; +enum subcommand { + cmd_inspect, + cmd_repair, + cmd_compaction, +}; void log_and_exit(int error) { VLOG(10) << "exiting with code " << error; @@ -146,7 +157,111 @@ void repair(dblog_scan &ds, std::optional epoch) { } } +namespace internal { + void create_comapct_pwal(const boost::filesystem::path& from_dir, boost::filesystem::path& to_dir, int num_worker); +} + +boost::filesystem::path make_tmp_dir_next_to(const boost::filesystem::path& target_dir) { + // assume: already checked existence and is_dir + + auto tmpdirname = boost::filesystem::canonical(target_dir).string() + ".work_XXXXXX"; + if (::mkdtemp(tmpdirname.data()) == nullptr) { + LOG_LP(ERROR) << "mkdtemp failed, errno = " << errno; + throw std::runtime_error("I/O error"); + } + return {tmpdirname}; +} + +boost::filesystem::path make_backup_dir_next_to(const boost::filesystem::path& target_dir) { + auto tmpdirname = boost::filesystem::canonical(target_dir).string() + ".backup_XXXXXX"; + if (::mkdtemp(tmpdirname.data()) == nullptr) { + LOG_LP(ERROR) << "mkdtemp failed, errno = " << errno; + throw std::runtime_error("I/O error"); + } + return {tmpdirname}; +} + +void compaction(dblog_scan &ds, std::optional epoch) { + epoch_id_type ld_epoch{}; + if (epoch.has_value()) { + ld_epoch = epoch.value(); + } else { + try { + ld_epoch = ds.last_durable_epoch_in_dir(); + } catch (std::runtime_error& ex) { + LOG(ERROR) << "reading epoch file is failed: " << ex.what(); + log_and_exit(64); + } + std::cout << "durable-epoch: " << ld_epoch << std::endl; + } + auto from_dir = ds.get_dblogdir(); + boost::filesystem::path tmp; + if (!FLAGS_working_dir.empty()) { + tmp = FLAGS_working_dir; + // TODO: check, error if exist and non-empty + } else { + tmp = make_tmp_dir_next_to(from_dir); + } + std::cout << "working-directory: " << tmp << std::endl; + + if (!FLAGS_force) { + // prompt + char yn = 'N'; + std::cout << "execute? (y/N) "; + std::cin >> yn; + if (yn != 'y' && yn != 'Y') { + LOG(ERROR) << "aborted"; + log_and_exit(0); + } + } + + setup_initial_logdir(tmp); + + VLOG_LP(log_info) << "making compact pwal file to " << tmp; + create_comapct_pwal(from_dir, tmp, FLAGS_thread_num); + + // epoch file + VLOG_LP(log_info) << "making compact epoch file to " << tmp; + FILE* strm = fopen((tmp / "epoch").c_str(), "a"); // NOLINT(*-owning-memory) + if (!strm) { + LOG_LP(ERROR) << "fopen failed, errno = " << errno; + throw std::runtime_error("I/O error"); + } + // TODO: if to-flat mode, set ld_epoch := 1 + log_entry::durable_epoch(strm, ld_epoch); + if (fflush(strm) != 0) { + LOG_LP(ERROR) << "fflush failed, errno = " << errno; + throw std::runtime_error("I/O error"); + } + if (fsync(fileno(strm)) != 0) { + LOG_LP(ERROR) << "fsync failed, errno = " << errno; + throw std::runtime_error("I/O error"); + } + if (fclose(strm) != 0) { // NOLINT(*-owning-memory) + LOG_LP(ERROR) << "fclose failed, errno = " << errno; + throw std::runtime_error("I/O error"); + } + + if (FLAGS_make_backup) { + auto bkdir = make_backup_dir_next_to(from_dir); + VLOG_LP(log_info) << "renaming " << from_dir << " to " << bkdir << " for backup"; + boost::filesystem::rename(from_dir, bkdir); + } else { + VLOG_LP(log_info) << "deleting " << from_dir; + boost::filesystem::remove_all(from_dir); + } + VLOG_LP(log_info) << "renaming " << tmp << " to " << from_dir; + boost::filesystem::rename(tmp, from_dir); + + std::cout << "compaction was successfully completed: " << from_dir << std::endl; +} + int main(char *dir, subcommand mode) { // NOLINT + if (FLAGS_verbose) { + if (FLAGS_v < log_info) { + FLAGS_v = log_info; + } + } std::optional opt_epoch; if (FLAGS_epoch.empty()) { opt_epoch = std::nullopt; @@ -175,6 +290,7 @@ int main(char *dir, subcommand mode) { // NOLINT ds.set_thread_num(FLAGS_thread_num); if (mode == cmd_inspect) inspect(ds, opt_epoch); if (mode == cmd_repair) repair(ds, opt_epoch); + if (mode == cmd_compaction) compaction(ds, opt_epoch); } catch (std::runtime_error& e) { LOG(ERROR) << e.what(); log_and_exit(64); @@ -185,16 +301,24 @@ int main(char *dir, subcommand mode) { // NOLINT } int main(int argc, char *argv[]) { // NOLINT + gflags::SetUsageMessage("Tsurugi dblog maintenance command\n\n" + //"usage: tglogutil {inspect | repair | compaction} [options] " + "usage: tglogutil {repair | compaction} [options] " + ); FLAGS_logtostderr = true; gflags::ParseCommandLineFlags(&argc, &argv, true); const char *arg0 = argv[0]; // NOLINT(*-pointer-arithmetic) google::InitGoogleLogging(arg0); subcommand mode{}; auto usage = [&arg0]() { - //std::cout << "usage: " << arg0 << " {inspect | repair} [options] " << std::endl; - std::cout << "usage: " << arg0 << " repair [options] " << std::endl; + //std::cout << "usage: " << arg0 << " {inspect | repair | compaction} [options] " << std::endl; + std::cout << "usage: " << arg0 << " {repair | compaction} [options] " << std::endl; log_and_exit(1); }; + if (FLAGS_h) { + gflags::ShowUsageWithFlags(arg0); + exit(1); + } if (argc < 3) { LOG(ERROR) << "missing parameters"; usage(); @@ -205,6 +329,8 @@ int main(int argc, char *argv[]) { // NOLINT mode = cmd_inspect; } else if (strcmp(arg1, "repair") == 0) { mode = cmd_repair; + } else if (strcmp(arg1, "compaction") == 0) { + mode = cmd_compaction; } else { LOG(ERROR) << "unknown subcommand: " << arg1; usage(); diff --git a/test/limestone/utils/dblogutil_compaction_test.cpp b/test/limestone/utils/dblogutil_compaction_test.cpp new file mode 100644 index 00000000..8c667540 --- /dev/null +++ b/test/limestone/utils/dblogutil_compaction_test.cpp @@ -0,0 +1,185 @@ + +#include +#include +#include + +#include + +#include "dblog_scan.h" +#include "internal.h" +#include "log_entry.h" + +#include "test_root.h" + +namespace limestone::testing { + +using namespace std::literals; +using namespace limestone::api; +using namespace limestone::internal; + +extern void create_file(const boost::filesystem::path& path, std::string_view content); +extern std::string read_entire_file(const boost::filesystem::path& path); +extern std::string data_manifest(int persistent_format_version = 1); + +extern const std::string_view epoch_0x100_str; + +extern const std::string_view data_normal; +extern const std::string_view data_normal2; +extern const std::string_view data_nondurable; +extern const std::string_view data_repaired_nondurable; +extern const std::string_view data_zerofill; +extern const std::string_view data_truncated_normal_entry; +extern const std::string_view data_truncated_epoch_header; +extern const std::string_view data_truncated_invalidated_normal_entry; +extern const std::string_view data_truncated_invalidated_epoch_header; +extern const std::string_view data_allzero; + +#define UTIL_COMMAND "../src/tglogutil" + +static int invoke(const std::string& command, std::string& out) { + FILE* fp; + fp = popen(command.c_str(), "r"); + char buf[4096]; + std::ostringstream ss; + std::size_t rc; + while ((rc = fread(buf, 1, 4095, fp)) > 0) { + ss.write(buf, rc); + } + out.assign(ss.str()); + LOG(INFO) << "\n" << out; + return pclose(fp); +} + +class dblogutil_compaction_test : public ::testing::Test { +public: +static constexpr const char* location = "/tmp/dblogutil_compaction_test"; + + void SetUp() { + boost::filesystem::remove_all(location); + if (!boost::filesystem::create_directory(location)) { + std::cerr << "cannot make directory" << std::endl; + } + } + + void TearDown() { + boost::filesystem::remove_all(location); + } + + bool starts_with(std::string a, std::string b) { return a.substr(0, b.length()) == b; } + bool contains(std::string a, std::string b) { return a.find(b) != a.npos; } + bool contains_line_starts_with(std::string a, std::string b) { return starts_with(a, b) || contains(a, "\n" + b); } + + std::vector list_dir() { + return list_dir(boost::filesystem::path(location)); + } + + std::vector list_dir(boost::filesystem::path dir) { + std::vector ret; + for (const boost::filesystem::path& p : boost::filesystem::directory_iterator(dir)) { + if (dblog_scan::is_wal(p)) { + ret.emplace_back(p); + } + } + return ret; + } + +}; + +extern constexpr const std::string_view data_case1_epoch = + "\x04\x00\x00\x00\x00\x00\x00\x00\x00" // epoch 0 + "\x04\x00\x01\x00\x00\x00\x00\x00\x00" // epoch 0x100 + ""sv; + +extern constexpr const std::string_view data_case1_pwal0 = + "\x02\xf0\x00\x00\x00\x00\x00\x00\x00" // marker_begin 0xf0 + "\x01\x01\x00\x00\x00\x01\x00\x00\x00" "storage1" "A" "\xf0\0\0\0\0\0\0\0" "verminor" "0" // normal_entry + "\x01\x01\x00\x00\x00\x01\x00\x00\x00" "storage1" "B" "\xf0\0\0\0\0\0\0\0" "verminor" "0" // normal_entry + "\x01\x01\x00\x00\x00\x01\x00\x00\x00" "storage1" "C" "\xf0\0\0\0\0\0\0\0" "verminor" "0" // normal_entry + // XXX: epoch footer... + ""sv; + +extern constexpr const std::string_view data_case1_pwal1 = + "\x02\xf1\x00\x00\x00\x00\x00\x00\x00" // marker_begin 0xf1 + "\x01\x01\x00\x00\x00\x01\x00\x00\x00" "storage1" "A" "\xf1\0\0\0\0\0\0\0" "verminor" "1" // normal_entry + // XXX: epoch footer... + "\x02\xf2\x00\x00\x00\x00\x00\x00\x00" // marker_begin 0xf2 + "\x05\x01\x00\x00\x00" "storage1" "C" "\xf2\0\0\0\0\0\0\0" "verminor" // remove_entry + // XXX: epoch footer... + ""sv; + +extern constexpr const std::string_view data_case1_pwalcompact = + "\x02\x00\x00\x00\x00\x00\x00\x00\x00" + "\x01\x01\x00\x00\x00\x01\x00\x00\x00" "storage1" "A" "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0" "1" // normal_entry + "\x01\x01\x00\x00\x00\x01\x00\x00\x00" "storage1" "B" "\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0" "0" // normal_entry + // XXX: epoch footer... + ""sv; + +extern const std::string_view data_case1_epochcompact = epoch_0x100_str; + +TEST_F(dblogutil_compaction_test, case1force) { + boost::filesystem::path dir{location}; + dir /= "log"; + boost::filesystem::create_directory(dir); + create_file(dir / "epoch", data_case1_epoch); + create_file(dir / std::string(manifest_file_name), data_manifest()); + create_file(dir / "pwal_0000", data_case1_pwal0); + create_file(dir / "pwal_0001", data_case1_pwal1); + std::string command; + command = UTIL_COMMAND " compaction --force " + dir.string() + " 2>&1"; + std::string out; + int rc = invoke(command, out); + EXPECT_GE(rc, 0 << 8); + EXPECT_TRUE(contains(out, "compaction was successfully completed: ")); + EXPECT_EQ(read_entire_file(list_dir(dir)[0]), data_case1_pwalcompact); + EXPECT_EQ(read_entire_file(dir / "epoch"), data_case1_epochcompact); +} + +TEST_F(dblogutil_compaction_test, case1prompt) { + boost::filesystem::path dir{location}; + dir /= "log"; + boost::filesystem::create_directory(dir); + create_file(dir / "epoch", data_case1_epoch); + create_file(dir / std::string(manifest_file_name), data_manifest()); + create_file(dir / "pwal_0000", data_case1_pwal0); + create_file(dir / "pwal_0001", data_case1_pwal1); + std::string command; + command = "echo y | " UTIL_COMMAND " compaction " + dir.string() + " 2>&1"; + std::string out; + int rc = invoke(command, out); + EXPECT_GE(rc, 0 << 8); + EXPECT_TRUE(contains(out, "y/N")); + EXPECT_TRUE(contains(out, "compaction was successfully completed: ")); + EXPECT_EQ(read_entire_file(list_dir(dir)[0]), data_case1_pwalcompact); + EXPECT_EQ(read_entire_file(dir / "epoch"), data_case1_epochcompact); +} + +TEST_F(dblogutil_compaction_test, unreadable) { + // root can read directories w/o permissions + if (geteuid() == 0) { GTEST_SKIP() << "skip when run by root"; } + + boost::filesystem::path dir{location}; + dir /= "unreadable"; + boost::filesystem::create_directory(dir); + boost::filesystem::permissions(dir, boost::filesystem::no_perms); // drop dir permission + std::string command; + command = UTIL_COMMAND " compaction --force " + dir.string() + " 2>&1"; + std::string out; + int rc = invoke(command, out); + EXPECT_GE(rc, 64 << 8); + EXPECT_TRUE(contains_line_starts_with(out, "E")); // LOG(ERROR) + EXPECT_TRUE(contains(out, "Permission denied")); + boost::filesystem::permissions(dir, boost::filesystem::owner_all); +} + +TEST_F(dblogutil_compaction_test, nondblogdir) { + boost::filesystem::path dir{location}; // assume empty dir + std::string command; + command = UTIL_COMMAND " compaction --force " + dir.string() + " 2>&1"; + std::string out; + int rc = invoke(command, out); + EXPECT_GE(rc, 64 << 8); + EXPECT_TRUE(contains_line_starts_with(out, "E")); // LOG(ERROR) + EXPECT_TRUE(contains(out, "unsupport")); +} + +} // namespace limestone::testing