Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(clp-s): json to irv2 #657

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 102 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
std::cerr << " c - compress" << std::endl;
std::cerr << " x - decompress" << std::endl;
std::cerr << " s - search" << std::endl;
std::cerr << " r - JSON to IR Format" << std::endl;
std::cerr << std::endl;
std::cerr << "Try "
<< " c --help OR"
<< " x --help OR"
<< " s --help for command-specific details." << std::endl;
<< " s --help OR"
<< " r --help for command-specific details." << std::endl;

po::options_description visible_options;
visible_options.add(general_options);
Expand All @@ -125,6 +127,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
case (char)Command::Compress:
case (char)Command::Extract:
case (char)Command::Search:
case (char)Command::JsonToIr:
m_command = (Command)command_input;
break;
default:
Expand Down Expand Up @@ -727,6 +730,100 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"The --count-by-time and --count options are mutually exclusive."
);
}
} else if ((char)Command::JsonToIr == command_input) {
po::options_description compression_positional_options;
// clang-format off
compression_positional_options.add_options()(
"ir-dir",
po::value<std::string>(&m_archives_dir)->value_name("DIR"),
"output directory"
)(
"input-paths",
po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
"input paths"
);
// clang-format on

po::options_description compression_options("Compression options");
std::string input_path_list_file_path;
// clang-format off
compression_options.add_options()(
"compression-level",
po::value<int>(&m_compression_level)->value_name("LEVEL")->
default_value(m_compression_level),
"1 (fast/low compression) to 9 (slow/high compression)."
)(
"max-document-size",
po::value<size_t>(&m_max_document_size)->value_name("DOC_SIZE")->
default_value(m_max_document_size),
"Maximum allowed size (B) for a single document before ir generation fails."
)(
"max-ir-buffer-size",
po::value<size_t>(&m_max_ir_buffer_size)->value_name("BUFFER_SIZE")->
default_value(m_max_ir_buffer_size),
"Maximum allowed size (B) for an in memory IR buffer befroe being written to file."
)(
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
"encoding-type",
po::value<int>(&m_encoding_type)->value_name("ENCODING_TYPE")->
default_value(m_encoding_type),
"4 (four byte encoding) or 8 (eight byte encoding)"
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
)(
"files-from,f",
po::value<std::string>(&input_path_list_file_path)
->value_name("FILE")
->default_value(input_path_list_file_path),
"Compress files specified in FILE"
);
// clang-format on

po::positional_options_description positional_options;
positional_options.add("ir-dir", 1);
positional_options.add("input-paths", -1);

po::options_description all_compression_options;
all_compression_options.add(compression_options);
all_compression_options.add(compression_positional_options);

std::vector<std::string> unrecognized_options
= po::collect_unrecognized(parsed.options, po::include_positional);
unrecognized_options.erase(unrecognized_options.begin());
po::store(
po::command_line_parser(unrecognized_options)
.options(all_compression_options)
.positional(positional_options)
.run(),
parsed_command_line_options
);
po::notify(parsed_command_line_options);

if (parsed_command_line_options.count("help")) {
print_json_to_ir_usage();

std::cerr << "Examples:\n";
std::cerr << " # Parse file1.json and dir1 into irs-dir\n";
std::cerr << " " << m_program_name << " r irs-dir file1.json dir1\n";

po::options_description visible_options;
visible_options.add(general_options);
visible_options.add(compression_options);
std::cerr << visible_options << '\n';
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No IRs directory specified.");
}

if (false == input_path_list_file_path.empty()) {
if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) {
SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
return ParsingResult::Failure;
}
}

if (m_file_paths.empty()) {
throw std::invalid_argument("No input paths specified.");
}
}
} catch (std::exception& e) {
SPDLOG_ERROR("{}", e.what());
Expand Down Expand Up @@ -834,6 +931,10 @@ void CommandLineArguments::print_decompression_usage() const {
std::cerr << "Usage: " << m_program_name << " x [OPTIONS] ARCHIVES_DIR OUTPUT_DIR" << std::endl;
}

/**
 * Writes the one-line usage summary for the JSON-to-IR (`r`) command to stderr.
 */
void CommandLineArguments::print_json_to_ir_usage() const {
    auto& usage_stream = std::cerr;
    usage_stream << "Usage: ";
    usage_stream << m_program_name;
    usage_stream << " r [OPTIONS] IRS_DIR [FILE/DIR ...]\n";
}

void CommandLineArguments::print_search_usage() const {
std::cerr << "Usage: " << m_program_name
<< " s [OPTIONS] ARCHIVES_DIR KQL_QUERY"
Expand Down
11 changes: 10 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class CommandLineArguments {
/**
 * Sub-commands selectable on the command line. Each enumerator's underlying value is the single
 * character the user passes to pick that command.
 */
enum class Command : char {
    Compress = 'c',
    Extract = 'x',
    Search = 's',
    JsonToIr = 'r'
};

enum class OutputHandlerType : uint8_t {
Expand Down Expand Up @@ -65,6 +66,10 @@ class CommandLineArguments {

/**
 * @return The configured maximum document size (the value of `m_max_document_size`).
 */
[[nodiscard]] size_t get_max_document_size() const { return m_max_document_size; }

/**
 * @return The configured maximum size, in bytes, of an in-memory IR buffer.
 */
[[nodiscard]] auto get_max_ir_buffer_size() const -> size_t {
    return m_max_ir_buffer_size;
}

/**
 * @return The configured IR encoding type. The `--encoding-type` help text describes the accepted
 * values as 4 (four byte encoding) or 8 (eight byte encoding).
 */
[[nodiscard]] auto get_encoding_type() const -> int {
    return m_encoding_type;
}

/**
 * @return The value of the `m_print_archive_stats` flag.
 */
[[nodiscard]] bool print_archive_stats() const {
    return m_print_archive_stats;
}

/**
 * @return A reference to the stored MongoDB URI (`m_mongodb_uri`); valid for as long as this
 * object is alive.
 */
[[nodiscard]] std::string const& get_mongodb_uri() const { return m_mongodb_uri; }
Expand Down Expand Up @@ -170,6 +175,8 @@ class CommandLineArguments {

void print_decompression_usage() const;

void print_json_to_ir_usage() const;

void print_search_usage() const;

// Variables
Expand All @@ -192,6 +199,8 @@ class CommandLineArguments {
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
bool m_disable_log_order{false};
FileType m_file_type{FileType::Json};
int m_encoding_type{8};
size_t m_max_ir_buffer_size{512ULL * 1024 * 1024};

// Metadata db variables
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
Expand Down
9 changes: 9 additions & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ struct JsonParserOption {
std::shared_ptr<clp::GlobalMySQLMetadataDB> metadata_db;
};

/**
 * Options for the JSON-to-IR conversion.
 *
 * Scalar members are value-initialized so that a default-constructed instance never carries
 * indeterminate values. The struct remains an aggregate, so brace initialization by callers keeps
 * working.
 *
 * NOTE(review): the field meanings below are presumed from the matching `r` command-line options;
 * confirm against the code that populates this struct.
 */
struct JsonToIrParserOption {
    std::vector<std::string> file_paths;  // Presumably the input paths to convert
    std::string irs_dir;  // Presumably the output directory for generated IR files
    size_t max_document_size{};  // Presumably in bytes
    size_t max_ir_buffer_size{};  // Presumably in bytes
    int compression_level{};
    int encoding{};  // Presumably 4 or 8 (byte width of the encoding) -- TODO confirm
};

class JsonParser {
public:
class OperationFailed : public TraceableException {
Expand Down
Loading
Loading