Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Improve command line argument handling #56

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/src/dev-docs/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ require this storage backend.
4. Set the `cStorageUrl` in `tests/storage/StorageTestHelper.hpp` to
`jdbc:mariadb://localhost:3306/<db_name>?user=<usr>&password=<pwd>`.

5. Set the `storage_url` in `tests/integration/client.py` to
5. Set the `storage-url` in `tests/integration/client.py` to
`jdbc:mariadb://localhost:3306/<db_name>?user=<usr>&password=<pwd>`.

## Running unit tests
Expand Down
8 changes: 4 additions & 4 deletions docs/src/user-docs/guides-quick-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ To start the scheduler, run:

```shell
build/spider/src/spider/spider_scheduler \
--storage_url \
--storage-url \
"jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \
--host "127.0.0.1" \
--port 6000
Expand All @@ -174,7 +174,7 @@ build/spider/src/spider/spider_scheduler \
NOTE:

* If you used a different set of arguments to set up the storage backend, ensure you update the
`storage_url` argument in the command.
`storage-url` argument in the command.
* In production, change the host to the real IP address of the machine running the scheduler.
* If the scheduler fails to bind to port `6000`, change the port in the command and try again.

Expand All @@ -190,7 +190,7 @@ To start a worker, run:

```shell
build/spider/src/spider/spider_worker \
--storage_url \
--storage-url \
"jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \
--host "127.0.0.1" \
--libs "build/libtasks.so"
Expand All @@ -199,7 +199,7 @@ build/spider/src/spider/spider_worker \
NOTE:

* If you used a different set of arguments to set up the storage backend, ensure you update the
`storage_url` argument in the command.
`storage-url` argument in the command.
* In production, change the host to the real IP address of the machine running the worker.
* You can specify multiple task libraries to load. The task libraries must be built with linkage
to the Spider client library.
Expand Down
5 changes: 4 additions & 1 deletion src/spider/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ set(SPIDER_CORE_HEADERS
io/MsgPack.hpp
io/msgpack_message.hpp
io/Serializer.hpp
utils/TimedCache.hpp
storage/MetadataStorage.hpp
storage/DataStorage.hpp
storage/MySqlConnection.hpp
Expand Down Expand Up @@ -55,6 +54,7 @@ set(SPIDER_WORKER_SOURCES
worker/message_pipe.hpp
worker/WorkerClient.hpp
worker/WorkerClient.cpp
utils/ProgramOptions.hpp
utils/StopToken.hpp
CACHE INTERNAL
"spider worker source files"
Expand All @@ -66,6 +66,7 @@ set(SPIDER_TASK_EXECUTOR_SOURCES
worker/message_pipe.cpp
worker/message_pipe.hpp
worker/task_executor.cpp
utils/ProgramOptions.hpp
CACHE INTERNAL
"spider task executor source files"
)
Expand Down Expand Up @@ -112,7 +113,9 @@ set(SPIDER_SCHEDULER_SOURCES
scheduler/SchedulerServer.hpp
scheduler/SchedulerTaskCache.cpp
scheduler/SchedulerTaskCache.hpp
utils/ProgramOptions.hpp
utils/StopToken.hpp
utils/TimedCache.hpp
CACHE INTERNAL
"spider scheduler source files"
)
Expand Down
115 changes: 67 additions & 48 deletions src/spider/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#include <chrono>
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <system_error>
#include <thread>

#include <boost/any/bad_any_cast.hpp>
#include <boost/program_options/errors.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
Expand All @@ -24,6 +24,7 @@
#include "../storage/DataStorage.hpp"
#include "../storage/MetadataStorage.hpp"
#include "../storage/MySqlStorage.hpp"
#include "../utils/ProgramOptions.hpp"
#include "../utils/StopToken.hpp"
#include "FifoPolicy.hpp"
#include "SchedulerPolicy.hpp"
Expand All @@ -38,33 +39,71 @@ constexpr int cCleanupInterval = 5;
constexpr int cRetryCount = 5;

namespace {
auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map {

auto parse_args(
int const argc,
char** argv,
std::string& host,
unsigned short& port,
std::string& storage_url
) -> bool {
boost::program_options::options_description desc;
desc.add_options()("help", "spider scheduler");
desc.add_options()(
"host",
boost::program_options::value<std::string>(),
"scheduler host address"
);
desc.add_options()(
"port",
boost::program_options::value<unsigned short>(),
"port to listen on"
);
desc.add_options()(
"storage_url",
boost::program_options::value<std::string>(),
"storage server url"
);

boost::program_options::variables_map variables;
boost::program_options::store(
// NOLINTNEXTLINE(misc-include-cleaner)
boost::program_options::parse_command_line(argc, argv, desc),
variables
);
boost::program_options::notify(variables);
return variables;
// clang-format off
desc.add_options()
(spider::core::cHelpOption.data(), spider::core::cHelpMessage.data())
(
spider::core::cHostOption.data(),
boost::program_options::value<std::string>(&host)->required(),
spider::core::cHostMessage.data()
)
(
spider::core::cPortOption.data(),
boost::program_options::value<unsigned short>(&port)->required(),
spider::core::cPortMessage.data()
)
(
spider::core::cStorageUrlOption.data(),
boost::program_options::value<std::string>(&storage_url)->required(),
spider::core::cStorageUrlMessage.data()
);
// clang-format on

try {
boost::program_options::variables_map variables;
boost::program_options::store(
// NOLINTNEXTLINE(misc-include-cleaner)
boost::program_options::parse_command_line(argc, argv, desc),
variables
);

if (false == variables.contains(std::string(spider::core::cHostOption))
&& false == variables.contains(std::string(spider::core::cPortOption))
&& false == variables.contains(std::string(spider::core::cStorageUrlOption)))
{
std::cout << spider::core::cSchedulerUsage << "\n";
std::cout << desc << "\n";
return false;
}

boost::program_options::notify(variables);

if (host.empty()) {
std::cerr << spider::core::cHostEmptyMessage << "\n";
return false;
}

if (storage_url.empty()) {
std::cerr << spider::core::cStorageUrlEmptyMessage << "\n";
return false;
}

return true;
} catch (boost::program_options::error& e) {
std::cerr << "spider_scheduler: " << e.what() << "\n";
std::cerr << spider::core::cSchedulerUsage << "\n";
std::cerr << spider::core::cSchedulerHelpMessage;
return false;
}
}

auto heartbeat_loop(
Expand Down Expand Up @@ -137,30 +176,10 @@ auto main(int argc, char** argv) -> int {
spdlog::set_level(spdlog::level::trace);
#endif

boost::program_options::variables_map const args = parse_args(argc, argv);

unsigned short port = 0;
std::string scheduler_addr;
std::string storage_url;
try {
if (!args.contains("port")) {
spdlog::error("port is required");
return cCmdArgParseErr;
}
port = args["port"].as<unsigned short>();
if (!args.contains("host")) {
spdlog::error("host is required");
return cCmdArgParseErr;
}
scheduler_addr = args["host"].as<std::string>();
if (!args.contains("storage_url")) {
spdlog::error("storage_url is required");
return cCmdArgParseErr;
}
storage_url = args["storage_url"].as<std::string>();
} catch (boost::bad_any_cast& e) {
return cCmdArgParseErr;
} catch (boost::program_options::error& e) {
if (false == parse_args(argc, argv, scheduler_addr, port, storage_url)) {
return cCmdArgParseErr;
}

Expand Down
64 changes: 64 additions & 0 deletions src/spider/utils/ProgramOptions.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#ifndef SPIDER_UTILS_PROGRAMOPTIONS_HPP
#define SPIDER_UTILS_PROGRAMOPTIONS_HPP

#include <string_view>

namespace spider::core {

constexpr std::string_view cSchedulerUsage
= {"Usage: spider_scheduler --host <host> --port <port> --storage-url <url>"};

constexpr std::string_view cSchedulerHelpMessage
= {"Try 'spider_scheduler --help' for detailed usage instructions.\n"};

constexpr std::string_view cWorkerUsage
= {"Usage: spider_worker --host <host> --storage-url <storage_url> --libs <libs>"};
Comment on lines +14 to +15
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix inconsistent argument naming in worker usage message.

The usage message shows storage_url with an underscore, but the actual option is defined with a hyphen as storage-url. This inconsistency could confuse users.

Apply this diff to fix the inconsistency:

-        = {"Usage: spider_worker --host <host> --storage-url <storage_url> --libs <libs>"};
+        = {"Usage: spider_worker --host <host> --storage-url <storage-url> --libs <libs>"};
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
constexpr std::string_view cWorkerUsage
= {"Usage: spider_worker --host <host> --storage-url <storage_url> --libs <libs>"};
constexpr std::string_view cWorkerUsage
= {"Usage: spider_worker --host <host> --storage-url <storage-url> --libs <libs>"};


constexpr std::string_view cWorkerHelpMessage
= {"Try 'spider_worker --help' for detailed usage instructions.\n"};

constexpr std::string_view cTaskExecutorUsage
= {"Usage: spider_task_executor --func <function> --task-id <task_id> --storage-url "
"<storage_url> --libs <libs>"};
Comment on lines +20 to +22
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix inconsistent argument naming in task executor usage message.

The option task-id is defined with a hyphen, but task_id appears with an underscore in the usage message.

Apply this diff to fix the inconsistency:

-        = {"Usage: spider_task_executor --func <function> --task-id <task_id> --storage-url "
+        = {"Usage: spider_task_executor --func <function> --task-id <task-id> --storage-url "
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
constexpr std::string_view cTaskExecutorUsage
= {"Usage: spider_task_executor --func <function> --task-id <task_id> --storage-url "
"<storage_url> --libs <libs>"};
constexpr std::string_view cTaskExecutorUsage
= {"Usage: spider_task_executor --func <function> --task-id <task-id> --storage-url "
"<storage_url> --libs <libs>"};


constexpr std::string_view cTaskExecutorHelpMessage
= {"Try 'spider_task_executor --help' for detailed usage instructions.\n"};

constexpr std::string_view cHelpOption = {"help"};

constexpr std::string_view cHelpMessage = {"Print this help text."};

constexpr std::string_view cHostOption = {"host"};

constexpr std::string_view cHostMessage = {"The host address to bind to"};

constexpr std::string_view cHostEmptyMessage = {"The host address should not be empty"};

constexpr std::string_view cPortOption = {"port"};

constexpr std::string_view cPortMessage = {"The port to listen on"};

constexpr std::string_view cStorageUrlOption = {"storage-url"};

constexpr std::string_view cStorageUrlMessage = {"The storage server's URL"};

constexpr std::string_view cStorageUrlEmptyMessage
= {"The storage server's URL should not be empty"};

constexpr std::string_view cLibsOption = {"libs"};

constexpr std::string_view cLibsMessage = {"The tasks libraries to load"};

constexpr std::string_view cLibsEmptyMessage = {"The tasks libraries should not be empty"};

constexpr std::string_view cFunctionOption = {"func"};

constexpr std::string_view cFunctionMessage = {"The function to execute"};

constexpr std::string_view cTaskIdOption = {"task-id"};

constexpr std::string_view cTaskIdMessage = {"The id of the task to execute"};

} // namespace spider::core

#endif
18 changes: 10 additions & 8 deletions src/spider/worker/TaskExecutor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
#include <boost/process/v2/stdio.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <fmt/format.h>

#include "../io/BoostAsio.hpp" // IWYU pragma: keep
#include "../io/MsgPack.hpp" // IWYU pragma: keep
#include "../utils/ProgramOptions.hpp"
#include "FunctionManager.hpp"
#include "message_pipe.hpp"

Expand Down Expand Up @@ -51,13 +53,13 @@ class TaskExecutor {
: m_read_pipe(context),
m_write_pipe(context) {
std::vector<std::string> process_args{
"--func",
fmt::format("--{}", core::cFunctionOption),
func_name,
"--task_id",
fmt::format("--{}", core::cTaskIdOption),
to_string(task_id),
"--storage_url",
fmt::format("--{}", core::cStorageUrlOption),
storage_url,
"--libs"
fmt::format("--{}", core::cLibsOption),
};
process_args.insert(process_args.end(), libs.begin(), libs.end());
boost::filesystem::path const exe = boost::process::v2::environment::find_executable(
Expand Down Expand Up @@ -99,13 +101,13 @@ class TaskExecutor {
: m_read_pipe(context),
m_write_pipe(context) {
std::vector<std::string> process_args{
"--func",
fmt::format("--{}", core::cFunctionOption),
func_name,
"--task_id",
fmt::format("--{}", core::cTaskIdOption),
to_string(task_id),
"--storage_url",
fmt::format("--{}", core::cStorageUrlOption),
storage_url,
"--libs"
fmt::format("--{}", core::cLibsOption),
};
process_args.insert(process_args.end(), libs.begin(), libs.end());
boost::filesystem::path const exe = boost::process::v2::environment::find_executable(
Expand Down
Loading