diff --git a/Dockerfile b/Dockerfile index 1a5aa25..188d0fb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,11 +30,11 @@ RUN mkdir src && cd src/ && \ find_package(nav2_msgs REQUIRED)\n \ find_package(bond REQUIRED)\n \ add_executable(healthcheck_localization src/healthcheck_localization.cpp)\n \ - ament_target_dependencies(healthcheck_localization rclcpp std_msgs geometry_msgs)\n \ + ament_target_dependencies(healthcheck_localization rclcpp geometry_msgs)\n \ add_executable(healthcheck_slam src/healthcheck_slam.cpp)\n \ - ament_target_dependencies(healthcheck_slam rclcpp std_msgs nav2_msgs)\n \ + ament_target_dependencies(healthcheck_slam rclcpp nav2_msgs)\n \ add_executable(healthcheck_ src/healthcheck_navigation.cpp)\n \ - ament_target_dependencies(healthcheck_ rclcpp std_msgs bond)\n \ + ament_target_dependencies(healthcheck_ rclcpp bond)\n \ install(TARGETS \ healthcheck_localization \ healthcheck_slam \ @@ -42,9 +42,7 @@ RUN mkdir src && cd src/ && \ DESTINATION lib/${PROJECT_NAME})' \ /ros2_ws/src/healthcheck_pkg/CMakeLists.txt -COPY ./healthcheck_localization.cpp /ros2_ws/src/healthcheck_pkg/src/ -COPY ./healthcheck_slam.cpp /ros2_ws/src/healthcheck_pkg/src/ -COPY ./healthcheck_navigation.cpp /ros2_ws/src/healthcheck_pkg/src/ +COPY ./healthcheck /ros2_ws/src/healthcheck_pkg/src # Build RUN source /opt/ros/$ROS_DISTRO/setup.bash && \ @@ -52,7 +50,8 @@ RUN source /opt/ros/$ROS_DISTRO/setup.bash && \ RUN echo $(dpkg -s ros-$ROS_DISTRO-navigation2 | grep 'Version' | sed -r 's/Version:\s([0-9]+.[0-9]+.[0-9]+).*/\1/g') > /version.txt -HEALTHCHECK --interval=20s --timeout=10s --start-period=10s --retries=3 \ - CMD bash -c "/ros_entrypoint.sh ros2 run healthcheck_pkg healthcheck_$SLAM_MODE" +COPY ./healthcheck/healthcheck.sh / +HEALTHCHECK --interval=10s --timeout=2s --start-period=5s --retries=5 \ + CMD ["/healthcheck.sh"] #tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log' diff --git a/healthcheck/healthcheck.sh b/healthcheck/healthcheck.sh new file mode 100755 index 0000000..dee593c --- /dev/null +++ b/healthcheck/healthcheck.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +HEALTHCHECK_FILE="/health_status.txt" + +# Function to start the ROS 2 healthcheck node +start_healthcheck_node() { + /ros_entrypoint.sh ros2 run healthcheck_pkg healthcheck_$SLAM_MODE & +} + +if [ ! -f "$HEALTHCHECK_FILE" ]; then + echo "Healthcheck file not found. Starting ROS 2 healthcheck node..." + start_healthcheck_node + # Wait a bit to allow the node to start and write its initial status + sleep 2 +fi + +# Now check the health status +if [ -f "$HEALTHCHECK_FILE" ]; then + status=$(cat "$HEALTHCHECK_FILE") + if [ "$status" == "healthy" ]; then + exit 0 + else + exit 1 + fi +else + echo "Healthcheck file still not found. There may be an issue with the node." + exit 1 +fi diff --git a/healthcheck_localization.cpp b/healthcheck/healthcheck_localization.cpp similarity index 51% rename from healthcheck_localization.cpp rename to healthcheck/healthcheck_localization.cpp index 6fe6a13..f433794 100755 --- a/healthcheck_localization.cpp +++ b/healthcheck/healthcheck_localization.cpp @@ -1,24 +1,26 @@ #include "rclcpp/rclcpp.hpp" #include "geometry_msgs/msg/pose_with_covariance_stamped.hpp" -#include "cstdlib" +#include "fstream" using namespace std::chrono_literals; -#define TIMEOUT 2s +#define LOOP_PERIOD 2s -int msg_received = EXIT_FAILURE; +// State 'healthy' is latched, because topic 'amcl_pose' is transient_local and send only when moving. -void msg_callback(const geometry_msgs::msg::PoseWithCovarianceStamped::SharedPtr msg) +std::chrono::steady_clock::time_point last_msg_time; + +void write_health_status(const std::string& status) { - std::cout << "Message received" << std::endl; - msg_received = EXIT_SUCCESS; - rclcpp::shutdown(); + std::ofstream healthFile("/health_status.txt"); + healthFile << status; + healthFile.close(); } -void timeout_callback() +void msg_callback(const geometry_msgs::msg::PoseWithCovarianceStamped::SharedPtr msg) { - std::cout << "Timeout" << std::endl; - rclcpp::shutdown(); + std::cout << "Message received" << std::endl; + write_health_status("healthy"); } int main(int argc, char* argv[]) @@ -28,9 +30,13 @@ int main(int argc, char* argv[]) rclcpp::QoS qos(1); qos.transient_local(); auto sub = node->create_subscription("/amcl_pose", qos, msg_callback); - auto timer = node->create_wall_timer(TIMEOUT, timeout_callback); - rclcpp::spin(node); - return msg_received; -} + write_health_status("unhealthy"); + while (rclcpp::ok()) + { + rclcpp::spin_some(node); + std::this_thread::sleep_for(LOOP_PERIOD); + } + return 0; +} diff --git a/healthcheck/healthcheck_navigation.cpp b/healthcheck/healthcheck_navigation.cpp new file mode 100644 index 0000000..484c5e8 --- /dev/null +++ b/healthcheck/healthcheck_navigation.cpp @@ -0,0 +1,57 @@ +#include "rclcpp/rclcpp.hpp" +#include "bond/msg/status.hpp" +#include "fstream" + +using namespace std::chrono_literals; + +#define LOOP_PERIOD 2s +#define MSG_VALID_TIME 5s + +std::chrono::steady_clock::time_point last_msg_time; + +void write_health_status(const std::string& status) +{ + std::ofstream healthFile("/health_status.txt"); + healthFile << status; + healthFile.close(); +} + +void msg_callback(const bond::msg::Status::SharedPtr msg) +{ + write_health_status("healthy"); + last_msg_time = std::chrono::steady_clock::now(); +} + +void healthy_check(const rclcpp::Node::SharedPtr& node) +{ + std::chrono::steady_clock::time_point current_time = std::chrono::steady_clock::now(); + std::chrono::duration elapsed_time = current_time - last_msg_time; + bool is_msg_valid = elapsed_time.count() < MSG_VALID_TIME.count(); + + if (is_msg_valid) + { + RCLCPP_INFO(node->get_logger(), "Health check: healthy"); + write_health_status("healthy"); + } + else + { + RCLCPP_WARN(node->get_logger(), "Health check: unhealthy"); + write_health_status("unhealthy"); + } +} + +int main(int argc, char* argv[]) +{ + rclcpp::init(argc, argv); + auto node = rclcpp::Node::make_shared("healthcheck_node"); + auto sub = node->create_subscription("/bond", rclcpp::SystemDefaultsQoS(), msg_callback); + + while (rclcpp::ok()) + { + rclcpp::spin_some(node); + healthy_check(node); + std::this_thread::sleep_for(LOOP_PERIOD); + } + + return 0; +} diff --git a/healthcheck/healthcheck_slam.cpp b/healthcheck/healthcheck_slam.cpp new file mode 100755 index 0000000..c6c1ce4 --- /dev/null +++ b/healthcheck/healthcheck_slam.cpp @@ -0,0 +1,116 @@ +#include "rclcpp/rclcpp.hpp" +#include "nav_msgs/msg/occupancy_grid.hpp" +#include "nav2_msgs/srv/save_map.hpp" +#include // Include for file operations +#include // Include for sleep_for + +using namespace std::chrono_literals; + +#define LOOP_PERIOD 2s +#define MSG_VALID_TIME 5s +#define SAVE_MAP_PERIOD 15s +#define SAVE_MAP_CONNECTION_TIMEOUT 2s +#define SAVE_MAP_VALID_TIME 20s + +class HealthCheckNode : public rclcpp::Node +{ +public: + HealthCheckNode() : Node("healthcheck_node") + { + rclcpp::QoS qos(1); + qos.transient_local(); + map_subscriber_ = create_subscription( + "/map", qos, std::bind(&HealthCheckNode::msgCallback, this, std::placeholders::_1)); + save_map_client_ = create_client("/map_saver/save_map"); + } + + void healthyCheck() + { + auto current_time = std::chrono::steady_clock::now(); + std::chrono::duration msg_elapsed_time = current_time - last_msg_time; + std::chrono::duration saved_map_elapsed_time = current_time - last_saved_map_time; + bool is_msg_valid = msg_elapsed_time.count() < MSG_VALID_TIME.count(); + bool is_save_map_valid = saved_map_elapsed_time.count() < SAVE_MAP_VALID_TIME.count(); + + if (is_msg_valid && is_save_map_valid) + { + RCLCPP_INFO(get_logger(), "Health check: healthy"); + writeHealthStatus("healthy"); + } + else + { + RCLCPP_WARN(get_logger(), "Health check: unhealthy"); + writeHealthStatus("unhealthy"); + } + } + + void saveMapPeriodically() + { + auto current_time = std::chrono::steady_clock::now(); + std::chrono::duration elapsed_time = current_time - last_saved_map_time; + + if (elapsed_time.count() > SAVE_MAP_PERIOD.count()) + { + if (save_map_client_->wait_for_service(SAVE_MAP_CONNECTION_TIMEOUT)) + { + RCLCPP_DEBUG(get_logger(), "Service available"); + auto request = std::make_shared(); + request->free_thresh = 0.15; + request->map_topic = "/map"; + request->map_url = "/maps/map"; + request->map_mode = "trinary"; + request->image_format = "png"; + + auto future = save_map_client_->async_send_request(request); + + if (rclcpp::spin_until_future_complete(shared_from_this(), future, SAVE_MAP_CONNECTION_TIMEOUT) == rclcpp::FutureReturnCode::SUCCESS) + { + RCLCPP_DEBUG(get_logger(), "Map saved"); + last_saved_map_time = std::chrono::steady_clock::now(); + } + else + { + RCLCPP_DEBUG(get_logger(), "Service response didn't arrived"); + } + } + else + { + RCLCPP_DEBUG(get_logger(), "Service unavailable"); + } + } + } + +private: + rclcpp::Subscription::SharedPtr map_subscriber_; + rclcpp::Client::SharedPtr save_map_client_; + std::chrono::steady_clock::time_point last_msg_time; + std::chrono::steady_clock::time_point last_saved_map_time; + + void writeHealthStatus(const std::string &status) + { + std::ofstream healthFile("/health_status.txt"); + healthFile << status; + } + + void msgCallback(const nav_msgs::msg::OccupancyGrid::SharedPtr msg) + { + RCLCPP_DEBUG(get_logger(), "Msg arrived"); + last_msg_time = std::chrono::steady_clock::now(); + } +}; + +int main(int argc, char *argv[]) +{ + rclcpp::init(argc, argv); + auto node = std::make_shared(); + + while (rclcpp::ok()) + { + rclcpp::spin_some(node); + node->healthyCheck(); + node->saveMapPeriodically(); + std::this_thread::sleep_for(LOOP_PERIOD); + } + + return 0; +} diff --git a/healthcheck_navigation.cpp b/healthcheck_navigation.cpp deleted file mode 100644 index 2ad2b29..0000000 --- a/healthcheck_navigation.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "rclcpp/rclcpp.hpp" -#include "bond/msg/status.hpp" -#include "cstdlib" - -using namespace std::chrono_literals; - -#define TIMEOUT 2s - -int msg_received = EXIT_FAILURE; - -void msg_callback(const bond::msg::Status::SharedPtr msg) -{ - std::cout << "Message received" << std::endl; - msg_received = EXIT_SUCCESS; - rclcpp::shutdown(); -} - -void timeout_callback() -{ - std::cout << "Timeout" << std::endl; - rclcpp::shutdown(); -} - -int main(int argc, char* argv[]) -{ - rclcpp::init(argc, argv); - auto node = rclcpp::Node::make_shared("healthcheck_node"); - auto sub = node->create_subscription("/bond", rclcpp::SystemDefaultsQoS(), msg_callback); - auto timer = node->create_wall_timer(TIMEOUT, timeout_callback); - - rclcpp::spin(node); - return msg_received; -} diff --git a/healthcheck_slam.cpp b/healthcheck_slam.cpp deleted file mode 100755 index a24ee34..0000000 --- a/healthcheck_slam.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include "rclcpp/rclcpp.hpp" -#include "nav_msgs/msg/occupancy_grid.hpp" -#include "nav2_msgs/srv/save_map.hpp" -#include "cstdlib" - -using namespace std::chrono_literals; - -#define TIMEOUT 3s - -int msg_received = false; -int map_saved = false; - -void msg_callback(const nav_msgs::msg::OccupancyGrid::SharedPtr msg) -{ - std::cout << "Message received" << std::endl; - msg_received = true; - if (msg_received && map_saved) - { - rclcpp::shutdown(); - } -} - -void timeout_callback() -{ - std::cout << "Timeout" << std::endl; - rclcpp::shutdown(); -} - -int main(int argc, char* argv[]) -{ - rclcpp::init(argc, argv); - auto node = rclcpp::Node::make_shared("healthcheck_node"); - auto sub = node->create_subscription("/map", rclcpp::SensorDataQoS(), - msg_callback); - auto client = node->create_client("/map_saver/save_map"); - auto timer = node->create_wall_timer(TIMEOUT, timeout_callback); - - if (!client->wait_for_service(TIMEOUT)) - { - std::cout << "Failed to connect to the image save service" << std::endl; - rclcpp::shutdown(); - } - - auto request = std::make_shared(); - request->free_thresh = 0.15; - request->map_topic = "/map"; - request->map_url = "/maps/map"; - request->map_mode = "trinary"; - request->image_format = "png"; - - auto future = client->async_send_request(request); - - if (rclcpp::spin_until_future_complete(node, future) == rclcpp::FutureReturnCode::SUCCESS) - { - std::cout << "Response received" << std::endl; - map_saved = true; - if (msg_received && map_saved) - { - rclcpp::shutdown(); - } - } - - rclcpp::spin(node); - - return (msg_received && map_saved) ? EXIT_SUCCESS : EXIT_FAILURE; -}