Skip to content

Commit

Permalink
New version of Healthcheck
Browse files Browse the repository at this point in the history
  • Loading branch information
rafal-gorecki committed Nov 17, 2023
1 parent 1e98e56 commit 192c5bc
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 121 deletions.
15 changes: 7 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,28 @@ RUN mkdir src && cd src/ && \
find_package(nav2_msgs REQUIRED)\n \
find_package(bond REQUIRED)\n \
add_executable(healthcheck_localization src/healthcheck_localization.cpp)\n \
ament_target_dependencies(healthcheck_localization rclcpp std_msgs geometry_msgs)\n \
ament_target_dependencies(healthcheck_localization rclcpp geometry_msgs)\n \
add_executable(healthcheck_slam src/healthcheck_slam.cpp)\n \
ament_target_dependencies(healthcheck_slam rclcpp std_msgs nav2_msgs)\n \
ament_target_dependencies(healthcheck_slam rclcpp nav2_msgs)\n \
add_executable(healthcheck_ src/healthcheck_navigation.cpp)\n \
ament_target_dependencies(healthcheck_ rclcpp std_msgs bond)\n \
ament_target_dependencies(healthcheck_ rclcpp bond)\n \
install(TARGETS \
healthcheck_localization \
healthcheck_slam \
healthcheck_ \
DESTINATION lib/${PROJECT_NAME})' \
/ros2_ws/src/healthcheck_pkg/CMakeLists.txt

COPY ./healthcheck_localization.cpp /ros2_ws/src/healthcheck_pkg/src/
COPY ./healthcheck_slam.cpp /ros2_ws/src/healthcheck_pkg/src/
COPY ./healthcheck_navigation.cpp /ros2_ws/src/healthcheck_pkg/src/
COPY ./healthcheck /ros2_ws/src/healthcheck_pkg/src

# Build
RUN source /opt/ros/$ROS_DISTRO/setup.bash && \
colcon build --event-handlers console_direct+ --cmake-args -DCMAKE_BUILD_TYPE=Release

RUN echo $(dpkg -s ros-$ROS_DISTRO-navigation2 | grep 'Version' | sed -r 's/Version:\s([0-9]+.[0-9]+.[0-9]+).*/\1/g') > /version.txt

HEALTHCHECK --interval=20s --timeout=10s --start-period=10s --retries=3 \
CMD bash -c "/ros_entrypoint.sh ros2 run healthcheck_pkg healthcheck_$SLAM_MODE"
COPY ./healthcheck/healthcheck.sh /
HEALTHCHECK --interval=10s --timeout=2s --start-period=5s --retries=5 \
CMD ["/healthcheck.sh"]

#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
28 changes: 28 additions & 0 deletions healthcheck/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

HEALTHCHECK_FILE="/health_status.txt"

# Function to start the ROS 2 healthcheck node
start_healthcheck_node() {
/ros_entrypoint.sh ros2 run healthcheck_pkg healthcheck_$SLAM_MODE &
}

if [ ! -f "$HEALTHCHECK_FILE" ]; then
echo "Healthcheck file not found. Starting ROS 2 healthcheck node..."
start_healthcheck_node
# Wait a bit to allow the node to start and write its initial status
sleep 2
fi

# Now check the health status
if [ -f "$HEALTHCHECK_FILE" ]; then
status=$(cat "$HEALTHCHECK_FILE")
if [ "$status" == "healthy" ]; then
exit 0
else
exit 1
fi
else
echo "Healthcheck file still not found. There may be an issue with the node."
exit 1
fi
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
#include "rclcpp/rclcpp.hpp"
#include "geometry_msgs/msg/pose_with_covariance_stamped.hpp"
#include "cstdlib"
#include "fstream"

using namespace std::chrono_literals;

#define TIMEOUT 2s
#define LOOP_PERIOD 2s

int msg_received = EXIT_FAILURE;
// State 'healthy' is latched, because topic 'amcl_pose' is transient_local and send only when moving.

void msg_callback(const geometry_msgs::msg::PoseWithCovarianceStamped::SharedPtr msg)
std::chrono::steady_clock::time_point last_msg_time;

void write_health_status(const std::string& status)
{
std::cout << "Message received" << std::endl;
msg_received = EXIT_SUCCESS;
rclcpp::shutdown();
std::ofstream healthFile("/health_status.txt");
healthFile << status;
healthFile.close();
}

void timeout_callback()
void msg_callback(const geometry_msgs::msg::PoseWithCovarianceStamped::SharedPtr msg)
{
std::cout << "Timeout" << std::endl;
rclcpp::shutdown();
std::cout << "Message received" << std::endl;
write_health_status("healthy");
}

int main(int argc, char* argv[])
Expand All @@ -28,9 +30,13 @@ int main(int argc, char* argv[])
rclcpp::QoS qos(1);
qos.transient_local();
auto sub = node->create_subscription<geometry_msgs::msg::PoseWithCovarianceStamped>("/amcl_pose", qos, msg_callback);
auto timer = node->create_wall_timer(TIMEOUT, timeout_callback);

rclcpp::spin(node);
return msg_received;
}
write_health_status("unhealthy");
while (rclcpp::ok())
{
rclcpp::spin_some(node);
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
57 changes: 57 additions & 0 deletions healthcheck/healthcheck_navigation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "rclcpp/rclcpp.hpp"
#include "bond/msg/status.hpp"
#include "fstream"

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s
#define MSG_VALID_TIME 5s

std::chrono::steady_clock::time_point last_msg_time;

void write_health_status(const std::string& status)
{
std::ofstream healthFile("/health_status.txt");
healthFile << status;
healthFile.close();
}

void msg_callback(const bond::msg::Status::SharedPtr msg)
{
write_health_status("healthy");
last_msg_time = std::chrono::steady_clock::now();
}

void healthy_check(const rclcpp::Node::SharedPtr& node)
{
std::chrono::steady_clock::time_point current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_time = current_time - last_msg_time;
bool is_msg_valid = elapsed_time.count() < MSG_VALID_TIME.count();

if (is_msg_valid)
{
RCLCPP_INFO(node->get_logger(), "Health check: healthy");
write_health_status("healthy");
}
else
{
RCLCPP_WARN(node->get_logger(), "Health check: unhealthy");
write_health_status("unhealthy");
}
}

int main(int argc, char* argv[])
{
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");
auto sub = node->create_subscription<bond::msg::Status>("/bond", rclcpp::SystemDefaultsQoS(), msg_callback);

while (rclcpp::ok())
{
rclcpp::spin_some(node);
healthy_check(node);
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
116 changes: 116 additions & 0 deletions healthcheck/healthcheck_slam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#include "rclcpp/rclcpp.hpp"
#include "nav_msgs/msg/occupancy_grid.hpp"
#include "nav2_msgs/srv/save_map.hpp"
#include <fstream> // Include <fstream> for file operations
#include <thread> // Include <thread> for sleep_for

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s
#define MSG_VALID_TIME 5s
#define SAVE_MAP_PERIOD 15s
#define SAVE_MAP_CONNECTION_TIMEOUT 2s
#define SAVE_MAP_VALID_TIME 20s

class HealthCheckNode : public rclcpp::Node
{
public:
HealthCheckNode() : Node("healthcheck_node")
{
rclcpp::QoS qos(1);
qos.transient_local();
map_subscriber_ = create_subscription<nav_msgs::msg::OccupancyGrid>(
"/map", qos, std::bind(&HealthCheckNode::msgCallback, this, std::placeholders::_1));
save_map_client_ = create_client<nav2_msgs::srv::SaveMap>("/map_saver/save_map");
}

void healthyCheck()
{
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> msg_elapsed_time = current_time - last_msg_time;
std::chrono::duration<double> saved_map_elapsed_time = current_time - last_saved_map_time;
bool is_msg_valid = msg_elapsed_time.count() < MSG_VALID_TIME.count();
bool is_save_map_valid = saved_map_elapsed_time.count() < SAVE_MAP_VALID_TIME.count();

if (is_msg_valid && is_save_map_valid)
{
RCLCPP_INFO(get_logger(), "Health check: healthy");
writeHealthStatus("healthy");
}
else
{
RCLCPP_WARN(get_logger(), "Health check: unhealthy");
writeHealthStatus("unhealthy");
}
}

void saveMapPeriodically()
{
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_time = current_time - last_saved_map_time;

if (elapsed_time.count() > SAVE_MAP_PERIOD.count())
{
if (save_map_client_->wait_for_service(SAVE_MAP_CONNECTION_TIMEOUT))
{
RCLCPP_DEBUG(get_logger(), "Service available");
auto request = std::make_shared<nav2_msgs::srv::SaveMap::Request>();
request->free_thresh = 0.15;
request->map_topic = "/map";
request->map_url = "/maps/map";
request->map_mode = "trinary";
request->image_format = "png";

auto future = save_map_client_->async_send_request(request);

if (rclcpp::spin_until_future_complete(shared_from_this(), future, SAVE_MAP_CONNECTION_TIMEOUT) == rclcpp::FutureReturnCode::SUCCESS)
{
RCLCPP_DEBUG(get_logger(), "Map saved");
last_saved_map_time = std::chrono::steady_clock::now();
}
else
{
RCLCPP_DEBUG(get_logger(), "Service response didn't arrived");
}
}
else
{
RCLCPP_DEBUG(get_logger(), "Service unavailable");
}
}
}

private:
rclcpp::Subscription<nav_msgs::msg::OccupancyGrid>::SharedPtr map_subscriber_;
rclcpp::Client<nav2_msgs::srv::SaveMap>::SharedPtr save_map_client_;
std::chrono::steady_clock::time_point last_msg_time;
std::chrono::steady_clock::time_point last_saved_map_time;

void writeHealthStatus(const std::string &status)
{
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msgCallback(const nav_msgs::msg::OccupancyGrid::SharedPtr msg)
{
RCLCPP_DEBUG(get_logger(), "Msg arrived");
last_msg_time = std::chrono::steady_clock::now();
}
};

int main(int argc, char *argv[])
{
rclcpp::init(argc, argv);
auto node = std::make_shared<HealthCheckNode>();

while (rclcpp::ok())
{
rclcpp::spin_some(node);
node->healthyCheck();
node->saveMapPeriodically();
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
33 changes: 0 additions & 33 deletions healthcheck_navigation.cpp

This file was deleted.

66 changes: 0 additions & 66 deletions healthcheck_slam.cpp

This file was deleted.

0 comments on commit 192c5bc

Please sign in to comment.