Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add healthcheck #6

Merged
merged 25 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,65 @@
ARG ROS_DISTRO=humble
ARG PREFIX=

FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-core
### BUILD IMAGE ###
FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-base AS build

RUN apt update && apt upgrade -y && apt install -y \
rafal-gorecki marked this conversation as resolved.
Show resolved Hide resolved
ros-$ROS_DISTRO-nav2-msgs

# Create healthcheck package
WORKDIR /ros2_ws

RUN mkdir src && cd src/ && \
source /opt/ros/$ROS_DISTRO/setup.bash && \
ros2 pkg create healthcheck_pkg --build-type ament_cmake --dependencies rclcpp std_msgs && \
sed -i '/find_package(std_msgs REQUIRED)/a \
find_package(geometry_msgs REQUIRED)\n \
find_package(nav2_msgs REQUIRED)\n \
add_executable(healthcheck_localization src/healthcheck_localization.cpp)\n \
ament_target_dependencies(healthcheck_localization rclcpp geometry_msgs)\n \
add_executable(healthcheck_navigation src/healthcheck_navigation.cpp)\n \
ament_target_dependencies(healthcheck_navigation rclcpp)\n \
add_executable(healthcheck_slam src/healthcheck_slam.cpp)\n \
ament_target_dependencies(healthcheck_slam rclcpp nav2_msgs)\n \
install(TARGETS \
healthcheck_localization \
healthcheck_navigation \
healthcheck_slam \
DESTINATION lib/${PROJECT_NAME})' \
/ros2_ws/src/healthcheck_pkg/CMakeLists.txt

SHELL ["/bin/bash", "-c"]
COPY ./healthcheck /ros2_ws/src/healthcheck_pkg/src

# Build
RUN source /opt/ros/$ROS_DISTRO/setup.bash && \
colcon build --event-handlers console_direct+ --cmake-args -DCMAKE_BUILD_TYPE=Release

### RUNTIME IMAGE ###
FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-core

ARG PREFIX
ENV SLAM_MODE=localization
ENV SLAM_MODE=navigation
ENV PREFIX_ENV=$PREFIX

WORKDIR /ros2_ws

RUN apt update && apt upgrade -y && apt install -y \
ros-$ROS_DISTRO-navigation2 \
ros-$ROS_DISTRO-nav2-bringup \
ros-$ROS_DISTRO-pointcloud-to-laserscan && \
mkdir maps && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY ./nav2_params /nav2_params

COPY --chmod=755 healthcheck_* /
COPY --from=build /ros2_ws /ros2_ws

RUN echo $(dpkg -s ros-$ROS_DISTRO-navigation2 | grep 'Version' | sed -r 's/Version:\s([0-9]+.[0-9]+.[0-9]+).*/\1/g') > /version.txt

HEALTHCHECK --interval=10s --timeout=10s --start-period=5s --retries=6 \
CMD bash -c "source /opt/$([ -n "${PREFIX_ENV}" ] && echo "${PREFIX_ENV//-/}" || echo "ros")/$ROS_DISTRO/setup.bash && /healthcheck_$SLAM_MODE.py"
COPY ./healthcheck/healthcheck.sh /
HEALTHCHECK --interval=10s --timeout=3s --start-period=5s --retries=5 \
CMD ["/healthcheck.sh"]

#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
28 changes: 28 additions & 0 deletions healthcheck/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

HEALTHCHECK_FILE="/health_status.txt"

# Function to start the ROS 2 healthcheck node
start_healthcheck_node() {
/ros_entrypoint.sh ros2 run healthcheck_pkg healthcheck_$SLAM_MODE &
}

if [ ! -f "$HEALTHCHECK_FILE" ]; then
echo "Healthcheck file not found. Starting ROS 2 healthcheck node..."
start_healthcheck_node
# Wait a bit to allow the node to start and write its initial status
sleep 2
fi

# Now check the health status
if [ -f "$HEALTHCHECK_FILE" ]; then
status=$(cat "$HEALTHCHECK_FILE")
if [ "$status" == "healthy" ]; then
exit 0
else
exit 1
fi
else
echo "Healthcheck file still not found. There may be an issue with the node."
exit 1
fi
38 changes: 38 additions & 0 deletions healthcheck/healthcheck_localization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "fstream"
#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s

void write_health_status(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");
std::string node_to_find = "/amcl";

while (rclcpp::ok()) {
rclcpp::spin_some(node);
auto node_names = node->get_node_names();
auto is_node_running = std::find(node_names.begin(), node_names.end(),
node_to_find) != node_names.end();

if (is_node_running) {
RCLCPP_INFO(node->get_logger(), "The %s node is running.",
node_to_find.c_str());
write_health_status("healthy");

} else {
RCLCPP_WARN(node->get_logger(), "The %s node is not running.",
node_to_find.c_str());
write_health_status("unhealthy");
}
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
38 changes: 38 additions & 0 deletions healthcheck/healthcheck_navigation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "fstream"
#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s

void write_health_status(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");
std::string node_to_find = "/amcl";

while (rclcpp::ok()) {
rclcpp::spin_some(node);
auto node_names = node->get_node_names();
auto is_node_running = std::find(node_names.begin(), node_names.end(),
node_to_find) != node_names.end();

if (is_node_running) {
RCLCPP_INFO(node->get_logger(), "The %s node is running.",
node_to_find.c_str());
write_health_status("healthy");

} else {
RCLCPP_WARN(node->get_logger(), "The %s node is not running.",
node_to_find.c_str());
write_health_status("unhealthy");
}
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
106 changes: 106 additions & 0 deletions healthcheck/healthcheck_slam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "nav2_msgs/srv/save_map.hpp"
#include "nav_msgs/msg/occupancy_grid.hpp"
#include "rclcpp/rclcpp.hpp"
#include <fstream>
#include <thread>

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s
#define MSG_VALID_TIME 5s
#define SAVE_MAP_PERIOD 15s
#define SAVE_MAP_CONNECTION_TIMEOUT 2s
#define SAVE_MAP_VALID_TIME 20s

class HealthCheckNode : public rclcpp::Node {
public:
HealthCheckNode() : Node("healthcheck_node") {
rclcpp::QoS qos(1);
qos.transient_local();
map_subscriber_ = create_subscription<nav_msgs::msg::OccupancyGrid>(
"/map", qos,
std::bind(&HealthCheckNode::msgCallback, this, std::placeholders::_1));
save_map_client_ =
create_client<nav2_msgs::srv::SaveMap>("/map_saver/save_map");
}

void healthyCheck() {
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> msg_elapsed_time =
current_time - last_msg_time;
std::chrono::duration<double> saved_map_elapsed_time =
current_time - last_saved_map_time;
bool is_msg_valid = msg_elapsed_time.count() < MSG_VALID_TIME.count();
bool is_save_map_valid =
saved_map_elapsed_time.count() < SAVE_MAP_VALID_TIME.count();

if (is_msg_valid && is_save_map_valid) {
RCLCPP_INFO(get_logger(), "Health check: healthy");
writeHealthStatus("healthy");
} else {
RCLCPP_WARN(get_logger(), "Health check: unhealthy");
writeHealthStatus("unhealthy");
}
}

void saveMapPeriodically() {
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_time =
current_time - last_saved_map_time;

if (elapsed_time.count() > SAVE_MAP_PERIOD.count()) {
if (save_map_client_->wait_for_service(SAVE_MAP_CONNECTION_TIMEOUT)) {
RCLCPP_DEBUG(get_logger(), "Service available");
auto request = std::make_shared<nav2_msgs::srv::SaveMap::Request>();
request->free_thresh = 0.15;
request->map_topic = "/map";
request->map_url = "/maps/map";
request->map_mode = "trinary";
request->image_format = "png";

auto future = save_map_client_->async_send_request(request);

if (rclcpp::spin_until_future_complete(shared_from_this(), future,
SAVE_MAP_CONNECTION_TIMEOUT) ==
rclcpp::FutureReturnCode::SUCCESS) {
RCLCPP_DEBUG(get_logger(), "Map saved");
last_saved_map_time = std::chrono::steady_clock::now();
} else {
RCLCPP_DEBUG(get_logger(), "Service response didn't arrived");
}
} else {
RCLCPP_DEBUG(get_logger(), "Service unavailable");
}
}
}

private:
rclcpp::Subscription<nav_msgs::msg::OccupancyGrid>::SharedPtr map_subscriber_;
rclcpp::Client<nav2_msgs::srv::SaveMap>::SharedPtr save_map_client_;
std::chrono::steady_clock::time_point last_msg_time;
std::chrono::steady_clock::time_point last_saved_map_time;

void writeHealthStatus(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msgCallback(const nav_msgs::msg::OccupancyGrid::SharedPtr msg) {
RCLCPP_DEBUG(get_logger(), "Msg arrived");
last_msg_time = std::chrono::steady_clock::now();
}
};

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = std::make_shared<HealthCheckNode>();

while (rclcpp::ok()) {
rclcpp::spin_some(node);
node->healthyCheck();
node->saveMapPeriodically();
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
21 changes: 0 additions & 21 deletions healthcheck_localization.py

This file was deleted.

26 changes: 0 additions & 26 deletions healthcheck_slam.py

This file was deleted.