Skip to content

Commit

Permalink
Add healthcheck (#6)
Browse files Browse the repository at this point in the history
* Add healthcheck

* Update healthcheck_navigation.cpp

* Update healthcheck_slam.cpp

* Update healthcheck_localization.cpp

* Timeout

* Fix navigation healthcheck

* New version of Healthcheck

* delete unnecessary

* little changes

* docker update

* formatting

* fix healhcheck

* typo

* Update Dockerfile

* fix

* Fix first healthcheck running

* Add entrypoint

* Delete debug msg

* Update Dockerfile

* Delete ros_entrypoint

* final fix

* delete std_pkg

* building for iron

* Fix Vulcannexus healthcheck

* iron

---------

Co-authored-by: dominikn <[email protected]>
  • Loading branch information
rafal-gorecki and DominikN authored Nov 27, 2023
1 parent faacd58 commit 459f945
Show file tree
Hide file tree
Showing 9 changed files with 238 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ros-docker-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: false
matrix:
ros-distro: [ "humble" ]
ros-distro: [ "humble" ] # iron is not supported on arm

steps:

Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/vulcanexus-docker-image.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Build/Publish Vulcanexus Docker Image
name: Build/Publish Vulcanexus Docker Image

on:
on:
workflow_dispatch:
inputs:
build_type:
Expand Down Expand Up @@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: false
matrix:
ros-distro: [ "humble" ]
ros-distro: [ "humble" ] # iron is not supported on arm

steps:

Expand Down
60 changes: 52 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,73 @@
ARG ROS_DISTRO=humble
ARG PREFIX=

FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-core
### BUILD IMAGE ###
FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-base AS build

RUN apt update && apt install -y \
ros-$ROS_DISTRO-nav2-msgs

# Create healthcheck package
WORKDIR /ros2_ws

RUN mkdir src && cd src/ && \
source /opt/ros/$ROS_DISTRO/setup.bash && \
ros2 pkg create healthcheck_pkg --build-type ament_cmake --dependencies rclcpp lifecycle_msgs nav2_msgs && \
sed -i '/find_package(nav2_msgs REQUIRED)/a \
add_executable(healthcheck_localization src/healthcheck_localization.cpp)\n \
ament_target_dependencies(healthcheck_localization rclcpp lifecycle_msgs)\n \
add_executable(healthcheck_navigation src/healthcheck_navigation.cpp)\n \
ament_target_dependencies(healthcheck_navigation rclcpp lifecycle_msgs)\n \
add_executable(healthcheck_slam src/healthcheck_slam.cpp)\n \
ament_target_dependencies(healthcheck_slam rclcpp nav2_msgs)\n \
install(TARGETS \
healthcheck_localization \
healthcheck_navigation \
healthcheck_slam \
DESTINATION lib/${PROJECT_NAME})' \
/ros2_ws/src/healthcheck_pkg/CMakeLists.txt

COPY ./healthcheck /ros2_ws/src/healthcheck_pkg/src

SHELL ["/bin/bash", "-c"]
# Build
RUN source /opt/ros/$ROS_DISTRO/setup.bash && \
colcon build --event-handlers console_direct+ --cmake-args -DCMAKE_BUILD_TYPE=Release

### RUNTIME IMAGE ###
FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-core

ARG PREFIX
ENV SLAM_MODE=localization
ENV SLAM_MODE=navigation
ENV PREFIX_ENV=$PREFIX

WORKDIR /ros2_ws

RUN apt update && apt upgrade -y && apt install -y \
ros-$ROS_DISTRO-navigation2 \
ros-$ROS_DISTRO-nav2-bringup \
ros-$ROS_DISTRO-pointcloud-to-laserscan && \
mkdir /maps && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY ./nav2_params /nav2_params

COPY --chmod=755 healthcheck_* /
COPY --from=build /ros2_ws/install /ros2_ws/install

RUN echo $(dpkg -s ros-$ROS_DISTRO-navigation2 | grep 'Version' | sed -r 's/Version:\s([0-9]+.[0-9]+.[0-9]+).*/\1/g') > /version.txt

HEALTHCHECK --interval=10s --timeout=10s --start-period=5s --retries=6 \
CMD bash -c "source /opt/$([ -n "${PREFIX_ENV}" ] && echo "${PREFIX_ENV//-/}" || echo "ros")/$ROS_DISTRO/setup.bash && /healthcheck_$SLAM_MODE.py"
RUN if [ -f "/ros_entrypoint.sh" ]; then \
sed -i '/test -f "\/ros2_ws\/install\/setup.bash" && source "\/ros2_ws\/install\/setup.bash"/a \
ros2 run healthcheck_pkg "healthcheck_$SLAM_MODE" &' \
/ros_entrypoint.sh; \
else \
sed -i '/test -f "\/ros2_ws\/install\/setup.bash" && source "\/ros2_ws\/install\/setup.bash"/a \
ros2 run healthcheck_pkg "healthcheck_$SLAM_MODE" &' \
/vulcanexus_entrypoint.sh; \
fi

COPY ./healthcheck/healthcheck.sh /
HEALTHCHECK --interval=7s --timeout=2s --start-period=5s --retries=5 \
CMD ["/healthcheck.sh"]

#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
16 changes: 16 additions & 0 deletions healthcheck/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

HEALTHCHECK_FILE="/health_status.txt"

# Now check the health status
if [ -f "$HEALTHCHECK_FILE" ]; then
status=$(cat "$HEALTHCHECK_FILE")
if [ "$status" == "healthy" ]; then
exit 0
else
exit 1
fi
else
echo "Healthcheck file still not found. There may be an issue with the node."
exit 1
fi
31 changes: 31 additions & 0 deletions healthcheck/healthcheck_localization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "fstream"
#include "lifecycle_msgs/msg/transition_event.hpp"
#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

void write_health_status(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msg_callback(const lifecycle_msgs::msg::TransitionEvent::SharedPtr msg) {
if (msg->goal_state.label == "active") {
write_health_status("healthy");
} else {
write_health_status("unhealthy");
}
}

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");

auto sub = node->create_subscription<lifecycle_msgs::msg::TransitionEvent>(
"amcl/transition_event", rclcpp::SystemDefaultsQoS(),
msg_callback);

rclcpp::spin(node);

return 0;
}
31 changes: 31 additions & 0 deletions healthcheck/healthcheck_navigation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "fstream"
#include "lifecycle_msgs/msg/transition_event.hpp"
#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

void write_health_status(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msg_callback(const lifecycle_msgs::msg::TransitionEvent::SharedPtr msg) {
if (msg->goal_state.label == "active") {
write_health_status("healthy");
} else {
write_health_status("unhealthy");
}
}

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");

auto sub = node->create_subscription<lifecycle_msgs::msg::TransitionEvent>(
"controller_server/transition_event", rclcpp::SystemDefaultsQoS(),
msg_callback);

rclcpp::spin(node);

return 0;
}
104 changes: 104 additions & 0 deletions healthcheck/healthcheck_slam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "nav2_msgs/srv/save_map.hpp"
#include "nav_msgs/msg/occupancy_grid.hpp"
#include "rclcpp/rclcpp.hpp"
#include <fstream>
#include <thread>

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s
#define MSG_VALID_TIME 5s
#define SAVE_MAP_PERIOD 15s
#define SAVE_MAP_CONNECTION_TIMEOUT 2s
#define SAVE_MAP_VALID_TIME 20s

class HealthCheckNode : public rclcpp::Node {
public:
HealthCheckNode() : Node("healthcheck_node") {
rclcpp::QoS qos(1);
qos.transient_local();
map_subscriber_ = create_subscription<nav_msgs::msg::OccupancyGrid>(
"/map", qos,
std::bind(&HealthCheckNode::msgCallback, this, std::placeholders::_1));
save_map_client_ =
create_client<nav2_msgs::srv::SaveMap>("/map_saver/save_map");
}

void healthyCheck() {
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> msg_elapsed_time =
current_time - last_msg_time;
std::chrono::duration<double> saved_map_elapsed_time =
current_time - last_saved_map_time;
bool is_msg_valid = msg_elapsed_time.count() < MSG_VALID_TIME.count();
bool is_save_map_valid =
saved_map_elapsed_time.count() < SAVE_MAP_VALID_TIME.count();

if (is_msg_valid && is_save_map_valid) {
writeHealthStatus("healthy");
} else {
writeHealthStatus("unhealthy");
}
}

void saveMapPeriodically() {
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_time =
current_time - last_saved_map_time;

if (elapsed_time.count() > SAVE_MAP_PERIOD.count()) {
if (save_map_client_->wait_for_service(SAVE_MAP_CONNECTION_TIMEOUT)) {
RCLCPP_DEBUG(get_logger(), "Service available");
auto request = std::make_shared<nav2_msgs::srv::SaveMap::Request>();
request->free_thresh = 0.15;
request->map_topic = "/map";
request->map_url = "/maps/map";
request->map_mode = "trinary";
request->image_format = "png";

auto future = save_map_client_->async_send_request(request);

if (rclcpp::spin_until_future_complete(shared_from_this(), future,
SAVE_MAP_CONNECTION_TIMEOUT) ==
rclcpp::FutureReturnCode::SUCCESS) {
RCLCPP_DEBUG(get_logger(), "Map saved");
last_saved_map_time = std::chrono::steady_clock::now();
} else {
RCLCPP_DEBUG(get_logger(), "Service response didn't arrived");
}
} else {
RCLCPP_DEBUG(get_logger(), "Service unavailable");
}
}
}

private:
rclcpp::Subscription<nav_msgs::msg::OccupancyGrid>::SharedPtr map_subscriber_;
rclcpp::Client<nav2_msgs::srv::SaveMap>::SharedPtr save_map_client_;
std::chrono::steady_clock::time_point last_msg_time;
std::chrono::steady_clock::time_point last_saved_map_time;

void writeHealthStatus(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msgCallback(const nav_msgs::msg::OccupancyGrid::SharedPtr msg) {
RCLCPP_DEBUG(get_logger(), "Msg arrived");
last_msg_time = std::chrono::steady_clock::now();
}
};

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = std::make_shared<HealthCheckNode>();

while (rclcpp::ok()) {
rclcpp::spin_some(node);
node->healthyCheck();
node->saveMapPeriodically();
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
21 changes: 0 additions & 21 deletions healthcheck_localization.py

This file was deleted.

26 changes: 0 additions & 26 deletions healthcheck_slam.py

This file was deleted.

0 comments on commit 459f945

Please sign in to comment.