Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add healthcheck #6

Merged
merged 25 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ros-docker-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: false
matrix:
ros-distro: [ "humble" ]
ros-distro: [ "humble" ] # iron is not supported on arm

steps:

Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/vulcanexus-docker-image.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Build/Publish Vulcanexus Docker Image
name: Build/Publish Vulcanexus Docker Image

on:
on:
workflow_dispatch:
inputs:
build_type:
Expand Down Expand Up @@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: false
matrix:
ros-distro: [ "humble" ]
ros-distro: [ "humble" ] # iron is not supported on arm

steps:

Expand Down
60 changes: 52 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,73 @@
ARG ROS_DISTRO=humble
ARG PREFIX=

FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-core
### BUILD IMAGE ###
FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-base AS build

RUN apt update && apt install -y \
ros-$ROS_DISTRO-nav2-msgs

# Create healthcheck package
WORKDIR /ros2_ws

RUN mkdir src && cd src/ && \
source /opt/ros/$ROS_DISTRO/setup.bash && \
ros2 pkg create healthcheck_pkg --build-type ament_cmake --dependencies rclcpp lifecycle_msgs nav2_msgs && \
sed -i '/find_package(nav2_msgs REQUIRED)/a \
add_executable(healthcheck_localization src/healthcheck_localization.cpp)\n \
ament_target_dependencies(healthcheck_localization rclcpp lifecycle_msgs)\n \
add_executable(healthcheck_navigation src/healthcheck_navigation.cpp)\n \
ament_target_dependencies(healthcheck_navigation rclcpp lifecycle_msgs)\n \
add_executable(healthcheck_slam src/healthcheck_slam.cpp)\n \
ament_target_dependencies(healthcheck_slam rclcpp nav2_msgs)\n \
install(TARGETS \
healthcheck_localization \
healthcheck_navigation \
healthcheck_slam \
DESTINATION lib/${PROJECT_NAME})' \
/ros2_ws/src/healthcheck_pkg/CMakeLists.txt

COPY ./healthcheck /ros2_ws/src/healthcheck_pkg/src

SHELL ["/bin/bash", "-c"]
# Build
RUN source /opt/ros/$ROS_DISTRO/setup.bash && \
colcon build --event-handlers console_direct+ --cmake-args -DCMAKE_BUILD_TYPE=Release

### RUNTIME IMAGE ###
FROM husarnet/ros:${PREFIX}${ROS_DISTRO}-ros-core

ARG PREFIX
ENV SLAM_MODE=localization
ENV SLAM_MODE=navigation
ENV PREFIX_ENV=$PREFIX

WORKDIR /ros2_ws

RUN apt update && apt upgrade -y && apt install -y \
ros-$ROS_DISTRO-navigation2 \
ros-$ROS_DISTRO-nav2-bringup \
ros-$ROS_DISTRO-pointcloud-to-laserscan && \
mkdir /maps && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY ./nav2_params /nav2_params

COPY --chmod=755 healthcheck_* /
COPY --from=build /ros2_ws/install /ros2_ws/install

RUN echo $(dpkg -s ros-$ROS_DISTRO-navigation2 | grep 'Version' | sed -r 's/Version:\s([0-9]+.[0-9]+.[0-9]+).*/\1/g') > /version.txt

HEALTHCHECK --interval=10s --timeout=10s --start-period=5s --retries=6 \
CMD bash -c "source /opt/$([ -n "${PREFIX_ENV}" ] && echo "${PREFIX_ENV//-/}" || echo "ros")/$ROS_DISTRO/setup.bash && /healthcheck_$SLAM_MODE.py"
RUN if [ -f "/ros_entrypoint.sh" ]; then \
sed -i '/test -f "\/ros2_ws\/install\/setup.bash" && source "\/ros2_ws\/install\/setup.bash"/a \
ros2 run healthcheck_pkg "healthcheck_$SLAM_MODE" &' \
/ros_entrypoint.sh; \
else \
sed -i '/test -f "\/ros2_ws\/install\/setup.bash" && source "\/ros2_ws\/install\/setup.bash"/a \
ros2 run healthcheck_pkg "healthcheck_$SLAM_MODE" &' \
/vulcanexus_entrypoint.sh; \
fi

COPY ./healthcheck/healthcheck.sh /
HEALTHCHECK --interval=7s --timeout=2s --start-period=5s --retries=5 \
CMD ["/healthcheck.sh"]

#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
#tip: gathering logs from healthcheck: docker inspect b39 | jq '.[0].State.Health.Log'
16 changes: 16 additions & 0 deletions healthcheck/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

HEALTHCHECK_FILE="/health_status.txt"

# Now check the health status
if [ -f "$HEALTHCHECK_FILE" ]; then
status=$(cat "$HEALTHCHECK_FILE")
if [ "$status" == "healthy" ]; then
exit 0
else
exit 1
fi
else
echo "Healthcheck file still not found. There may be an issue with the node."
exit 1
fi
31 changes: 31 additions & 0 deletions healthcheck/healthcheck_localization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "fstream"
#include "lifecycle_msgs/msg/transition_event.hpp"
#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

void write_health_status(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msg_callback(const lifecycle_msgs::msg::TransitionEvent::SharedPtr msg) {
if (msg->goal_state.label == "active") {
write_health_status("healthy");
} else {
write_health_status("unhealthy");
}
}

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");

auto sub = node->create_subscription<lifecycle_msgs::msg::TransitionEvent>(
"amcl/transition_event", rclcpp::SystemDefaultsQoS(),
msg_callback);

rclcpp::spin(node);

return 0;
}
31 changes: 31 additions & 0 deletions healthcheck/healthcheck_navigation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "fstream"
#include "lifecycle_msgs/msg/transition_event.hpp"
#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

void write_health_status(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msg_callback(const lifecycle_msgs::msg::TransitionEvent::SharedPtr msg) {
if (msg->goal_state.label == "active") {
write_health_status("healthy");
} else {
write_health_status("unhealthy");
}
}

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = rclcpp::Node::make_shared("healthcheck_node");

auto sub = node->create_subscription<lifecycle_msgs::msg::TransitionEvent>(
"controller_server/transition_event", rclcpp::SystemDefaultsQoS(),
msg_callback);

rclcpp::spin(node);

return 0;
}
104 changes: 104 additions & 0 deletions healthcheck/healthcheck_slam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "nav2_msgs/srv/save_map.hpp"
#include "nav_msgs/msg/occupancy_grid.hpp"
#include "rclcpp/rclcpp.hpp"
#include <fstream>
#include <thread>

using namespace std::chrono_literals;

#define LOOP_PERIOD 2s
#define MSG_VALID_TIME 5s
#define SAVE_MAP_PERIOD 15s
#define SAVE_MAP_CONNECTION_TIMEOUT 2s
#define SAVE_MAP_VALID_TIME 20s

class HealthCheckNode : public rclcpp::Node {
public:
HealthCheckNode() : Node("healthcheck_node") {
rclcpp::QoS qos(1);
qos.transient_local();
map_subscriber_ = create_subscription<nav_msgs::msg::OccupancyGrid>(
"/map", qos,
std::bind(&HealthCheckNode::msgCallback, this, std::placeholders::_1));
save_map_client_ =
create_client<nav2_msgs::srv::SaveMap>("/map_saver/save_map");
}

void healthyCheck() {
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> msg_elapsed_time =
current_time - last_msg_time;
std::chrono::duration<double> saved_map_elapsed_time =
current_time - last_saved_map_time;
bool is_msg_valid = msg_elapsed_time.count() < MSG_VALID_TIME.count();
bool is_save_map_valid =
saved_map_elapsed_time.count() < SAVE_MAP_VALID_TIME.count();

if (is_msg_valid && is_save_map_valid) {
writeHealthStatus("healthy");
} else {
writeHealthStatus("unhealthy");
}
}

void saveMapPeriodically() {
auto current_time = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_time =
current_time - last_saved_map_time;

if (elapsed_time.count() > SAVE_MAP_PERIOD.count()) {
if (save_map_client_->wait_for_service(SAVE_MAP_CONNECTION_TIMEOUT)) {
RCLCPP_DEBUG(get_logger(), "Service available");
auto request = std::make_shared<nav2_msgs::srv::SaveMap::Request>();
request->free_thresh = 0.15;
request->map_topic = "/map";
request->map_url = "/maps/map";
request->map_mode = "trinary";
request->image_format = "png";

auto future = save_map_client_->async_send_request(request);

if (rclcpp::spin_until_future_complete(shared_from_this(), future,
SAVE_MAP_CONNECTION_TIMEOUT) ==
rclcpp::FutureReturnCode::SUCCESS) {
RCLCPP_DEBUG(get_logger(), "Map saved");
last_saved_map_time = std::chrono::steady_clock::now();
} else {
RCLCPP_DEBUG(get_logger(), "Service response didn't arrived");
}
} else {
RCLCPP_DEBUG(get_logger(), "Service unavailable");
}
}
}

private:
rclcpp::Subscription<nav_msgs::msg::OccupancyGrid>::SharedPtr map_subscriber_;
rclcpp::Client<nav2_msgs::srv::SaveMap>::SharedPtr save_map_client_;
std::chrono::steady_clock::time_point last_msg_time;
std::chrono::steady_clock::time_point last_saved_map_time;

void writeHealthStatus(const std::string &status) {
std::ofstream healthFile("/health_status.txt");
healthFile << status;
}

void msgCallback(const nav_msgs::msg::OccupancyGrid::SharedPtr msg) {
RCLCPP_DEBUG(get_logger(), "Msg arrived");
last_msg_time = std::chrono::steady_clock::now();
}
};

int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = std::make_shared<HealthCheckNode>();

while (rclcpp::ok()) {
rclcpp::spin_some(node);
node->healthyCheck();
node->saveMapPeriodically();
std::this_thread::sleep_for(LOOP_PERIOD);
}

return 0;
}
21 changes: 0 additions & 21 deletions healthcheck_localization.py

This file was deleted.

26 changes: 0 additions & 26 deletions healthcheck_slam.py

This file was deleted.

Loading