Skip to content

Commit

Permalink
Add check for high RAM usage
Browse files Browse the repository at this point in the history
We had a case where someone took off with an experimental
system with 100% RAM usage on the embedded system
without noticing. This lead to problems during flight.

Since we already have a CPU load check it seems natural
to also check the reported RAM usage.
  • Loading branch information
MaEtUgR committed Jun 27, 2024
1 parent 30b854d commit e4446ad
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 7 deletions.
1 change: 1 addition & 0 deletions ROMFS/px4fmu_common/init.d-posix/rcS
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ param set-default CBRK_SUPPLY_CHK 894281

# disable check, no CPU load reported on posix yet
param set-default COM_CPU_MAX -1
param set-default COM_RAM_MAX -1

# Don't require RC calibration and configuration
param set-default COM_RC_IN_MODE 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ CpuResourceChecks::CpuResourceChecks()

void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
{
if (_param_com_cpu_max.get() < FLT_EPSILON) {
const bool cpu_load_check_enabled = _param_com_cpu_max.get() > FLT_EPSILON;
const bool ram_usage_check_enabled = _param_com_ram_max.get() > FLT_EPSILON;

if (!cpu_load_check_enabled && !ram_usage_check_enabled) {
return;
}

Expand All @@ -54,15 +57,15 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
/* EVENT
* @description
* <profile name="dev">
* If the system does not provide any CPU load information, use the parameter <param>COM_CPU_MAX</param>
* to disable the check.
* If the system does not provide any CPU and RAM load information, use the parameters <param>COM_CPU_MAX</param>
* and <param>COM_RAM_MAX</param> to disable the checks.
* </profile>
*/
reporter.healthFailure(NavModes::All, health_component_t::system, events::ID("check_missing_cpuload"),
events::Log::Error, "No CPU load information");
events::Log::Error, "No CPU and RAM load information");

if (reporter.mavlink_log_pub()) {
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: No CPU load information");
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: No CPU and RAM load information");
}

} else {
Expand All @@ -71,7 +74,7 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
_high_cpu_load_hysteresis.set_state_and_update(high_cpu_load, hrt_absolute_time());

// fail check if CPU load is above the threshold for 2 seconds
if (_high_cpu_load_hysteresis.get_state()) {
if (cpu_load_check_enabled && _high_cpu_load_hysteresis.get_state()) {
/* EVENT
* @description
* The CPU load can be reduced for example by disabling unused modules (e.g. mavlink instances) or reducing the gyro update
Expand All @@ -88,5 +91,26 @@ void CpuResourceChecks::checkAndReport(const Context &context, Report &reporter)
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: CPU load too high: %3.1f%%", (double)cpuload_percent);
}
}

const float ram_usage_percent = cpuload.ram_usage * 100.f;
const bool high_ram_usage = ram_usage_percent > _param_com_ram_max.get();

if (ram_usage_check_enabled && high_ram_usage) {
/* EVENT
* @description
* The RAM usage can be reduced for example by disabling unused modules (e.g. mavlink instances).
*
* <profile name="dev">
* The threshold can be adjusted via <param>COM_RAM_MAX</param> parameter.
* </profile>
*/
reporter.healthFailure<float>(NavModes::All, health_component_t::system, events::ID("check_ram_usage_too_high"),
events::Log::Error, "RAM usage too high: {1:.1}%", ram_usage_percent);

if (reporter.mavlink_log_pub()) {
mavlink_log_critical(reporter.mavlink_log_pub(), "Preflight Fail: RAM usage too high: %3.1f%%",
(double)ram_usage_percent);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class CpuResourceChecks : public HealthAndArmingCheckBase
systemlib::Hysteresis _high_cpu_load_hysteresis{false};

DEFINE_PARAMETERS_CUSTOM_PARENT(HealthAndArmingCheckBase,
(ParamFloat<px4::params::COM_CPU_MAX>) _param_com_cpu_max
(ParamFloat<px4::params::COM_CPU_MAX>) _param_com_cpu_max,
(ParamFloat<px4::params::COM_RAM_MAX>) _param_com_ram_max
)
};
15 changes: 15 additions & 0 deletions src/modules/commander/commander_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,21 @@ PARAM_DEFINE_FLOAT(COM_KILL_DISARM, 5.0f);
*/
PARAM_DEFINE_FLOAT(COM_CPU_MAX, 95.0f);

/**
* Maximum allowed RAM usage to pass checks
*
* The check fails if the RAM usage is above this threshold.
*
* A negative value disables the check.
*
* @group Commander
* @unit %
* @min -1
* @max 100
* @increment 1
*/
PARAM_DEFINE_FLOAT(COM_RAM_MAX, 95.0f);

/**
* Required number of redundant power modules
*
Expand Down

0 comments on commit e4446ad

Please sign in to comment.