Skip to content

Commit

Permalink
add boot time check to mqtt exception handling, reenable offline stat…
Browse files Browse the repository at this point in the history
…e update
  • Loading branch information
patrickjaigner committed Oct 9, 2024
1 parent 3de0809 commit 44a4a2d
Showing 1 changed file with 56 additions and 46 deletions.
102 changes: 56 additions & 46 deletions edge-node/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,11 @@ def run() -> None:
max_setup_time = 180
max_config_update_time = 1200
max_system_check_time = 180
max_calibration_time = (
(len(config.calibration.gas_cylinders) + 1)
* config.calibration.sampling_per_cylinder_seconds
+ 300 # flush time
+ 180 # extra time
)
max_calibration_time = ((len(config.calibration.gas_cylinders) + 1) *
config.calibration.sampling_per_cylinder_seconds +
300 # flush time
+ 180 # extra time
)
max_measurement_time = config.measurement.procedure_seconds + 180 # extra time
utils.set_alarm(max_setup_time, "setup")

Expand All @@ -82,13 +81,12 @@ def run() -> None:
logger.info("Initializing hardware interfaces.", config=config)

try:
hardware_interface = hardware.HardwareInterface(
config=config, simulate=simulate
)
hardware_interface = hardware.HardwareInterface(config=config,
simulate=simulate)
except Exception as e:
logger.exception(
e, label="Could not initialize hardware interface.", config=config
)
logger.exception(e,
label="Could not initialize hardware interface.",
config=config)
raise e

# tear down hardware on program termination
Expand All @@ -97,7 +95,9 @@ def _graceful_teardown(*_args: Any) -> None:

logger.info("Starting graceful teardown.")
hardware_interface.teardown()
procedures.MQTTAgent.deinit()
logger.info("Finished graceful teardown.")
logger.info("Performing clean program exit.")
exit(0)

signal.signal(signal.SIGINT, _graceful_teardown)
Expand All @@ -109,8 +109,7 @@ def _graceful_teardown(*_args: Any) -> None:

# initialize config procedure
configuration_procedure = procedures.ConfigurationProcedure(
config, simulate=simulate
)
config, simulate=simulate)

# initialize procedures interacting with hardware:
# system_check: logging system statistics and reporting hardware/system errors
Expand All @@ -121,25 +120,24 @@ def _graceful_teardown(*_args: Any) -> None:

try:
system_check_procedure = procedures.SystemCheckProcedure(
config, hardware_interface, simulate=simulate
)
config, hardware_interface, simulate=simulate)
calibration_procedure = procedures.CalibrationProcedure(
config, hardware_interface, simulate=simulate
)
config, hardware_interface, simulate=simulate)
wind_measurement_procedure = procedures.WindMeasurementProcedure(
config, hardware_interface, simulate=simulate
)
config, hardware_interface, simulate=simulate)
co2_measurement_procedure = procedures.CO2MeasurementProcedure(
config, hardware_interface, simulate=simulate
)
config, hardware_interface, simulate=simulate)
except Exception as e:
logger.exception(e, label="could not initialize procedures", config=config)
logger.exception(e,
label="could not initialize procedures",
config=config)
raise e

# -------------------------------------------------------------------------
# infinite mainloop

logger.info("Successfully finished setup, starting mainloop.", config=config)
logger.info("Successfully finished setup, starting mainloop.",
config=config)

last_successful_mainloop_iteration_time = 0.0
while True:
Expand All @@ -161,7 +159,8 @@ def _graceful_teardown(*_args: Any) -> None:

if config.active_components.run_calibration_procedures:
if calibration_procedure.is_due():
logger.info("Running calibration procedure.", config=config)
logger.info("Running calibration procedure.",
config=config)
calibration_procedure.run()
else:
logger.info("Calibration procedure is not due.")
Expand All @@ -184,9 +183,9 @@ def _graceful_teardown(*_args: Any) -> None:
utils.set_alarm(max_config_update_time, "config update")

logger.info("Checking for new config messages.")
new_config_message: Optional[custom_types.MQTTConfigurationRequest] = (
procedures.MQTTAgent.get_config_message()
)
new_config_message: Optional[
custom_types.MQTTConfigurationRequest] = (
procedures.MQTTAgent.get_config_message())

if new_config_message is not None:
# run config update procedure
Expand All @@ -196,27 +195,27 @@ def _graceful_teardown(*_args: Any) -> None:
# -> Exit, Restarts via Cron Job to load new config
except Exception:
# reinitialize hardware if configuration failed
logger.info(
"Exception during configuration procedure.", config=config
)
logger.info("Exception during configuration procedure.",
config=config)
hardware_interface.reinitialize(config)

# -----------------------------------------------------------------
# MQTT Agent Checks

if config.active_components.send_messages_over_mqtt:
procedures.MQTTAgent.check_errors()

# -----------------------------------------------------------------

logger.info("Finished mainloop iteration.")
last_successful_mainloop_iteration_time = time.time()
# raises CommunicationOutage if communication loop has stopped

# update state config
state = utils.StateInterface.read()
if state.offline_since:
state.offline_since = None
# utils.StateInterface.write(state)
utils.StateInterface.write(state)

# -----------------------------------------------------------------

logger.info("Finished mainloop iteration.")
last_successful_mainloop_iteration_time = time.time()

except procedures.MQTTAgent.CommunicationOutage as e:
logger.exception(e, label="exception in mainloop", config=config)
Expand All @@ -231,12 +230,19 @@ def _graceful_teardown(*_args: Any) -> None:
utils.StateInterface.write(state)

# reboot if exception lasts longer than 24 hours
# & the last reboot has been performed longer than 24h ago
if (time.time() - state.offline_since) >= 86400:
logger.info(
"Rebooting because no successful MQTT connect for 24 hours.",
config=config,
)
os.system("sudo reboot")
if utils.read_os_uptime() >= 86400:
logger.info(
"Rebooting because no successful MQTT connect for 24 hours.",
config=config,
)
os.system("sudo reboot")
# teardown routine will exit program
else:
logger.info(
"System is offline. Last reboot is less than 24h ago. No action."
)

try:
# check timer with exponential backoff
Expand Down Expand Up @@ -266,17 +272,20 @@ def _graceful_teardown(*_args: Any) -> None:
# cancel the alarm for too long mainloops
signal.alarm(0)

# reboot if exception lasts longer than 12 hours
if (time.time() - last_successful_mainloop_iteration_time) >= 86400:
# reboot if exception lasts longer than 24 hours
# & the last reboot has been performed longer than 24h ago
if (time.time() -
last_successful_mainloop_iteration_time) >= 86400:
if utils.read_os_uptime() >= 86400:
logger.info(
"Rebooting because no successful mainloop iteration for 24 hours.",
config=config,
)
os.system("sudo reboot")
# teardown routine will exit program
else:
logger.info(
"System is offline. Last reboot is less than 24h ago. No action."
"Persisting exception. Last reboot is less than 24h ago. No action."
)

try:
Expand All @@ -287,7 +296,8 @@ def _graceful_teardown(*_args: Any) -> None:
logger.info("Performing hardware reset.", config=config)
hardware_interface.teardown()
hardware_interface.reinitialize(config)
logger.info("Hardware reset was successful.", config=config)
logger.info("Hardware reset was successful.",
config=config)

except Exception as e:
logger.exception(
Expand Down

0 comments on commit 44a4a2d

Please sign in to comment.