diff --git a/control/rebalance.py b/control/rebalance.py index 65cae497..608fb850 100755 --- a/control/rebalance.py +++ b/control/rebalance.py @@ -27,10 +27,11 @@ def __init__(self, gateway_service): self.ceph_utils = gateway_service.ceph_utils self.rebalance_period_sec = gateway_service.config.getint_with_default("gateway", "rebalance_period_sec", 7) self.rebalance_max_ns_to_change_lb_grp = gateway_service.config.getint_with_default("gateway", "max_ns_to_change_lb_grp", 8) - self.auto_rebalance = threading.Thread(target=self.auto_rebalance_task, daemon=True) + self.rebalance_event = threading.Event() + self.auto_rebalance = threading.Thread(target=self.auto_rebalance_task, daemon=True, args=(self.rebalance_event,)) self.auto_rebalance.start() #start the thread - def auto_rebalance_task(self ): + def auto_rebalance_task(self, death_event): """Periodically calls for auto rebalance.""" while (self.rebalance_period_sec > 0): for i in range(self.rebalance_max_ns_to_change_lb_grp): @@ -41,6 +42,8 @@ def auto_rebalance_task(self ): break except Exception: self.logger.exception(f"Exception in auto rebalance") + if death_event: + death_event.set() raise time.sleep(0.01) #release lock for 10ms after rebalancing each 1 NS time.sleep(self.rebalance_period_sec) diff --git a/control/server.py b/control/server.py index 51bfa69c..39c9279e 100644 --- a/control/server.py +++ b/control/server.py @@ -692,6 +692,10 @@ def keep_alive(self): spdk_ping_interval_in_seconds = 0.0 while True: + if self.gateway_rpc: + if self.gateway_rpc.rebalance.rebalance_event.is_set(): + self.logger.critical(f"Failure in rebalance, aborting") + raise SystemExit(f"Failure in rebalance, quitting gateway") timedout = self.server.wait_for_termination(timeout=1) if not timedout: break