From b171c21240e577c446825d8cc119b7f7bfc2463b Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Mon, 1 Jul 2024 12:04:50 -0700 Subject: [PATCH 1/4] Speed up the default preStop hook to allow for graceful shutdown The default PaaSTA preStop hook sleeps for 30s - which is exactly the same as the default termination grace period, therefore preventing anything that gets this default hook from being able to gracefully shutdown. We could (should?) look into simply not adding a hook for things that don't need one (i.e., not mesh-registed and no custom hooks), but in the meantime adding a silly message doesn't seem particularly harmful. --- paasta_tools/kubernetes_tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index 6cef35311d..e1ce52d709 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -1472,7 +1472,10 @@ def get_kubernetes_container_termination_action(self) -> V1Handler: if not command: return V1Handler( _exec=V1ExecAction( - command=["/bin/sh", "-c", f"sleep {DEFAULT_PRESTOP_SLEEP_SECONDS}"] + # NOTE: we could also probably look into seeing what happens if we return None + # instead of a V1Handler, but in the meantime, having a funny message as the pre-stop + # hook doesn't seem particularly harmful + command=["/bin/sh", "-c", f"echo Goodbye, cruel world."] ) ) if isinstance(command, str): From 4b9aaccc9a7162be5b52f29e009b7d9bccdee1a2 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Mon, 1 Jul 2024 12:11:41 -0700 Subject: [PATCH 2/4] update tests --- tests/test_kubernetes_tools.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/test_kubernetes_tools.py b/tests/test_kubernetes_tools.py index 4757046ce7..a24834715d 100644 --- a/tests/test_kubernetes_tools.py +++ b/tests/test_kubernetes_tools.py @@ -930,7 +930,9 @@ def test_get_kubernetes_containers(self, prometheus_port, expected_ports): image=mock_get_docker_url.return_value, lifecycle=V1Lifecycle( pre_stop=V1Handler( - _exec=V1ExecAction(command=["/bin/sh", "-c", "sleep 30"]) + _exec=V1ExecAction( + command=["/bin/sh", "-c", "echo Goodbye, cruel world."] + ) ) ), liveness_probe=V1Probe( @@ -2187,9 +2189,18 @@ def test_get_pod_anti_affinity(self, anti_affinity, expected): @pytest.mark.parametrize( "termination_action,expected", [ - (None, ["/bin/sh", "-c", "sleep 30"]), # no termination action - ("", ["/bin/sh", "-c", "sleep 30"]), # empty termination action - ([], ["/bin/sh", "-c", "sleep 30"]), # empty termination action + ( + None, + ["/bin/sh", "-c", "echo Goodbye, cruel world."], + ), # no termination action + ( + "", + ["/bin/sh", "-c", "echo Goodbye, cruel world."], + ), # empty termination action + ( + [], + ["/bin/sh", "-c", "echo Goodbye, cruel world."], + ), # empty termination action ("/bin/no-args", ["/bin/no-args"]), # no args command (["/bin/bash", "cmd.sh"], ["/bin/bash", "cmd.sh"]), # no args command ], @@ -4274,7 +4285,7 @@ def test_warning_big_bounce(): job_config.format_kubernetes_app().spec.template.metadata.labels[ "paasta.yelp.com/config_sha" ] - == "config5abf6b07" + == "config7d73e761" ), "If this fails, just change the constant in this test, but be aware that deploying this change will cause every service to bounce!" @@ -4320,7 +4331,7 @@ def test_warning_big_bounce_routable_pod(): job_config.format_kubernetes_app().spec.template.metadata.labels[ "paasta.yelp.com/config_sha" ] - == "config8ee1bd70" + == "config17bb62ea" ), "If this fails, just change the constant in this test, but be aware that deploying this change will cause every smartstack-registered service to bounce!" From b0f26e62b4fc0d34ba4ef4588a86cfa1c878a077 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Mon, 1 Jul 2024 12:19:28 -0700 Subject: [PATCH 3/4] Add back sleep 30s for mesh-registered things I missed that the special-cased preStop is only on the hacheck container. If a pod is mesh-registered, the main container needs to sleep for as long as the hacheck sidecar to allow requests to drain. --- paasta_tools/kubernetes_tools.py | 36 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/paasta_tools/kubernetes_tools.py b/paasta_tools/kubernetes_tools.py index e1ce52d709..be00a72719 100644 --- a/paasta_tools/kubernetes_tools.py +++ b/paasta_tools/kubernetes_tools.py @@ -1434,7 +1434,9 @@ def get_kubernetes_containers( env=self.get_container_env(), resources=self.get_resource_requirements(), lifecycle=V1Lifecycle( - pre_stop=self.get_kubernetes_container_termination_action() + pre_stop=self.get_kubernetes_container_termination_action( + service_namespace_config + ) ), name=self.get_sanitised_instance_name(), liveness_probe=self.get_liveness_probe(service_namespace_config), @@ -1464,20 +1466,36 @@ def get_readiness_probe( else: return self.get_liveness_probe(service_namespace_config) - def get_kubernetes_container_termination_action(self) -> V1Handler: + def get_kubernetes_container_termination_action( + self, service_namespace_config: ServiceNamespaceConfig + ) -> V1Handler: command = self.config_dict.get("lifecycle", KubeLifecycleDict({})).get( "pre_stop_command", [] ) # default pre stop hook for the container if not command: - return V1Handler( - _exec=V1ExecAction( - # NOTE: we could also probably look into seeing what happens if we return None - # instead of a V1Handler, but in the meantime, having a funny message as the pre-stop - # hook doesn't seem particularly harmful - command=["/bin/sh", "-c", f"echo Goodbye, cruel world."] + # if the service is in smartstack, we want to sleep for a while to allow requests to drain + if service_namespace_config.is_in_smartstack(): + return V1Handler( + _exec=V1ExecAction( + command=[ + "/bin/sh", + "-c", + f"sleep {DEFAULT_PRESTOP_SLEEP_SECONDS}", + ] + ) + ) + # otherwise, we want to immediately send a SIGTERM to the container + # so that it can gracefully shutdown if configured to do so + else: + return V1Handler( + _exec=V1ExecAction( + # NOTE: we could also probably look into seeing what happens if we return None + # instead of a V1Handler, but in the meantime, having a funny message as the pre-stop + # hook doesn't seem particularly harmful + command=["/bin/sh", "-c", f"echo Goodbye, cruel world."] + ) ) - ) if isinstance(command, str): command = [command] return V1Handler(_exec=V1ExecAction(command=command)) From c141173d8cf43409fce811de821598e8c7f69ef3 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Mon, 1 Jul 2024 12:33:15 -0700 Subject: [PATCH 4/4] update tests, round 2 --- tests/test_kubernetes_tools.py | 43 +++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tests/test_kubernetes_tools.py b/tests/test_kubernetes_tools.py index a24834715d..8e29b63080 100644 --- a/tests/test_kubernetes_tools.py +++ b/tests/test_kubernetes_tools.py @@ -930,9 +930,7 @@ def test_get_kubernetes_containers(self, prometheus_port, expected_ports): image=mock_get_docker_url.return_value, lifecycle=V1Lifecycle( pre_stop=V1Handler( - _exec=V1ExecAction( - command=["/bin/sh", "-c", "echo Goodbye, cruel world."] - ) + _exec=V1ExecAction(command=["/bin/sh", "-c", "sleep 30"]) ) ), liveness_probe=V1Probe( @@ -957,6 +955,7 @@ def test_get_kubernetes_containers(self, prometheus_port, expected_ports): service_namespace_config = mock.Mock() service_namespace_config.get_healthcheck_mode.return_value = "http" service_namespace_config.get_healthcheck_uri.return_value = "/status" + service_namespace_config.is_in_smartstack.return_value = True assert ( self.deployment.get_kubernetes_containers( docker_volumes=mock_docker_volumes, @@ -2187,33 +2186,59 @@ def test_get_pod_anti_affinity(self, anti_affinity, expected): assert self.deployment.get_pod_anti_affinity() == expected_affinity @pytest.mark.parametrize( - "termination_action,expected", + "termination_action,expected,service_namespace_config", [ ( None, ["/bin/sh", "-c", "echo Goodbye, cruel world."], + ServiceNamespaceConfig({}), ), # no termination action ( "", ["/bin/sh", "-c", "echo Goodbye, cruel world."], + ServiceNamespaceConfig({}), ), # empty termination action ( [], ["/bin/sh", "-c", "echo Goodbye, cruel world."], + ServiceNamespaceConfig({}), ), # empty termination action - ("/bin/no-args", ["/bin/no-args"]), # no args command - (["/bin/bash", "cmd.sh"], ["/bin/bash", "cmd.sh"]), # no args command + ( + None, + ["/bin/sh", "-c", "sleep 30"], + ServiceNamespaceConfig({"proxy_port": 6666}), + ), # no termination action, mesh-registered + ( + "", + ["/bin/sh", "-c", "sleep 30"], + ServiceNamespaceConfig({"proxy_port": 6666}), + ), # empty termination action, mesh-registered + ( + [], + ["/bin/sh", "-c", "sleep 30"], + ServiceNamespaceConfig({"proxy_port": 6666}), + ), # empty termination action, mesh-registered + ("/bin/no-args", ["/bin/no-args"], {}), # no args command + (["/bin/bash", "cmd.sh"], ["/bin/bash", "cmd.sh"], {}), # no args command ], ) def test_kubernetes_container_termination_action( - self, termination_action, expected + self, + termination_action, + expected, + service_namespace_config, ): if termination_action: self.deployment.config_dict["lifecycle"] = { "pre_stop_command": termination_action } handler = V1Handler(_exec=V1ExecAction(command=expected)) - assert self.deployment.get_kubernetes_container_termination_action() == handler + assert ( + self.deployment.get_kubernetes_container_termination_action( + service_namespace_config + ) + == handler + ) @pytest.mark.parametrize( "whitelist,blacklist,expected", @@ -4331,7 +4356,7 @@ def test_warning_big_bounce_routable_pod(): job_config.format_kubernetes_app().spec.template.metadata.labels[ "paasta.yelp.com/config_sha" ] - == "config17bb62ea" + == "config8ee1bd70" ), "If this fails, just change the constant in this test, but be aware that deploying this change will cause every smartstack-registered service to bounce!"