Skip to content

Commit

Permalink
fix kill main pid runner + vendor psutil (Netflix#2015)
Browse files Browse the repository at this point in the history
* kill main PID as well in subprocess manager

* use process groups

* remove whitespaces

* vendor psutil

* avoid circular import

* use kill instead of psutil
  • Loading branch information
madhur-ob authored Sep 6, 2024
1 parent 3b5277b commit f9cda8a
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions metaflow/runner/subprocess_manager.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,30 @@
import asyncio
import os
import time
import shutil
import signal
import subprocess
import sys
import tempfile
import threading
import time
from typing import Callable, Dict, Iterator, List, Optional, Tuple


def kill_process_and_descendants(pid, termination_timeout):
# TODO: there's a race condition that new descendants might
# spawn b/w the invocations of 'pkill' and 'kill'.
# Needs to be fixed in future.
try:
subprocess.check_call(["pkill", "-TERM", "-P", str(pid)])
subprocess.check_call(["kill", "-TERM", str(pid)])
except subprocess.CalledProcessError:
pass

time.sleep(termination_timeout)

try:
subprocess.check_call(["pkill", "-KILL", "-P", str(pid)])
subprocess.check_call(["kill", "-KILL", str(pid)])
except subprocess.CalledProcessError:
pass

Expand Down Expand Up @@ -436,13 +441,13 @@ def cleanup(self):
if self.run_called:
shutil.rmtree(self.temp_dir, ignore_errors=True)

async def kill(self, termination_timeout: float = 1):
async def kill(self, termination_timeout: float = 5):
"""
Kill the subprocess and its descendants.
Parameters
----------
termination_timeout : float, default 1
termination_timeout : float, default 5
The time to wait after sending a SIGTERM to the process and its descendants
before sending a SIGKILL.
"""
Expand Down

0 comments on commit f9cda8a

Please sign in to comment.