Skip to content

Commit

Permalink
Return to simpler check version
Browse files Browse the repository at this point in the history
Now we only check whether the return value is nonzero; if it is, we wait a bit and rerun the command. Note that this might block the manager.
  • Loading branch information
Icemole committed Aug 20, 2024
1 parent ccd7756 commit e7e6332
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 25 deletions.
5 changes: 1 addition & 4 deletions sisyphus/aws_batch_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,7 @@ def task_state(self, task, task_id):
"""Return task state:"""
name = task.task_name()
task_name = escape_name(name, task_id)
try:
queue_state = self.queue_state()
except subprocess.CalledProcessError:
return STATE_RUNNABLE
queue_state = self.queue_state()
qs = queue_state.get(task_name)

# task name should be uniq
Expand Down
11 changes: 4 additions & 7 deletions sisyphus/load_sharing_facility_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,9 @@ def queue_state(self):
try:
out, err, retval = self.system_call(system_command)
if retval != 0:
raise subprocess.CalledProcessError(
retval, system_command, self._system_call_error_warn_msg(system_command)
)
logging.warning(self._system_call_error_warn_msg(system_command))
time.sleep(gs.WAIT_PERIOD_QSTAT_PARSING)
continue
except subprocess.TimeoutExpired:
logging.warning(self._system_call_timeout_warn_msg(system_command))
time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT)
Expand Down Expand Up @@ -292,10 +292,7 @@ def task_state(self, task, task_id):
name = task.task_name()
name = escape_name(name).encode()
task_name = (name, task_id)
try:
queue_state = self.queue_state()
except subprocess.CalledProcessError:
return STATE_RUNNABLE
queue_state = self.queue_state()
qs = queue_state[task_name]

# task name should be uniq
Expand Down
11 changes: 4 additions & 7 deletions sisyphus/simple_linux_utility_for_resource_management_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,9 @@ def queue_state(self):
try:
out, err, retval = self.system_call(system_command)
if retval != 0:
raise subprocess.CalledProcessError(
retval, system_command, self._system_call_error_warn_msg(system_command)
)
logging.warning(self._system_call_error_warn_msg(system_command))
time.sleep(gs.WAIT_PERIOD_QSTAT_PARSING)
continue
except subprocess.TimeoutExpired:
logging.warning(self._system_call_timeout_warn_msg(system_command))
time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT)
Expand Down Expand Up @@ -350,10 +350,7 @@ def task_state(self, task, task_id):
name = task.task_name()
name = self.process_task_name(name)
task_name = (name, task_id)
try:
queue_state = self.queue_state()
except subprocess.CalledProcessError:
return STATE_RUNNABLE
queue_state = self.queue_state()
qs = queue_state[task_name]

# task name should be uniq
Expand Down
11 changes: 4 additions & 7 deletions sisyphus/son_of_grid_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,9 @@ def queue_state(self):
try:
out, err, retval = self.system_call(system_command)
if retval != 0:
raise subprocess.CalledProcessError(
retval, system_command, self._system_call_error_warn_msg(system_command)
)
logging.warning(self._system_call_error_warn_msg(system_command))
time.sleep(gs.WAIT_PERIOD_QSTAT_PARSING)
continue
except subprocess.TimeoutExpired:
logging.warning(self._system_call_timeout_warn_msg(system_command))
time.sleep(gs.WAIT_PERIOD_SSH_TIMEOUT)
Expand Down Expand Up @@ -416,10 +416,7 @@ def task_state(self, task, task_id):
name = task.task_name()
name = escape_name(name)
task_name = (name, task_id)
try:
queue_state = self.queue_state()
except subprocess.CalledProcessError:
return STATE_RUNNABLE
queue_state = self.queue_state()
qs = queue_state[task_name]

# task name should be uniq
Expand Down

0 comments on commit e7e6332

Please sign in to comment.