Skip to content

Commit

Permalink
Optimize job pickling to speed up Manager thread performance on larg…
Browse files Browse the repository at this point in the history
…e graphs (#211)

* remove creator from pickled Job objects
  • Loading branch information
RaphaelLilt authored Oct 28, 2024
1 parent 311ebfd commit 3181d72
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 4 deletions.
3 changes: 3 additions & 0 deletions sisyphus/global_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ def file_caching(path):
#: Show job targets on status screen, can significantly slow down startup time if many outputs are used
SHOW_JOB_TARGETS = True

# Controls whether to include 'creator' in serialized state of an abstract path
INCLUDE_CREATOR_STATE = False

#: How many seconds should be waited before assuming a job is finished after the finished file is written
#: to allow network file system to sync up
WAIT_PERIOD_JOB_FS_SYNC = 30
Expand Down
24 changes: 21 additions & 3 deletions sisyphus/job_path.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Author: Jan-Thorsten Peter <[email protected]>

import copy
import os
import logging
import gzip
Expand Down Expand Up @@ -239,12 +240,29 @@ def __hash__(self):
# TODO Check how uninitialized object should behave here
return hash((self.__dict__.get("creator"), self.__dict__.get("path")))

def __sis_state__(self):
d = self.__dict__.copy()
del d["users"]
return d

def __deepcopy__(self, memo):
cls = self.__class__
result = cls.__new__(cls)
memo[id(self)] = result
for k, v in self.__dict__.items():
if k != "users":
setattr(result, k, copy.deepcopy(v, memo))
result.users = set()
return result

def __getstate__(self):
"""Skips exporting users
:return:
"""
d = self.__dict__.copy()
del d["users"]
d = self.__sis_state__()
if not gs.INCLUDE_CREATOR_STATE and hasattr(self, "creator"):
d["path"] = self.get_path()
d["creator"] = None
return d

def __setstate__(self, state):
Expand Down Expand Up @@ -277,7 +295,7 @@ def __repr__(self):
def copy(self):
"""Creates a copy of this Path"""
new = Path("")
new.__setstate__(self.__getstate__())
new.__setstate__(self.__sis_state__())
return new

def copy_append(self, suffix):
Expand Down
15 changes: 14 additions & 1 deletion tests/job_path_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import pickle

from sisyphus import gs
import sisyphus.toolkit as tk
from sisyphus.job_path import Path, Variable
from sisyphus.tools import finished_results_cache
Expand Down Expand Up @@ -86,7 +87,19 @@ def pickle_and_check(path):
pickle.dump(path, f)
with open(pickle_path, "rb") as f:
path_unpickled = pickle.load(f)
self.assertEqual(path.__dict__, path_unpickled.__dict__)

# Compare absolute paths
self.assertEqual(path.get_path(), path_unpickled.get_path())

if gs.INCLUDE_CREATOR_STATE:
self.assertEqual(path.creator, path_unpickled.creator)
else:
self.assertIsNone(path_unpickled.creator)

excluded_keys = {"creator", "path"}
original_attrs = {k: v for k, v in path.__dict__.items() if k not in excluded_keys}
unpickled_attrs = {k: v for k, v in path_unpickled.__dict__.items() if k not in excluded_keys}
self.assertEqual(original_attrs, unpickled_attrs)

pickle_and_check(Path("out"))

Expand Down

0 comments on commit 3181d72

Please sign in to comment.