Skip to content

Commit

Permalink
Merge pull request #9 from AllenInstitute/feature/add-separate-lockfi…
Browse files Browse the repository at this point in the history
…le-loc

add optional lock_root to PathLock input
  • Loading branch information
rpmcginty authored Oct 9, 2024
2 parents 5c3eced + 247a0ef commit 70d231a
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 12 deletions.
51 changes: 40 additions & 11 deletions src/aibs_informatics_core/utils/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Dict, List, Literal, Optional, Pattern, Union, cast
from typing import Dict, List, Literal, Optional, Pattern, Sequence, Union, cast

from aibs_informatics_core.utils.os_operations import find_all_paths

Expand Down Expand Up @@ -252,15 +252,15 @@ def get_path_size_bytes(path: Path) -> int:

def get_path_hash(
path: Union[Path, str],
includes: Optional[List[Union[Pattern, str]]] = None,
excludes: Optional[List[Union[Pattern, str]]] = None,
includes: Optional[Sequence[Union[Pattern, str]]] = None,
excludes: Optional[Sequence[Union[Pattern, str]]] = None,
) -> str:
"""Generate the hash based on files found under a given path.
Args:
path (str): path to compute a hash
includes (List[str], optional): list of regex patterns to include. Defaults to all.
excludes (List[str], optional): list of regex patterns to exclude. Defaults to None.
includes (Sequence[str], optional): list of regex patterns to include. Defaults to all.
excludes (Sequence[str], optional): list of regex patterns to exclude. Defaults to None.
Returns:
str: hash value
Expand All @@ -278,8 +278,8 @@ def find_paths(
root: Union[str, Path],
include_dirs: bool = True,
include_files: bool = True,
includes: Optional[List[Union[Pattern, str]]] = None,
excludes: Optional[List[Union[Pattern, str]]] = None,
includes: Optional[Sequence[Union[Pattern, str]]] = None,
excludes: Optional[Sequence[Union[Pattern, str]]] = None,
) -> List[str]:
"""Find paths that match criteria
Expand All @@ -288,8 +288,8 @@ def find_paths(
include_dirs (bool, optional): whether to include directories. Defaults to True.
include_files (bool, optional): whether to include files. Defaults to True.
includes (List[str], optional): list of regex patterns to include. Defaults to all.
excludes (List[str], optional): list of regex patterns to exclude. Defaults to None.
includes (Sequence[str], optional): list of regex patterns to include. Defaults to all.
excludes (Sequence[str], optional): list of regex patterns to exclude. Defaults to None.
Returns:
List[str]: list of paths matching criteria
Expand Down Expand Up @@ -353,10 +353,36 @@ class CannotAcquirePathLockError(Exception):

@dataclass
class PathLock:
"""
A context manager for acquiring and releasing locks on a file or directory path.
If lock_root is provided, a lock file will be created in that directory with the name of the hash of the path.
If lock_root is not provided, a lock file with the same name as the path and a .lock extension will be created.
Providing an explicit lock root is useful if you dont want processes to read the lock file
from the same directory as the file being locked.
Attributes:
path (Union[str, Path]): The path to the file.
lock_root (Optional[Union[str, Path]]): The root directory for lock files. If provided, a
lock file will be created in this directory with the name of the hash of the path.
Otherwise, a lock file with the same name as the path and a .lock extension
will be created. Defaults to None.
"""

path: Union[str, Path]
lock_root: Optional[Union[str, Path]] = None
raise_if_locked: bool = False

def __post_init__(self):
self._lock_path = Path(f"{self.path}.lock")
# If lock root is provided, then create a lock file in that directory
# with the name of the hash of the path. Otherwise, create a lock file
# with the same name as the path with a .lock extension.
if self.lock_root:
lock_file_name = f"{hashlib.sha256(str(self.path).encode()).hexdigest()}.lock"
self._lock_path = Path(self.lock_root) / lock_file_name
else:
self._lock_path = Path(f"{self.path}.lock")
self._lock_file = None
logger.info(f"Created {self} with {self._lock_path} lock file")

Expand All @@ -372,7 +398,10 @@ def acquire(self):
try:
self._lock_path.parent.mkdir(parents=True, exist_ok=True)
self._lock_file = open(self._lock_path, "w")
fcntl.flock(self._lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
op = fcntl.LOCK_EX
if self.raise_if_locked:
op |= fcntl.LOCK_NB
fcntl.flock(self._lock_file, op)
self._lock_file.write(f"{datetime.now().timestamp()}")
logger.info(f"Lock acquired!")
except Exception as e:
Expand Down
37 changes: 36 additions & 1 deletion test/aibs_informatics_core/utils/test_file_operations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import errno
import os
import tarfile
import threading
import zipfile
from pathlib import Path
from typing import List, Literal, Sequence, Tuple, Union
Expand Down Expand Up @@ -362,6 +363,16 @@ def test__remove_path__handles_empty(self):
remove_path(path)
self.assertFalse(path.exists())

def test__PathLock__locks_using_lock_root(self):
path = self.tmp_path()
lock_root = self.tmp_path()
with PathLock(path, lock_root=lock_root) as lock:
lock_path = lock._lock_path
self.assertTrue(lock_path.exists())
self.assertStringPattern(rf"{lock_root}/[a-z0-9]{{64}}.lock", f"{lock_path}")

self.assertFalse(lock_path.exists())

def test__PathLock__locks_folder(self):
path = self.tmp_path()
with PathLock(path) as lock:
Expand All @@ -383,9 +394,33 @@ def test__PathLock__fails_if_lock_aquired(self):
path = self.tmp_path()
with PathLock(path) as lock:
with self.assertRaises(Exception):
with PathLock(path) as lock2:
with PathLock(path, raise_if_locked=True) as lock2:
pass

def test__PathLock__blocks_if_lock_already_acquired(self):
path = self.tmp_path()
lock1 = PathLock(path)
lock1.__enter__()
try:

def acquire_lock():
with PathLock(path, raise_if_locked=False):
pass # Should not get here until lock1 is released

t = threading.Thread(target=acquire_lock)
t.start()
t.join(timeout=1) # Wait 1 second for the thread to finish

# If the thread is still alive, it means it's blocked as expected
self.assertTrue(t.is_alive(), "The thread should be blocked and still alive.")

finally:
try:
lock1.__exit__(None, None, None)
t.join() # Now the thread should finish since the lock is released
except Exception:
pass


@mark.parametrize(
"path, root, expected",
Expand Down

0 comments on commit 70d231a

Please sign in to comment.