Skip to content

Commit

Permalink
Merge pull request #91 from vbharadwaj-bk/main
Browse files Browse the repository at this point in the history
  • Loading branch information
vbharadwaj-bk authored May 31, 2024
2 parents 0849d17 + 5788402 commit 0c4ad1c
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 35 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest"]
python-version: [ "3.7", "3.8", "3.9", "3.10"]
python-version: [ "3.8", "3.9", "3.10"]
name: Test (${{ matrix.python-version }}, ${{ matrix.os }})
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -l {0}
steps:
- uses: actions/checkout@v2
- uses: conda-incubator/setup-miniconda@v2
- uses: conda-incubator/setup-miniconda@v3
with:
mamba-version: "*"
channels: conda-forge
Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:
name: Build and publish Python distributions to PyPI and TestPyPI
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: conda-incubator/setup-miniconda@v2
with:
mamba-version: "*"
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ htmlcov
**/.DS_Store
.eggs
cppimport/_version.py
*.lock
41 changes: 40 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ Note that `force_rebuild` does not work when importing the module concurrently.

### Can I import my module concurrently?

It's safe to use `cppimport` to import a module concurrently using multiple threads, processes or even machines!
It's (mostly) safe to use `cppimport` to import a module concurrently using multiple threads, processes or even machines!
There's an exception if your filesystem does not support file locking - see the next section.

Before building a module, `cppimport` obtains a lockfile preventing other processes from building it at the same time - this prevents clashes that can lead to failure.
Other processes will wait a maximum of 10 minutes for the first process to build the module, then load it. If your module does not build within 10 minutes, the wait will time out.
Expand All @@ -173,6 +174,44 @@ cppimport.settings['lock_timeout'] = 10*60 # 10 mins

You should not use `force_rebuild` when importing concurrently.

### Acquiring the lock hangs or times out unexpectedly - what's going on?
Certain platforms (e.g. those running
a Data Virtualization Service, DVS) do not support file locking. If you're on Linux with access to `flock`, you can test whether
locking is supported (credit to [this page](https://help.univention.com/t/howto-verify-the-mounted-filesystem-supports-file-locking/10149)):

```bash
touch testfile
flock ./testfile true && echo ok || echo nok
```

If locking is not supported, you can disable the file lock in
the cppimport global settings:

```python
cppimport.settings['use_filelock'] = False
```

This setting must be changed before you import any
code. By setting `use_filelock=False`, you become responsible
for ensuring that only a single process
(re)builds the package at a time. For example: if you're
using [mpi4py](https://mpi4py.readthedocs.io/en/stable/)
to run independent, communicating processes, here's how
to protect the build:

```python
from mpi4py import MPI
import cppimport, cppimport.import_hook
cppimport.settings["use_filelock"] = False

pid = MPI.COMM_WORLD.Get_rank()

if pid == 0:
import somecode # Process 0 compiles extension if needed
MPI.COMM_WORLD.Barrier() # Remaining processes wait
import somecode # All processes use compiled extension
```

### How can I get information about filepaths in the configuration block?
The module name is available as the `fullname` variable and the C++ module file is available as `filepath`.
For example,
Expand Down
2 changes: 2 additions & 0 deletions cppimport/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
See CONTRIBUTING.md for a description of the project structure and the internal logic.
"""

import ctypes
import logging
import os
Expand All @@ -18,6 +19,7 @@
force_rebuild=False, # `force_rebuild` with multiple processes is not supported
file_exts=[".cpp", ".c"],
rtld_flags=ctypes.RTLD_LOCAL,
use_filelock=True, # Filelock if multiple processes try to build simultaneously
lock_suffix=".lock",
lock_timeout=10 * 60,
remove_strict_prototypes=True,
Expand Down
3 changes: 1 addition & 2 deletions cppimport/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _handle_strict_prototypes():

cfg_vars = distutils.sysconfig.get_config_vars()
for key, value in cfg_vars.items():
if type(value) == str:
if value is str:
cfg_vars[key] = value.replace("-Wstrict-prototypes", "")


Expand Down Expand Up @@ -144,7 +144,6 @@ def _parallel_compile(
extra_postargs=None,
depends=None,
):

# these lines are copied directly from distutils.ccompiler.CCompiler
macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
output_dir, macros, include_dirs, sources, depends, extra_postargs
Expand Down
57 changes: 30 additions & 27 deletions cppimport/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,36 @@ def build_completed():

t = time()

# Race to obtain the lock and build. Other processes can wait
while not build_completed() and time() - t < cppimport.settings["lock_timeout"]:
try:
with filelock.FileLock(lock_path, timeout=1):
if build_completed():
break
template_and_build(filepath, module_data)
except filelock.Timeout:
logging.debug(f"Could not obtain lock (pid {os.getpid()})")
if cppimport.settings["force_rebuild"]:
raise ValueError(
"force_build must be False to build concurrently."
"This process failed to claim a filelock indicating that"
" a concurrent build is in progress"
)
sleep(1)

if os.path.exists(lock_path):
with suppress(OSError):
os.remove(lock_path)

if not build_completed():
raise Exception(
f"Could not compile binary as lock already taken and timed out."
f" Try increasing the timeout setting if "
f"the build time is longer (pid {os.getpid()})."
)
if cppimport.settings["use_filelock"]:
# Race to obtain the lock and build. Other processes can wait
while not build_completed() and time() - t < cppimport.settings["lock_timeout"]:
try:
with filelock.FileLock(lock_path, timeout=1):
if build_completed():
break
template_and_build(filepath, module_data)
except filelock.Timeout:
logging.debug(f"Could not obtain lock (pid {os.getpid()})")
if cppimport.settings["force_rebuild"]:
raise ValueError(
"force_build must be False to build concurrently."
"This process failed to claim a filelock indicating that"
" a concurrent build is in progress"
)
sleep(1)

if os.path.exists(lock_path):
with suppress(OSError):
os.remove(lock_path)

if not build_completed():
raise Exception(
f"Could not compile binary as lock already taken and timed out."
f" Try increasing the timeout setting if "
f"the build time is longer (pid {os.getpid()})."
)
else:
template_and_build(filepath, module_data)


def template_and_build(filepath, module_data):
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def pytest_addoption(parser):
    """Register the ``--multiprocessing`` command-line flag.

    The flag is off by default; passing it to pytest opts in to the
    multiprocessing tests that require file locking.
    """
    option_kwargs = {
        "action": "store_true",
        "dest": "multiprocessing",
        "default": False,
        "help": "enable multiprocessing tests with filelock",
    }
    parser.addoption("--multiprocessing", **option_kwargs)
19 changes: 17 additions & 2 deletions tests/test_cppimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,16 @@
from multiprocessing import Process
from tempfile import TemporaryDirectory

import pytest

import cppimport
import cppimport.build_module
import cppimport.templating
from cppimport.find import find_module_cpppath

cppimport.settings["use_filelock"] = False # Filelock only enabled for multiprocessing
multiprocessing_enable = pytest.mark.skipif("not config.getoption('multiprocessing')")

root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)

Expand Down Expand Up @@ -127,8 +132,11 @@ def test_with_file_in_syspath():
def test_rebuild_after_failed_compile():
cppimport.imp("mymodule")
test_code = """
import cppimport; mymodule = cppimport.imp("mymodule");assert(mymodule.add(1,2) == 3)
"""
import cppimport;
cppimport.settings["use_filelock"] = False;
mymodule = cppimport.imp("mymodule");
assert(mymodule.add(1,2) == 3)
"""
with appended("tests/mymodule.cpp", ";asdf;"):
subprocess_check(test_code, 1)
subprocess_check(test_code, 0)
Expand All @@ -149,6 +157,7 @@ def test_no_rebuild_if_no_deps_change():
cppimport.imp("mymodule")
test_code = """
import cppimport;
cppimport.settings["use_filelock"] = False;
mymodule = cppimport.imp("mymodule");
assert(not hasattr(mymodule, 'Thing'))
"""
Expand All @@ -160,6 +169,7 @@ def test_rebuild_header_after_change():
cppimport.imp("mymodule")
test_code = """
import cppimport;
cppimport.settings["use_filelock"] = False;
mymodule = cppimport.imp("mymodule");
mymodule.Thing().cheer()
"""
Expand Down Expand Up @@ -215,7 +225,12 @@ def test_relative_import():
assert f() == 3


@multiprocessing_enable
def test_multiple_processes():
"""
Only runs if the flag --multiprocessing is passed to
pytest. This function requires file locking enabled.
"""
with tmp_dir(["tests/hook_test.cpp"]) as tmp_path:
test_code = f"""
import os;
Expand Down

0 comments on commit 0c4ad1c

Please sign in to comment.