|
19 | 19 | import io
|
20 | 20 | import locale
|
21 | 21 | import logging
|
| 22 | +import multiprocessing |
22 | 23 | import os
|
23 | 24 | import posixpath
|
24 | 25 | import re
|
25 | 26 | import shutil
|
26 | 27 | import tempfile
|
27 | 28 | import urllib.parse
|
| 29 | +from functools import cache |
28 | 30 | from pathlib import Path, PurePath, PurePosixPath
|
29 | 31 | from random import Random
|
30 | 32 |
|
|
59 | 61 | MAX_WORKERS = 10
|
60 | 62 |
|
61 | 63 |
|
| 64 | +def _get_int_env_var(env_var: str) -> int | None: |
| 65 | + int_value = None |
| 66 | + env_value = os.getenv(env_var) |
| 67 | + if env_value is not None: |
| 68 | + with contextlib.suppress(TypeError): |
| 69 | + int_value = int(env_value) |
| 70 | + return int_value |
| 71 | + |
| 72 | + |
| 73 | +@cache |
| 74 | +def _get_num_workers() -> int: |
| 75 | + f"""Calculate the number of workers to use. |
| 76 | +
|
| 77 | + Returns |
| 78 | + ------- |
| 79 | + num : `int` |
| 80 | + The number of workers to use. Will use the value of the |
| 81 | + ``LSST_RESOURCES_NUM_WORKERS`` environment variable if set. Will fall |
| 82 | + back to using the CPU count (plus 2) but capped at {MAX_WORKERS}. |
| 83 | + """ |
| 84 | + num_workers: int | None = None |
| 85 | + num_workers = _get_int_env_var("LSST_RESOURCES_NUM_WORKERS") |
| 86 | + if num_workers is None: |
| 87 | + # CPU_LIMIT is used on nublado. |
| 88 | + cpu_limit = _get_int_env_var("CPU_LIMIT") or multiprocessing.cpu_count() |
| 89 | + if cpu_limit is not None: |
| 90 | + num_workers = cpu_limit + 2 |
| 91 | + |
| 92 | + # But don't ever return more than the maximum allowed. |
| 93 | + return min([num_workers, MAX_WORKERS]) |
| 94 | + |
| 95 | + |
62 | 96 | class ResourcePath: # numpydoc ignore=PR02
|
63 | 97 | """Convenience wrapper around URI parsers.
|
64 | 98 |
|
@@ -883,7 +917,7 @@ def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
|
883 | 917 | existence : `dict` of [`ResourcePath`, `bool`]
|
884 | 918 | Mapping of original URI to boolean indicating existence.
|
885 | 919 | """
|
886 |
| - exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) |
| 920 | + exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=_get_num_workers()) |
887 | 921 | future_exists = {exists_executor.submit(uri.exists): uri for uri in uris}
|
888 | 922 |
|
889 | 923 | results: dict[ResourcePath, bool] = {}
|
@@ -926,7 +960,7 @@ def mtransfer(
|
926 | 960 | A dict of all the transfer attempts with a boolean indicating
|
927 | 961 | whether the transfer succeeded for the target URI.
|
928 | 962 | """
|
929 |
| - exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) |
| 963 | + exists_executor = concurrent.futures.ThreadPoolExecutor(max_workers=_get_num_workers()) |
930 | 964 | future_transfers = {
|
931 | 965 | exists_executor.submit(
|
932 | 966 | to_uri.transfer_from,
|
|
0 commit comments