Skip to content

Commit 98ea8a4

Browse files
committed
Allow as_local to specify the temp directory to use
1 parent de1ecf0 commit 98ea8a4

File tree

7 files changed

+62
-19
lines changed

7 files changed

+62
-19
lines changed

python/lsst/resources/_resourcePath.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,7 @@ def abspath(self) -> ResourcePath:
10111011
"""
10121012
return self
10131013

1014-
def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
1014+
def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
10151015
"""Return the location of the (possibly remote) resource as local file.
10161016
10171017
This is a helper function for `as_local` context manager.
@@ -1024,6 +1024,9 @@ def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
10241024
effect if the URI scheme does not support parallel streams or
10251025
if a global override has been applied. If `False` parallel
10261026
streams will be disabled.
1027+
tmpdir : `ResourcePath` or `None`, optional
1028+
Explicit override of the temporary directory to use for remote
1029+
downloads.
10271030
10281031
Returns
10291032
-------
@@ -1038,7 +1041,9 @@ def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
10381041
raise NotImplementedError()
10391042

10401043
@contextlib.contextmanager
1041-
def as_local(self, multithreaded: bool = True) -> Iterator[ResourcePath]:
1044+
def as_local(
1045+
self, multithreaded: bool = True, tmpdir: ResourcePathExpression | None = None
1046+
) -> Iterator[ResourcePath]:
10421047
"""Return the location of the (possibly remote) resource as local file.
10431048
10441049
Parameters
@@ -1049,6 +1054,9 @@ def as_local(self, multithreaded: bool = True) -> Iterator[ResourcePath]:
10491054
effect if the URI scheme does not support parallel streams or
10501055
if a global override has been applied. If `False` parallel
10511056
streams will be disabled.
1057+
tmpdir : `ResourcePathExpression` or `None`, optional
1058+
Explicit override of the temporary directory to use for remote
1059+
downloads.
10521060
10531061
Yields
10541062
------
@@ -1074,7 +1082,10 @@ def as_local(self, multithreaded: bool = True) -> Iterator[ResourcePath]:
10741082
"""
10751083
if self.isdir():
10761084
raise IsADirectoryError(f"Directory-like URI {self} cannot be fetched as local.")
1077-
local_src, is_temporary = self._as_local(multithreaded=multithreaded)
1085+
temp_dir = ResourcePath(tmpdir, forceDirectory=True) if tmpdir is not None else None
1086+
if temp_dir is not None and not temp_dir.isLocal:
1087+
raise ValueError(f"Temporary directory for as_local must be local resource not {temp_dir}")
1088+
local_src, is_temporary = self._as_local(multithreaded=multithreaded, tmpdir=temp_dir)
10781089
local_uri = ResourcePath(local_src, isTemporary=is_temporary)
10791090

10801091
try:
@@ -1100,8 +1111,9 @@ def temporary_uri(
11001111
Parameters
11011112
----------
11021113
prefix : `ResourcePath`, optional
1103-
Prefix to use. Without this the path will be formed as a local
1104-
file URI in a temporary directory. Ensuring that the prefix
1114+
Temporary directory to use (can be any scheme). Without this the
1115+
path will be formed as a local file URI in a temporary directory
1116+
created by `tempfile.mkdtemp`. Ensuring that the prefix
11051117
location exists is the responsibility of the caller.
11061118
suffix : `str`, optional
11071119
A file suffix to be used. The ``.`` should be included in this

python/lsst/resources/file.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def remove(self) -> None:
7979
"""Remove the resource."""
8080
os.remove(self.ospath)
8181

82-
def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
82+
def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
8383
"""Return the local path of the file.
8484
8585
This is an internal helper for ``as_local()``.
@@ -88,6 +88,8 @@ def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
8888
----------
8989
multithreaded : `bool`, optional
9090
Unused.
91+
tmpdir : `ResourcePath` or `None`, optional
92+
Unused.
9193
9294
Returns
9395
-------

python/lsst/resources/gs.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,9 @@ def mkdir(self) -> None:
202202
# Should this method do anything at all?
203203
self.blob.upload_from_string(b"", retry=_RETRY_POLICY)
204204

205-
def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
205+
def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
206206
with (
207-
ResourcePath.temporary_uri(suffix=self.getExtension(), delete=False) as tmp_uri,
207+
ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=False) as tmp_uri,
208208
time_this(log, msg="Downloading %s to local file", args=(self,)),
209209
):
210210
try:

python/lsst/resources/http.py

+28-6
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,20 @@ def _timeout_from_environment(env_var: str, default_value: float) -> float:
9898
return timeout
9999

100100

101+
@functools.lru_cache
102+
def _calc_tmpdir_buffer_size(tmpdir: str) -> int:
103+
"""Compute the block size as 256 blocks of typical size
104+
(i.e. 4096 bytes) or 10 times the file system block size,
105+
whichever is higher.
106+
107+
This is a reasonable compromise between
108+
using memory for buffering and the number of system calls
109+
issued to read from or write to temporary files.
110+
"""
111+
fsstats = os.statvfs(tmpdir)
112+
return max(10 * fsstats.f_bsize, 256 * 4096)
113+
114+
101115
class HttpResourcePathConfig:
102116
"""Configuration class to encapsulate the configurable items used by class
103117
HttpResourcePath.
@@ -381,8 +395,8 @@ def tmpdir_buffersize(self) -> tuple[str, int]:
381395
# whichever is higher. This is a reasonable compromise between
382396
# using memory for buffering and the number of system calls
383397
# issued to read from or write to temporary files.
384-
fsstats = os.statvfs(tmpdir)
385-
self._tmpdir_buffersize = (tmpdir, max(10 * fsstats.f_bsize, 256 * 4096))
398+
bufsize = _calc_tmpdir_buffer_size(tmpdir)
399+
self._tmpdir_buffersize = (tmpdir, bufsize)
386400

387401
return self._tmpdir_buffersize
388402

@@ -1480,17 +1494,20 @@ def _sign_with_macaroon(self, activity: ActivityCaveat, expiration_time_seconds:
14801494
except json.JSONDecodeError:
14811495
raise ValueError(f"could not deserialize response to POST request for URL {self}")
14821496

1483-
def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
1497+
def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
14841498
"""Download object over HTTP and place in temporary directory.
14851499
14861500
Parameters
14871501
----------
14881502
multithreaded : `bool`, optional
14891503
If `True` the transfer will be allowed to attempt to improve
1490-
throughput by using parallel download streams. This may of no
1504+
throughput by using parallel download streams. This may of no
14911505
effect if the URI scheme does not support parallel streams or
14921506
if a global override has been applied. If `False` parallel
14931507
streams will be disabled.
1508+
tmpdir : `ResourcePath` or `None`, optional
1509+
Explicit override of the temporary directory to use for remote
1510+
downloads.
14941511
14951512
Returns
14961513
-------
@@ -1509,9 +1526,14 @@ def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
15091526
f"Unable to download resource {self}; status: {resp.status_code} {resp.reason}"
15101527
)
15111528

1512-
tmpdir, buffer_size = self._config.tmpdir_buffersize
1529+
if tmpdir is None:
1530+
temp_dir, buffer_size = self._config.tmpdir_buffersize
1531+
tmpdir = ResourcePath(temp_dir, forceDirectory=True)
1532+
else:
1533+
buffer_size = _calc_tmpdir_buffer_size(tmpdir.ospath)
1534+
15131535
with ResourcePath.temporary_uri(
1514-
suffix=self.getExtension(), prefix=ResourcePath(tmpdir, forceDirectory=True), delete=False
1536+
suffix=self.getExtension(), prefix=tmpdir, delete=False
15151537
) as tmp_uri:
15161538
expected_length = int(resp.headers.get("Content-Length", "-1"))
15171539
with time_this(

python/lsst/resources/mem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@ def exists(self) -> bool:
2727
"""Test for existence and always return False."""
2828
return True
2929

30-
def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
30+
def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
3131
raise RuntimeError(f"Do not know how to retrieve data for URI '{self}'")

python/lsst/resources/packageresource.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
AbstractFileSystem = type
3030

3131
from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol
32-
from ._resourcePath import ResourcePath
32+
from ._resourcePath import ResourcePath, ResourcePathExpression
3333

3434
log = logging.getLogger(__name__)
3535

@@ -85,13 +85,17 @@ def read(self, size: int = -1) -> bytes:
8585
return fh.read(size)
8686

8787
@contextlib.contextmanager
88-
def as_local(self, multithreaded: bool = True) -> Iterator[ResourcePath]:
88+
def as_local(
89+
self, multithreaded: bool = True, tmpdir: ResourcePathExpression | None = None
90+
) -> Iterator[ResourcePath]:
8991
"""Return the location of the Python resource as local file.
9092
9193
Parameters
9294
----------
9395
multithreaded : `bool`, optional
9496
Unused.
97+
tmpdir : `ResourcePathExpression` or `None`, optional
98+
Unused.
9599
96100
Yields
97101
------

python/lsst/resources/s3.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def to_fsspec(self) -> tuple[AbstractFileSystem, str]:
353353

354354
return s3, f"{self._bucket}/{self.relativeToPathRoot}"
355355

356-
def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
356+
def _as_local(self, multithreaded: bool = True, tmpdir: ResourcePath | None = None) -> tuple[str, bool]:
357357
"""Download object from S3 and place in temporary directory.
358358
359359
Parameters
@@ -364,6 +364,9 @@ def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
364364
effect if the URI scheme does not support parallel streams or
365365
if a global override has been applied. If `False` parallel
366366
streams will be disabled.
367+
tmpdir : `ResourcePath` or `None`, optional
368+
Explicit override of the temporary directory to use for remote
369+
downloads.
367370
368371
Returns
369372
-------
@@ -373,7 +376,7 @@ def _as_local(self, multithreaded: bool = True) -> tuple[str, bool]:
373376
Always returns `True`. This is always a temporary file.
374377
"""
375378
with (
376-
ResourcePath.temporary_uri(suffix=self.getExtension(), delete=False) as tmp_uri,
379+
ResourcePath.temporary_uri(prefix=tmpdir, suffix=self.getExtension(), delete=False) as tmp_uri,
377380
self._use_threads_temp_override(multithreaded),
378381
time_this(log, msg="Downloading %s to local file", args=(self,)),
379382
):

0 commit comments

Comments
 (0)