39
39
40
40
try :
41
41
import fsspec
42
- from aiohttp import ClientSession , TCPConnector
42
+ from aiohttp import ClientSession , ClientTimeout , TCPConnector
43
43
from fsspec .implementations .http import HTTPFileSystem
44
44
from fsspec .spec import AbstractFileSystem
45
45
except ImportError :
@@ -103,7 +103,7 @@ class HttpResourcePathConfig:
103
103
"""
104
104
105
105
# Default timeouts for all HTTP requests (seconds).
106
- DEFAULT_TIMEOUT_CONNECT : float = 30 .0
106
+ DEFAULT_TIMEOUT_CONNECT : float = 60 .0
107
107
DEFAULT_TIMEOUT_READ : float = 1_500.0
108
108
109
109
# Default lower and upper bounds for the backoff interval (seconds).
@@ -125,6 +125,7 @@ def __init__(self) -> None:
125
125
self ._back_end_connections : int | None = None
126
126
self ._digest_algorithm : str | None = None
127
127
self ._send_expect_on_put : bool | None = None
128
+ self ._fsspec_is_enabled : bool | None = None
128
129
self ._timeout : tuple [float , float ] | None = None
129
130
self ._collect_memory_usage : bool | None = None
130
131
self ._backoff_min : float | None = None
@@ -206,6 +207,20 @@ def send_expect_on_put(self) -> bool:
206
207
self ._send_expect_on_put = "LSST_HTTP_PUT_SEND_EXPECT_HEADER" in os .environ
207
208
return self ._send_expect_on_put
208
209
210
+ @property
211
+ def fsspec_is_enabled (self ) -> bool :
212
+ """Return True if `fsspec` is enabled for objects of class
213
+ HttpResourcePath.
214
+
215
+ To determine if `fsspec` is enabled, this method inspects the presence
216
+ of the environment variable `LSST_HTTP_ENABLE_FSSPEC` (with any value).
217
+ """
218
+ if self ._fsspec_is_enabled is not None :
219
+ return self ._fsspec_is_enabled
220
+
221
+ self ._fsspec_is_enabled = "LSST_HTTP_ENABLE_FSSPEC" in os .environ
222
+ return self ._fsspec_is_enabled
223
+
209
224
@property
210
225
def timeout (self ) -> tuple [float , float ]:
211
226
"""Return a tuple with the values of timeouts for connecting to the
@@ -734,6 +749,10 @@ class HttpResourcePath(ResourcePath):
734
749
via a PUT request. No digest is requested if this variable is not set
735
750
or is set to an invalid value.
736
751
Valid values are those in ACCEPTED_DIGESTS.
752
+
753
+ - LSST_HTTP_ENABLE_FSSPEC: the presence of this environment variable
754
+ activates the usage of an `fsspec`-compatible file system to read
755
+ an HTTP URL. The value of the variable is not inspected.
737
756
"""
738
757
739
758
# WebDAV servers known to be able to sign URLs. The values are lowercased
@@ -742,7 +761,7 @@ class HttpResourcePath(ResourcePath):
742
761
SUPPORTED_URL_SIGNERS = ("dcache" , "xrootd" )
743
762
744
763
# Configuration items for this class instances.
745
- _config = HttpResourcePathConfig ()
764
+ _config : HttpResourcePathConfig = HttpResourcePathConfig ()
746
765
747
766
# The session for metadata requests is used for interacting with
748
767
# the front end servers for requests such as PROPFIND, HEAD, etc. Those
@@ -897,6 +916,15 @@ def server_signs_urls(self) -> bool:
897
916
"""
898
917
return self .server in HttpResourcePath .SUPPORTED_URL_SIGNERS
899
918
919
+ @classmethod
920
+ def _reload_config (cls ) -> None :
921
+ """Reload the configuration for all instances of this class. That
922
+ configuration is instantiated from the environment.
923
+
924
+ This is an internal method mainly intended for tests.
925
+ """
926
+ HttpResourcePath ._config = HttpResourcePathConfig ()
927
+
900
928
def exists (self ) -> bool :
901
929
"""Check that a remote HTTP resource exists."""
902
930
log .debug ("Checking if resource exists: %s" , self .geturl ())
@@ -1308,6 +1336,26 @@ def to_fsspec(self) -> tuple[AbstractFileSystem, str]:
1308
1336
f"method HttpResourcePath.to_fsspec() not implemented for directory { self } "
1309
1337
)
1310
1338
1339
+ # If usage of fsspec-compatible file system is disabled in the
1340
+ # configuration we raise an exception which signals the caller
1341
+ # that it cannot use fsspec. An example of such a caller is
1342
+ # `lsst.daf.butler.formatters.ParquetFormatter`.
1343
+ #
1344
+ # Note that we don't call super().to_fsspec() since that method
1345
+ # assumes that fsspec can be used provided fsspec package is
1346
+ # importable.
1347
+ #
1348
+ # The motivation for making this configurable is that for HTTP
1349
+ # URLs fsspec.HTTPFileSystem uses async I/O and we have found
1350
+ # unexpected behavior by clients when used against dCache for reading
1351
+ # parquet files via a ParquetFormatter instance. That behavior cannot
1352
+ # be reproduced when using other callers.
1353
+ #
1354
+ # This needs more investigation to rule out the possibility that async
1355
+ # I/O, used by fsspec.HTTPFileSystem, is related to this behavior.
1356
+ if not self ._config .fsspec_is_enabled :
1357
+ raise ImportError ("fsspec is disabled for HttpResourcePath objects with webDAV back end" )
1358
+
1311
1359
async def get_client_session (** kwargs : Any ) -> ClientSession :
1312
1360
"""Return an aiohttp.ClientSession configured to use an
1313
1361
`aiohttp.TCPConnector` shared by all instances of this class.
@@ -1325,14 +1373,42 @@ async def get_client_session(**kwargs: Any) -> ClientSession:
1325
1373
TCP connections to the server.
1326
1374
"""
1327
1375
if HttpResourcePath ._tcp_connector is None :
1328
- HttpResourcePath ._tcp_connector = TCPConnector (ssl = self ._config .ssl_context )
1376
+ HttpResourcePath ._tcp_connector = TCPConnector (
1377
+ # SSL context equipped with client credentials and
1378
+ # configured to validate server certificates.
1379
+ ssl = self ._config .ssl_context ,
1380
+ # Total number of simultaneous connections this connector
1381
+ # keeps open with any host.
1382
+ #
1383
+ # The default is 100 but we deliberately reduced it to
1384
+ # avoid keeping a large number of open connexions to file
1385
+ # servers when thousands of quanta execute simultaneously.
1386
+ #
1387
+ # In any case, new connections are automatically established
1388
+ # when needed.
1389
+ limit = 10 ,
1390
+ # Number of simultaneous connections to a single host:port.
1391
+ limit_per_host = 1 ,
1392
+ # Close network connection after usage
1393
+ force_close = True ,
1394
+ )
1329
1395
1330
- return ClientSession (connector = HttpResourcePath ._tcp_connector , ** kwargs )
1396
+ connect_timeout , read_timeout = self ._config .timeout
1397
+ return ClientSession (
1398
+ connector = HttpResourcePath ._tcp_connector ,
1399
+ timeout = ClientTimeout (
1400
+ connect = connect_timeout ,
1401
+ sock_connect = connect_timeout ,
1402
+ sock_read = read_timeout ,
1403
+ total = 2 * read_timeout ,
1404
+ ),
1405
+ ** kwargs ,
1406
+ )
1331
1407
1332
- # Retrieve a signed URL for download valid for 1 hour .
1333
- url = self .generate_presigned_get_url (expiration_time_seconds = 3_600 )
1408
+ # Retrieve a signed URL for download valid for 2 hours .
1409
+ url = self .generate_presigned_get_url (expiration_time_seconds = 2 * 3_600 )
1334
1410
1335
- # HTTPFileSystem constructors accepts the argument 'block_size'. The
1411
+ # HTTPFileSystem constructor accepts the argument 'block_size'. The
1336
1412
# default value is 'fsspec.utils.DEFAULT_BLOCK_SIZE' which is 5 MB.
1337
1413
# That seems to be a reasonable block size for downloading files.
1338
1414
return HTTPFileSystem (get_client = get_client_session ), url
0 commit comments