From 9867dbaec53d02bb248545acae108675ba7738eb Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 18 Oct 2024 08:28:08 -0700 Subject: [PATCH 1/4] Propagate fragments with ResourcePath.join Previously fragments were either dropped completely (if a ResourcePath was given that had a fragment) or included in the file name (if a plain string was given). Now fragments are always propagated if the ResourcePath parsing finds a fragment. This means that if a filename has a "#" in it that "#" will now always be treated as a fragment and not part of the filename. --- python/lsst/resources/_resourcePath.py | 14 +++++++++----- python/lsst/resources/tests.py | 15 ++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 0581842..dc0f963 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -693,7 +693,7 @@ def join( quoted depending on the associated URI scheme. If the path looks like a URI referring to an absolute location, it will be returned directly (matching the behavior of `os.path.join`). It can - also be a `ResourcePath`. + also be a `ResourcePath`. Fragments are propagated. isTemporary : `bool`, optional Indicate that the resulting URI represents a temporary resource. Default is ``self.isTemporary``. @@ -723,6 +723,9 @@ def join( refer to a file. Use `updatedFile` if the file is to be replaced. + If an unquoted ``#`` is included in the path it is assumed to be + referring to a fragment and not part of the file name. + Raises ------ ValueError @@ -757,10 +760,10 @@ def join( # Absolute URI so return it directly. return path_uri - # If this was originally a ResourcePath extract the unquoted path from - # it. Otherwise we use the string we were given to allow "#" to appear - # in the filename if given as a plain string. - if not isinstance(path, str): + # We want to propagate fragments to the joined path and we rely on + # the ResourcePath parser to find these fragments for us even in plain + # strings. Must assume there are no `#` characters in filenames. + if not isinstance(path, str) or path_uri.fragment: path = path_uri.unquoted_path # Might need to quote the path. @@ -780,6 +783,7 @@ def join( path=newpath, forceDirectory=forceDirectory, isTemporary=isTemporary, + fragment=path_uri.fragment, ) def relative_to(self, other: ResourcePath) -> str | None: diff --git a/python/lsst/resources/tests.py b/python/lsst/resources/tests.py index e630dcf..f751d40 100644 --- a/python/lsst/resources/tests.py +++ b/python/lsst/resources/tests.py @@ -474,9 +474,22 @@ def test_join(self) -> None: self.assertFalse(up_relative.isdir()) self.assertEqual(up_relative.geturl(), f"{root_str}b/c.txt") + # Check that fragment is passed through join (simple unquoted case). + fnew3 = root.join("a/b.txt#fragment") + self.assertEqual(fnew3.fragment, "fragment") + self.assertEqual(fnew3.basename(), "b.txt", msg=f"Got: {fnew3._uri}") + + # Join a resource path. + subpath = ResourcePath("a/b.txt#fragment2", forceAbsolute=False, forceDirectory=False) + fnew3 = root.join(subpath) + self.assertEqual(fnew3.fragment, "fragment2") + self.assertEqual(fnew3.basename(), "b.txt", msg=f"Got: {fnew3._uri}") + + # Quoted string with fragment. quote_example = "hsc/payload/b&c.t@x#t" needs_quote = root.join(quote_example) - self.assertEqual(needs_quote.unquoted_path, "/" + quote_example) + self.assertEqual(needs_quote.unquoted_path, "/" + quote_example[:-2]) + self.assertEqual(needs_quote.fragment, "t") other = ResourcePath(f"{self.root}test.txt") self.assertEqual(root.join(other), other) From 5af1b27b98a8aa975bf405593553ccbe87ea6aef Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 22 Oct 2024 12:48:55 -0700 Subject: [PATCH 2/4] Ensure that URL signing retains fragment --- python/lsst/resources/http.py | 2 +- python/lsst/resources/s3.py | 6 +++++- tests/test_http.py | 4 ++-- tests/test_s3.py | 9 +++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index f8f6632..25f5f1d 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -1390,7 +1390,7 @@ def _sign_with_macaroon(self, activity: ActivityCaveat, expiration_time_seconds: try: response_body = json.loads(resp.text) if "macaroon" in response_body: - return f"{self}?authz={response_body['macaroon']}" + return str(self.replace(query=f"authz={response_body['macaroon']}")) else: raise ValueError(f"could not retrieve macaroon for URL {self}") except json.JSONDecodeError: diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index 6fd0937..bdfb708 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -582,8 +582,12 @@ def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str: return self._generate_presigned_url("put_object", expiration_time_seconds) def _generate_presigned_url(self, method: str, expiration_time_seconds: int) -> str: - return self.client.generate_presigned_url( + url = self.client.generate_presigned_url( method, Params={"Bucket": self._bucket, "Key": self.relativeToPathRoot}, ExpiresIn=expiration_time_seconds, ) + if self.fragment: + resource = ResourcePath(url) + url = str(resource.replace(fragment=self.fragment)) + return url diff --git a/tests/test_http.py b/tests/test_http.py index 1eaefe6..1d14413 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -469,9 +469,9 @@ def test_plain_http_url_signing(self): # Plain HTTP URLs are already readable without authentication, so # generating a pre-signed URL is a no-op. - path = ResourcePath("http://nonwebdav.test/file") + path = ResourcePath("http://nonwebdav.test/file#frag") self.assertEqual( - path.generate_presigned_get_url(expiration_time_seconds=300), "http://nonwebdav.test/file" + path.generate_presigned_get_url(expiration_time_seconds=300), "http://nonwebdav.test/file#frag" ) # Writing to an arbitrary plain HTTP URL is unlikely to work, so we diff --git a/tests/test_s3.py b/tests/test_s3.py index 9042398..f476582 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -169,6 +169,15 @@ def _test_url_signing_case(self, filename: str, test_data: bytes): get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600) self._check_presigned_url(get_url, 3600) + # Check that fragments are retained. + s3_path = s3_path.replace(fragment="zip-path=X") + put_url = s3_path.generate_presigned_put_url(expiration_time_seconds=1800) + self.assertEqual(ResourcePath(put_url).fragment, "zip-path=X") + self._check_presigned_url(put_url, 1800) + get_url = s3_path.generate_presigned_get_url(expiration_time_seconds=3600) + self.assertEqual(ResourcePath(get_url).fragment, "zip-path=X") + self._check_presigned_url(get_url, 3600) + # Moto monkeypatches the 'requests' library to mock access to presigned # URLs, so we are able to use HttpResourcePath to access the URLs in # this test. From e31d6cb84b55aeb98e76f04f62210922c23b79e9 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 22 Oct 2024 12:49:15 -0700 Subject: [PATCH 3/4] Add method to get unquoted fragment The fragment can be encoded but some users need the unquoted form. --- python/lsst/resources/_resourcePath.py | 7 ++++++- python/lsst/resources/tests.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index dc0f963..96bc22c 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -393,9 +393,14 @@ def is_root(self) -> bool: @property def fragment(self) -> str: - """Return the fragment component of the URI.""" + """Return the fragment component of the URI. May be quoted.""" return self._uri.fragment + @property + def unquoted_fragment(self) -> str: + """Return unquoted fragment.""" + return urllib.parse.unquote(self.fragment) + @property def params(self) -> str: """Return any parameters included in the URI.""" diff --git a/python/lsst/resources/tests.py b/python/lsst/resources/tests.py index f751d40..d11476f 100644 --- a/python/lsst/resources/tests.py +++ b/python/lsst/resources/tests.py @@ -444,6 +444,12 @@ def test_escapes(self) -> None: uri = ResourcePath(hash_path) self.assertEqual(uri.ospath, hash_path[:hpos]) self.assertEqual(uri.fragment, hash_path[hpos + 1 :]) + self.assertEqual(uri.unquoted_fragment, uri.fragment) + + # Fragments can be quoted, although this is not enforced anywhere. + with_frag = ResourcePath(self._make_uri("a/b.txt#" + urllib.parse.quote("zip-path=ingést"))) + self.assertEqual(with_frag.fragment, "zip-path%3Ding%C3%A9st") + self.assertEqual(with_frag.unquoted_fragment, "zip-path=ingést") def test_hash(self) -> None: """Test that we can store URIs in sets and as keys.""" From 1e556d821e4b965dbbd535992a9fd507b7c99328 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 18 Oct 2024 08:45:09 -0700 Subject: [PATCH 4/4] Add news fragment --- doc/changes/DM-46776.feature.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/changes/DM-46776.feature.rst diff --git a/doc/changes/DM-46776.feature.rst b/doc/changes/DM-46776.feature.rst new file mode 100644 index 0000000..2bc45e9 --- /dev/null +++ b/doc/changes/DM-46776.feature.rst @@ -0,0 +1,5 @@ +* Modified ``ResourcePath.join()`` to propagate fragments from the given path to the joined path. +This now means that if the ``ResourcePath`` constructor finds a fragment that fragment will be used. +Previously the fragment was dropped if a ``ResourcePath`` was given that had a fragment, or the fragment was treated as part of the filename if a plain string was given. +This change means that filenames can no longer include ``#`` characters. +* Added new ``ResourcePath.unquoted_fragment`` property to get the unquoted fragment.