Skip to content

Commit 0dfe638

Browse files
committed
Ensure we retain/can crawl remotely retrieved resources.
Closes: #25
1 parent ec3015b commit 0dfe638

File tree

3 files changed

+135
-64
lines changed

3 files changed

+135
-64
lines changed

referencing/_core.py

Lines changed: 74 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
11
from __future__ import annotations
22

33
from collections.abc import Iterable, Iterator, Sequence
4-
from typing import Any, Callable, ClassVar, Generic, Protocol, Tuple, cast
4+
from typing import (
5+
Any,
6+
Callable,
7+
ClassVar,
8+
Generic,
9+
Protocol,
10+
Tuple,
11+
TypeVar,
12+
cast,
13+
)
514
from urllib.parse import unquote, urldefrag, urljoin
615

716
from attrs import evolve, field
@@ -252,16 +261,8 @@ def __getitem__(self, uri: URI) -> Resource[D]:
252261
"""
253262
try:
254263
return self._resources[uri]
255-
except LookupError:
256-
try:
257-
return self._retrieve(uri)
258-
except (
259-
exceptions.CannotDetermineSpecification,
260-
exceptions.NoSuchResource,
261-
):
262-
raise
263-
except Exception:
264-
raise exceptions.Unretrievable(ref=uri)
264+
except KeyError:
265+
raise exceptions.NoSuchResource(ref=uri)
265266

266267
def __iter__(self) -> Iterator[URI]:
267268
"""
@@ -288,6 +289,32 @@ def __repr__(self) -> str:
288289
summary = f"{pluralized}"
289290
return f"<Registry ({size} {summary})>"
290291

292+
def get_or_retrieve(self, uri: URI):
293+
"""
294+
Get a resource from the registry, crawling or retrieving if necessary, and return it as a `Retrieved` pairing the resource with the registry that ended up containing it.
295+
"""
296+
resource = self._resources.get(uri)
297+
if resource is not None:
298+
return Retrieved(registry=self, value=resource)
299+
300+
registry = self.crawl()
301+
resource = registry._resources.get(uri)
302+
if resource is not None:
303+
return Retrieved(registry=registry, value=resource)
304+
305+
try:
306+
resource = registry._retrieve(uri)
307+
except (
308+
exceptions.CannotDetermineSpecification,
309+
exceptions.NoSuchResource,
310+
):
311+
raise
312+
except Exception:
313+
raise exceptions.Unretrievable(ref=uri)
314+
else:
315+
registry = registry.with_resource(uri, resource)
316+
return Retrieved(registry=registry, value=resource)
317+
291318
def remove(self, uri: URI):
292319
"""
293320
Return a registry with the resource identified by a given URI removed.
@@ -308,7 +335,15 @@ def anchor(self, uri: URI, name: str):
308335
"""
309336
Retrieve the given anchor, crawling the registry first if it is not already known.
310337
"""
311-
return self._anchors[uri, name]
338+
value = self._anchors.get((uri, name))
339+
if value is not None:
340+
return Retrieved(value=value, registry=self)
341+
342+
registry = self.crawl()
343+
value = registry._anchors.get((uri, name))
344+
if value is not None:
345+
return Retrieved(value=value, registry=registry)
346+
raise exceptions.NoSuchAnchor(ref=uri, resource=self[uri], anchor=name)
312347

313348
def contents(self, uri: URI) -> D:
314349
"""
@@ -424,10 +459,23 @@ def resolver_with_root(self, resource: Resource[D]) -> Resolver[D]:
424459
)
425460

426461

462+
T = TypeVar("T", AnchorType[Any], Resource[Any])
463+
464+
465+
@frozen
466+
class Retrieved(Generic[D, T]):
467+
"""
468+
A value retrieved from a `Registry`, paired with the registry it was retrieved from.
469+
"""
470+
471+
value: T
472+
registry: Registry[D]
473+
474+
427475
@frozen
428476
class Resolved(Generic[D]):
429477
"""
430-
A resolved reference.
478+
A reference resolved to its contents by a `Resolver`.
431479
"""
432480

433481
contents: D
@@ -486,44 +534,24 @@ def lookup(self, ref: URI) -> Resolved[D]:
486534
uri, fragment = self._base_uri, ref[1:]
487535
else:
488536
uri, fragment = urldefrag(urljoin(self._base_uri, ref))
489-
registry = self._registry
490-
resource = registry.get(uri)
491-
if resource is None:
492-
registry = registry.crawl()
493-
try:
494-
resource = registry[uri]
495-
except exceptions.NoSuchResource:
496-
raise exceptions.Unresolvable(ref=ref) from None
497-
except exceptions.Unretrievable:
498-
raise exceptions.Unresolvable(ref=ref)
537+
try:
538+
retrieved = self._registry.get_or_retrieve(uri)
539+
except exceptions.NoSuchResource:
540+
raise exceptions.Unresolvable(ref=ref) from None
541+
except exceptions.Unretrievable:
542+
raise exceptions.Unresolvable(ref=ref)
499543

500544
if fragment.startswith("/"):
501-
return resource.pointer(
502-
pointer=fragment,
503-
resolver=self._evolve(registry=registry, base_uri=uri),
504-
)
545+
resolver = self._evolve(registry=retrieved.registry, base_uri=uri)
546+
return retrieved.value.pointer(pointer=fragment, resolver=resolver)
505547

506548
if fragment:
507-
try:
508-
anchor = registry.anchor(uri, fragment)
509-
except LookupError:
510-
registry = registry.crawl()
511-
try:
512-
anchor = registry.anchor(uri, fragment)
513-
except LookupError:
514-
raise exceptions.NoSuchAnchor(
515-
ref=ref,
516-
resource=resource,
517-
anchor=fragment,
518-
)
519-
return anchor.resolve(
520-
resolver=self._evolve(registry=registry, base_uri=uri),
521-
)
549+
retrieved = retrieved.registry.anchor(uri, fragment)
550+
resolver = self._evolve(registry=retrieved.registry, base_uri=uri)
551+
return retrieved.value.resolve(resolver=resolver)
522552

523-
return Resolved(
524-
contents=resource.contents,
525-
resolver=self._evolve(registry=registry, base_uri=uri),
526-
)
553+
resolver = self._evolve(registry=retrieved.registry, base_uri=uri)
554+
return Resolved(contents=retrieved.value.contents, resolver=resolver)
527555

528556
def in_subresource(self, subresource: Resource[D]) -> Resolver[D]:
529557
"""

referencing/jsonschema.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from collections.abc import Sequence, Set
88
from typing import Any, Iterable, Union
99

10-
from referencing import Anchor, Registry, Resource, Specification
10+
from referencing import Anchor, Registry, Resource, Specification, exceptions
1111
from referencing._attrs import frozen
1212
from referencing._core import Resolved as _Resolved, Resolver as _Resolver
1313
from referencing.typing import URI, Anchor as AnchorType, Mapping
@@ -544,8 +544,8 @@ def resolve(self, resolver: _Resolver[Schema]):
544544
last = self.resource
545545
for uri, registry in resolver.dynamic_scope():
546546
try:
547-
anchor = registry.anchor(uri, self.name)
548-
except LookupError:
547+
anchor = registry.anchor(uri, self.name).value
548+
except exceptions.NoSuchAnchor:
549549
continue
550550
if isinstance(anchor, DynamicAnchor):
551551
last = anchor.resource

referencing/tests/test_core.py

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -110,21 +110,17 @@ def test_crawl_finds_anchors_with_id(self):
110110
{"ID": "urn:bar", "anchors": {"foo": 12}},
111111
)
112112
registry = Registry().with_resource(resource.id(), resource)
113-
with pytest.raises(LookupError):
114-
registry.anchor(resource.id(), "foo")
115113

116-
assert registry.crawl().anchor(resource.id(), "foo") == Anchor(
114+
assert registry.crawl().anchor(resource.id(), "foo").value == Anchor(
117115
name="foo",
118116
resource=ID_AND_CHILDREN.create_resource(12),
119117
)
120118

121119
def test_crawl_finds_anchors_no_id(self):
122120
resource = ID_AND_CHILDREN.create_resource({"anchors": {"foo": 12}})
123121
registry = Registry().with_resource("urn:root", resource)
124-
with pytest.raises(LookupError):
125-
registry.anchor("urn:root", "foo")
126122

127-
assert registry.crawl().anchor("urn:root", "foo") == Anchor(
123+
assert registry.crawl().anchor("urn:root", "foo").value == Anchor(
128124
name="foo",
129125
resource=ID_AND_CHILDREN.create_resource(12),
130126
)
@@ -135,6 +131,16 @@ def test_contents(self):
135131
registry = Registry().with_resource(uri, resource)
136132
assert registry.contents(uri) == {"foo": "bar"}
137133

134+
def test_crawled_anchor(self):
135+
resource = ID_AND_CHILDREN.create_resource({"anchors": {"foo": "bar"}})
136+
registry = Registry().with_resource("urn:example", resource)
137+
retrieved = registry.anchor("urn:example", "foo")
138+
assert retrieved.value == Anchor(
139+
name="foo",
140+
resource=ID_AND_CHILDREN.create_resource("bar"),
141+
)
142+
assert retrieved.registry == registry.crawl()
143+
138144
def test_init(self):
139145
one = Resource.opaque(contents={})
140146
two = ID_AND_CHILDREN.create_resource({"foo": "bar"})
@@ -401,37 +407,41 @@ def test_remove_nonexistent_uri(self):
401407
def test_retrieve(self):
402408
foo = Resource.opaque({"foo": "bar"})
403409
registry = Registry(retrieve=lambda uri: foo)
404-
assert registry["urn:example"] == foo
410+
assert registry.get_or_retrieve("urn:example").value == foo
405411

406412
def test_retrieve_arbitrary_exception(self):
413+
foo = Resource.opaque({"foo": "bar"})
414+
407415
def retrieve(uri):
408416
if uri == "urn:succeed":
409-
return {}
417+
return foo
410418
raise Exception("Oh no!")
411419

412420
registry = Registry(retrieve=retrieve)
413-
assert registry["urn:succeed"] == {}
421+
assert registry.get_or_retrieve("urn:succeed").value == foo
414422
with pytest.raises(exceptions.Unretrievable):
415-
registry["urn:uhoh"]
423+
registry.get_or_retrieve("urn:uhoh")
416424

417425
def test_retrieve_no_such_resource(self):
426+
foo = Resource.opaque({"foo": "bar"})
427+
418428
def retrieve(uri):
419429
if uri == "urn:succeed":
420-
return {}
430+
return foo
421431
raise exceptions.NoSuchResource(ref=uri)
422432

423433
registry = Registry(retrieve=retrieve)
424-
assert registry["urn:succeed"] == {}
434+
assert registry.get_or_retrieve("urn:succeed").value == foo
425435
with pytest.raises(exceptions.NoSuchResource):
426-
registry["urn:uhoh"]
436+
registry.get_or_retrieve("urn:uhoh")
427437

428438
def test_retrieve_cannot_determine_specification(self):
429439
def retrieve(uri):
430440
return Resource.from_contents({})
431441

432442
registry = Registry(retrieve=retrieve)
433443
with pytest.raises(exceptions.CannotDetermineSpecification):
434-
registry["urn:uhoh"]
444+
registry.get_or_retrieve("urn:uhoh")
435445

436446
def test_retrieve_already_available_resource(self):
437447
def retrieve(uri):
@@ -440,6 +450,7 @@ def retrieve(uri):
440450
foo = Resource.opaque({"foo": "bar"})
441451
registry = Registry({"urn:example": foo})
442452
assert registry["urn:example"] == foo
453+
assert registry.get_or_retrieve("urn:example").value == foo
443454

444455
def test_retrieve_first_checks_crawlable_resource(self):
445456
def retrieve(uri):
@@ -687,7 +698,7 @@ def test_lookup_non_existent_anchor(self):
687698
with pytest.raises(exceptions.Unresolvable) as e:
688699
resolver.lookup(ref)
689700
assert e.value == exceptions.NoSuchAnchor(
690-
ref=ref,
701+
ref="urn:example",
691702
resource=root,
692703
anchor="noSuchAnchor",
693704
)
@@ -698,6 +709,38 @@ def test_lookup_retrieved_resource(self):
698709
resolved = resolver.lookup("http://example.com/")
699710
assert resolved.contents == resource.contents
700711

712+
def test_repeated_lookup_from_retrieved_resource(self):
713+
"""
714+
A (custom-)retrieved resource is added to the registry returned by
715+
looking it up.
716+
"""
717+
resource = Resource.opaque(contents={"foo": "baz"})
718+
once = [resource]
719+
720+
def retrieve(uri: str):
721+
return once.pop()
722+
723+
resolver = Registry(retrieve=retrieve).resolver()
724+
resolved = resolver.lookup("http://example.com/")
725+
assert resolved.contents == resource.contents
726+
727+
resolved = resolved.resolver.lookup("http://example.com/")
728+
assert resolved.contents == resource.contents
729+
730+
def test_repeated_anchor_lookup_from_retrieved_resource(self):
731+
resource = Resource.opaque(contents={"foo": "baz"})
732+
once = [resource]
733+
734+
def retrieve(uri: str):
735+
return once.pop()
736+
737+
resolver = Registry(retrieve=retrieve).resolver()
738+
resolved = resolver.lookup("http://example.com/")
739+
assert resolved.contents == resource.contents
740+
741+
resolved = resolved.resolver.lookup("#")
742+
assert resolved.contents == resource.contents
743+
701744
# FIXME: The tests below aren't really representable in the current
702745
# suite, though we should probably think of ways to do so.
703746

0 commit comments

Comments
 (0)