Skip to content

Commit 0b24e3b

Browse files
committed
Add Resolvers to unify finding of resources
Add two classes to find files in pyiron resource paths: one for arbitrary resource files, and one specifically for executables that also parses out their versions.
1 parent 553d8d4 commit 0b24e3b

File tree

12 files changed

+350
-0
lines changed

12 files changed

+350
-0
lines changed

pyiron_snippets/resources.py

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
"""
2+
Classes to find data files and executables in global paths.
3+
"""
4+
from abc import ABC, abstractmethod
5+
from collections.abc import Iterator, Iterable
6+
import os
7+
import os.path
8+
from fnmatch import fnmatch
9+
from glob import glob
10+
import re
11+
from typing import Any
12+
13+
# File ending of executable wrapper scripts: batch files on Windows ("nt"), shell scripts everywhere else.
EXE_SUFFIX = "bat" if os.name == "nt" else "sh"
17+
18+
class ResourceNotFound(RuntimeError):
    """Raised when a resolver is asked for a resource but finds no match."""
20+
21+
class AbstractResolver(ABC):
22+
"""
23+
Interface for resolvers.
24+
25+
Implementations must define :meth:`._search`, taking a tuple of names to search for and yielding instances of any
26+
type. Implementations should pick a single type to yield, e.g. :class:`.ResourceResolver` always yields absolute
27+
paths, while :class:`.ExecutableResolver` always yields 2-tuples of a version tag and absolute paths.
28+
"""
29+
@abstractmethod
30+
def _search(self, name: tuple[str]) -> Iterator[Any]:
31+
pass
32+
33+
def search(self, name: Iterable[str] | str = "*") -> Iterator[Any]:
34+
"""
35+
Yield all matches.
36+
37+
When `name` is given as an iterable, returned results match at least one of the `name` globs.
38+
39+
Args:
40+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
41+
42+
Yields:
43+
object: resources matching `name`
44+
"""
45+
if name is not None and not isinstance(name, str):
46+
name = tuple(name)
47+
else:
48+
name = (name,)
49+
yield from self._search(name)
50+
51+
def list(self, name: Iterable[str] | str = "*") -> list[Any]:
52+
"""
53+
Return all matches.
54+
55+
Args:
56+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
57+
58+
Returns:
59+
list: all matches returned by :meth:`.search`.
60+
"""
61+
return list(self.search(name))
62+
63+
def first(self, name: Iterable[str] | str = "*") -> Any:
64+
"""
65+
Return first match.
66+
67+
Args:
68+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
69+
70+
Returns:
71+
object: the first match returned by :meth:`.search`.
72+
73+
Raises:
74+
:class:`~.ResourceNotFound`: if no matches are found.
75+
"""
76+
try:
77+
return next(iter(self.search(name)))
78+
except StopIteration:
79+
raise ResourceNotFound(f"Could not find {name} in {self}!") from None
80+
81+
def chain(self, *resolvers: "AbstractResolver") -> "ResolverChain":
82+
"""
83+
Return a new resolver that searches this and all given resolvers sequentially.
84+
85+
You will likely want to ensure that all given resolvers yield the same types and e.g. not mix ExecutableResolver
86+
and ResourceResolver, but this is not checked.
87+
88+
The advantage of using :meth:`.chain` rather than adding more paths to one resolver is when different paths have
89+
different internal sub structure, such as when combining resources from pyiron resources and conda data
90+
packages. When searching for lammps potential files, e.g. we have some folders that are set up as
91+
92+
<resources>/lammps/potentials/...
93+
94+
but iprpy conda package that ships the NIST potentials doesn't have the lammps/potentials
95+
96+
<iprpy>/...
97+
98+
With chaining we can do very easily
99+
100+
>>> ResourceResolver([<resources>], "lammps", "potentials").chain(
101+
... ResourceResolver([<iprpy>])) # doctest: +SKIP
102+
103+
without we'd need to modify the resource paths ourselves explicitly
104+
105+
>>> ResourceResolver([r + '/lammps/potentials' for r in <resources>] + [<iprpy>]) # doctest: +SKIP
106+
107+
which is a bit more awkward.
108+
109+
Args:
110+
resolvers (:class:`.AbstractResolver`): any number of sub resolvers
111+
112+
Returns:
113+
self: if `resolvers` is empty
114+
:class:`.ResolverChain`: otherwise
115+
"""
116+
if resolvers == ():
117+
return self
118+
return ResolverChain(self, *resolvers)
119+
120+
121+
class ResolverChain(AbstractResolver):
    """
    Resolver that delegates to a sequence of sub resolvers.

    The matches of the first sub resolver are yielded first, then those of the second, and so on.
    """

    __slots__ = ("_resolvers",)

    def __init__(self, *resolvers):
        """
        Args:
            *resolvers (:class:`.AbstractResolver`): sub resolvers to use
        """
        self._resolvers = resolvers

    def _search(self, name):
        # Go through the public `search` of each sub resolver, in the order they were given.
        for sub_resolver in self._resolvers:
            for match in sub_resolver.search(name):
                yield match

    def __repr__(self):
        inner = ", ".join(map(repr, self._resolvers))
        return f'{type(self).__name__}({inner})'
140+
141+
142+
class ResourceResolver(AbstractResolver):
    """
    Generic resolver for files and directories.

    Resources are expected to conform to the following format:
        <resource_path>/<module>/<subdir0>/<subdir1>/...

    *All* entries within this final `subdir` are yielded by :meth:`.search` as paths, whether they are files or
    directories. Search results can be restricted by passing a (list of) globs. If a list is given, entries matching
    at least one of them are returned. Resource paths are searched in the order given; matches from the same
    directory are yielded in sorted order.

    >>> res = ResourceResolver(["/my/resources"], "lammps", "potentials")
    >>> res.list("*.eam") # doctest: +SKIP
    [
        '/my/resources/lammps/potentials/Al.eam',
        '/my/resources/lammps/potentials/Ni.eam',
    ]
    >>> res.first("Al.eam") # doctest: +SKIP
    '/my/resources/lammps/potentials/Al.eam'
    """

    __slots__ = "_resource_paths", "_module", "_subdirs"

    def __init__(self, resource_paths, module, *subdirs):
        """
        Args:
            resource_paths (list of str): base paths for resource locations; a single str is treated as a list with
                one entry
            module (str): name of the module
            *subdirs (str): additional sub directories to descend into
        """
        # Guard against a bare string, which would otherwise be iterated character by character in _search.
        if isinstance(resource_paths, str):
            resource_paths = [resource_paths]
        self._resource_paths = resource_paths
        self._module = module
        self._subdirs = subdirs

    def __repr__(self):
        # Join all constructor arguments with the same separator, so that the module and the first sub directory
        # do not run into each other.
        args = (self._resource_paths, self._module, *self._subdirs)
        inner = ", ".join(repr(a) for a in args)
        return f"{type(self).__name__}({inner})"

    def _search(self, name):
        for base in self._resource_paths:
            sub = os.path.join(base, self._module, *self._subdirs)
            # Resource paths that do not ship this module/sub directory are silently skipped.
            if not os.path.exists(sub):
                continue
            for pattern in name:
                # glob order is arbitrary; sort to make results within one directory deterministic.
                yield from sorted(glob(os.path.join(sub, pattern)))
192+
193+
194+
class ExecutableResolver(AbstractResolver):
    """
    A resolver for executable scripts.

    Executables are expected to conform to the following format:
        <resource_path>/<module>/bin/run_<code>_<version_string>.<suffix>

    and have the executable bit set. :meth:`.search` yields tuples of version strings and full paths to the executable
    instead of plain strings. If the same version string is found more than once, only the first occurrence is
    yielded.

    >>> exe = ExecutableResolver(..., "lammps")
    >>> exe.list() # doctest: +SKIP
    [
        ('v1', '/my/resources/lammps/bin/run_lammps_v1.sh'),
        ('v1_mpi', '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'),
        ('v2_default', '/my/resources/lammps/bin/run_lammps_v2_default.sh'),
    ]
    >>> exe.default_version # doctest: +SKIP
    "v2_default"
    >>> exe.dict("v1*") # doctest: +SKIP
    {
        'v1': '/my/resources/lammps/bin/run_lammps_v1.sh',
        'v1_mpi': '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'
    }
    """

    __slots__ = "_regex", "_glob", "_resolver"

    def __init__(self, resource_paths, code, module=None, suffix=EXE_SUFFIX):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            code (str): name of the simulation code
            module (str): name of the module the code is part of, same as `code` by default
            suffix (str): file ending; 'bat' on Windows 'sh' elsewhere
        """
        if module is None:
            module = code
        # Raw string plus re.escape: `code` and `suffix` are matched literally even if they contain regex
        # metacharacters. The pattern is applied to the file name only, see _search.
        self._regex = re.compile(rf"run_{re.escape(code)}_(.*)\.{re.escape(suffix)}")
        self._glob = f"run_{code}_*.{suffix}"
        self._resolver = ResourceResolver(resource_paths, module, "bin")

    def _search(self, name):
        seen = set()
        # Only request a check against the effective ids where os.access supports it on this platform.
        use_effective_ids = os.access in os.supports_effective_ids
        for path in self._resolver.search(self._glob):
            if not os.path.isfile(path):
                continue
            if not os.access(path, os.X_OK, effective_ids=use_effective_ids):
                continue
            # Match the file name only, so that parent directories that happen to look like an executable name
            # cannot leak into the version string.
            match = self._regex.fullmatch(os.path.basename(path))
            if match is None:
                # The glob is more permissive than the regex in corner cases; skip instead of crashing.
                continue
            version = match.group(1)
            if version not in seen and any(fnmatch(version, n) for n in name):
                seen.add(version)
                yield (version, path)

    def dict(self, name="*") -> dict[str, str]:
        """
        Construct dict from :meth:`.search` results.

        Args:
            name (str or list of str): glob(s) to filter the version strings

        Returns:
            dict: mapping version strings to full paths
        """
        return dict(self.search(name=name))

    @property
    def available_versions(self):
        """
        list of str: all found versions
        """
        return [version for version, _ in self.search("*")]

    @property
    def default_version(self):
        """
        str: the first version found in resources

        If a version matching `*default*` exists, the first matching is returned.

        Raises:
            :class:`.ResourceNotFound`: if no executables are found at all
        """
        try:
            return self.first("*default*")[0]
        except ResourceNotFound:
            pass
        # try again outside the except clause to avoid nested error in case this fails as well
        return self.first("*")[0]

tests/unit/static/resources/res1/module1/bin/run_code1_version1.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version2.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_versionnonexec.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version1.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version2_default.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/wrong_format

Whitespace-only changes.

tests/unit/static/resources/res1/module1/data/empty.txt

Whitespace-only changes.

tests/unit/static/resources/res1/module3/empty.txt

Whitespace-only changes.

tests/unit/static/resources/res2/module2/data/empty.txt

Whitespace-only changes.

tests/unit/static/resources/res2/module3/empty.txt

Whitespace-only changes.

tests/unit/test_resources.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import os
2+
import os.path
3+
import unittest
4+
from pyiron_snippets.resources import ResourceNotFound, ResourceResolver, ExecutableResolver
5+
6+
class TestResolvers(unittest.TestCase):
    """
    Tests for the resolvers, run against the static resource tree in ``static/resources`` next to this file.
    """

    @classmethod
    def setUpClass(cls):
        cls.static_path = os.path.join(os.path.dirname(__file__), "static", "resources")
        cls.res1 = os.path.join(cls.static_path, "res1")
        cls.res2 = os.path.join(cls.static_path, "res2")

    def test_resource_resolver(self):
        """search, first and list must agree with each other and find all resources."""
        res = ResourceResolver([self.res1], "module1")
        self.assertEqual(set(res.search()),
                         {os.path.join(self.res1, "module1", "bin"),
                          os.path.join(self.res1, "module1", "data")},
                         "Simple search does not return all resources!")
        self.assertEqual(res.first(), tuple(res.search())[0],
                         "first does not return first result!")
        self.assertEqual(list(res.search()), res.list(), "list not equal to search!")
        with self.assertRaises(ResourceNotFound, msg="first does not raise error on non existing resource!"):
            res.first("nonexisting")
        res = ResourceResolver([self.res1, self.res2], "module3")
        # assertEqual rather than assertTrue(len(..) == 2), so that a failure reports the actual count
        self.assertEqual(len(res.list("empty.txt")), 2,
                         msg="should find all instances of files with the same name.")

    def test_order(self):
        """search must return results in the order given by the resource paths."""
        self.assertIn("res1", ResourceResolver([self.res1, self.res2], "module3").first(),
                      "resolver does not respect order of given resource paths!")
        self.assertIn("res2", ResourceResolver([self.res2, self.res1], "module3").first(),
                      "resolver does not respect order of given resource paths!")
        self.assertEqual(tuple(os.path.basename(r) for r in ResourceResolver([self.res1], "module1").search()),
                         tuple(sorted(("bin", "data"))),
                         "search does not return results from the same folder in alphabetical order!")

    def test_chain(self):
        """chained resolvers must behave like normal resolvers."""
        chain = ResourceResolver([self.res1], "module3").chain(ResourceResolver([self.res2], "module3"))
        resol = ResourceResolver([self.res1, self.res2], "module3")

        self.assertEqual(chain.first(), resol.first(),
                         "first returns different result for chained and normal resolver!")
        self.assertEqual(tuple(chain.search()), tuple(resol.search()),
                         "search returns different result for chained and normal resolver!")

    def test_executable(self):
        """only executable, correctly named scripts are listed and the default version is picked as documented."""
        res = ExecutableResolver([self.res1], code="code1", module="module1")
        self.assertNotIn("versionnonexec", res.available_versions,
                         "ExecutableResolver must not list scripts that are not executable.")
        self.assertNotIn("wrong_format", res.available_versions,
                         "ExecutableResolver must not list scripts that do not follow the correct format.")
        self.assertEqual("version1", res.default_version,
                         "default version should be chosen in alphabetical order if not explicitly set.")
        res = ExecutableResolver([self.res1], code="code2", module="module1")
        self.assertEqual(res.default_version, "version2_default",
                         "default version should be chosen as explicitly set.")
64+
65+
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)