|
| 1 | +""" |
| 2 | +Classes to find data files and executables in global paths. |
| 3 | +""" |
| 4 | +from abc import ABC, abstractmethod |
| 5 | +from collections.abc import Iterator, Iterable |
| 6 | +import os |
| 7 | +import os.path |
| 8 | +from fnmatch import fnmatch |
| 9 | +from glob import glob |
| 10 | +import re |
| 11 | +from typing import Any |
| 12 | + |
# Default file ending of executable wrapper scripts: batch files on Windows, shell scripts everywhere else.
EXE_SUFFIX = "bat" if os.name == "nt" else "sh"
| 17 | + |
class ResourceNotFound(RuntimeError):
    """Raised when a resolver lookup does not produce a single match."""
| 20 | + |
| 21 | +class AbstractResolver(ABC): |
| 22 | + """ |
| 23 | + Interface for resolvers. |
| 24 | +
|
| 25 | + Implementations must define :meth:`._search`, taking a tuple of names to search for and yielding instances of any |
| 26 | + type. Implementations should pick a single type to yield, e.g. :class:`.ResourceResolver` always yields absolute |
| 27 | + paths, while :class:`.ExecutableResolver` always yields 2-tuples of a version tag and absolute paths. |
| 28 | + """ |
| 29 | + @abstractmethod |
| 30 | + def _search(self, name: tuple[str]) -> Iterator[Any]: |
| 31 | + pass |
| 32 | + |
| 33 | + def search(self, name: Iterable[str] | str = "*") -> Iterator[Any]: |
| 34 | + """ |
| 35 | + Yield all matches. |
| 36 | +
|
| 37 | + When `name` is given as an iterable, returned results match at least one of the `name` globs. |
| 38 | +
|
| 39 | + Args: |
| 40 | + name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those |
| 41 | +
|
| 42 | + Yields: |
| 43 | + object: resources matching `name` |
| 44 | + """ |
| 45 | + if name is not None and not isinstance(name, str): |
| 46 | + name = tuple(name) |
| 47 | + else: |
| 48 | + name = (name,) |
| 49 | + yield from self._search(name) |
| 50 | + |
| 51 | + def list(self, name: Iterable[str] | str = "*") -> list[Any]: |
| 52 | + """ |
| 53 | + Return all matches. |
| 54 | +
|
| 55 | + Args: |
| 56 | + name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those |
| 57 | +
|
| 58 | + Returns: |
| 59 | + list: all matches returned by :meth:`.search`. |
| 60 | + """ |
| 61 | + return list(self.search(name)) |
| 62 | + |
| 63 | + def first(self, name: Iterable[str] | str = "*") -> Any: |
| 64 | + """ |
| 65 | + Return first match. |
| 66 | +
|
| 67 | + Args: |
| 68 | + name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those |
| 69 | +
|
| 70 | + Returns: |
| 71 | + object: the first match returned by :meth:`.search`. |
| 72 | +
|
| 73 | + Raises: |
| 74 | + :class:`~.ResourceNotFound`: if no matches are found. |
| 75 | + """ |
| 76 | + try: |
| 77 | + return next(iter(self.search(name))) |
| 78 | + except StopIteration: |
| 79 | + raise ResourceNotFound(f"Could not find {name} in {self}!") from None |
| 80 | + |
| 81 | + def chain(self, *resolvers: "AbstractResolver") -> "ResolverChain": |
| 82 | + """ |
| 83 | + Return a new resolver that searches this and all given resolvers sequentially. |
| 84 | +
|
| 85 | + You will likely want to ensure that all given resolvers yield the same types and e.g. not mix ExecutableResolver |
| 86 | + and ResourceResolver, but this is not checked. |
| 87 | +
|
| 88 | + The advantage of using :meth:`.chain` rather than adding more paths to one resolver is when different paths have |
| 89 | + different internal sub structure, such as when combining resources from pyiron resources and conda data |
| 90 | + packages. When searching for lammps potential files, e.g. we have some folders that are set up as |
| 91 | +
|
| 92 | + <resources>/lammps/potentials/... |
| 93 | +
|
| 94 | + but iprpy conda package that ships the NIST potentials doesn't have the lammps/potentials |
| 95 | +
|
| 96 | + <iprpy>/... |
| 97 | +
|
| 98 | + With chaining we can do very easily |
| 99 | +
|
| 100 | + >>> ResourceResolver([<resources>], "lammps", "potentials").chain( |
| 101 | + ... ResourceResolver([<iprpy>])) # doctest: +SKIP |
| 102 | +
|
| 103 | + without we'd need to modify the resource paths ourselves explicitly |
| 104 | +
|
| 105 | + >>> ResourceResolver([r + '/lammps/potentials' for r in <resources>] + [<iprpy>]) # doctest: +SKIP |
| 106 | +
|
| 107 | + which is a bit more awkward. |
| 108 | +
|
| 109 | + Args: |
| 110 | + resolvers (:class:`.AbstractResolver`): any number of sub resolvers |
| 111 | +
|
| 112 | + Returns: |
| 113 | + self: if `resolvers` is empty |
| 114 | + :class:`.ResolverChain`: otherwise |
| 115 | + """ |
| 116 | + if resolvers == (): |
| 117 | + return self |
| 118 | + return ResolverChain(self, *resolvers) |
| 119 | + |
| 120 | + |
class ResolverChain(AbstractResolver):
    """
    Resolver that queries a fixed sequence of sub resolvers.

    All matches of the first sub resolver are yielded first, then all matches of the second one and so on.
    """
    __slots__ = ("_resolvers",)

    def __init__(self, *resolvers):
        """
        Args:
            *resolvers (:class:`.AbstractResolver`): sub resolvers to query, in the given order
        """
        self._resolvers = resolvers

    def __repr__(self):
        parts = [repr(sub) for sub in self._resolvers]
        return f'{type(self).__name__}({", ".join(parts)})'

    def _search(self, name):
        # Delegate to the public `search` of each sub resolver in turn.
        for sub in self._resolvers:
            yield from sub.search(name)
| 140 | + |
| 141 | + |
class ResourceResolver(AbstractResolver):
    """
    Generic resolver for files and directories.

    Resources are expected to conform to the following format:
        <resource_path>/<module>/<subdir0>/<subdir1>/...

    *All* entries within this final `subdir` are yielded by :meth:`.search`, whether they are files or directories.
    Search results can be restricted by passing a (list of) globs.  If a list is given, entries matching at least one
    of them are returned; an entry matching several of the globs is yielded once for every glob it matches.

    >>> res = ResourceResolver(..., "lammps", "potentials")
    >>> res.list() # doctest: +SKIP
    [
        '/my/resources/lammps/potentials/first_file',
        '/my/resources/lammps/potentials/second_file',
        '/my/resources/lammps/potentials/some_directory',
    ]
    >>> res.first("second*") # doctest: +SKIP
    '/my/resources/lammps/potentials/second_file'
    """
    __slots__ = "_resource_paths", "_module", "_subdirs"

    def __init__(self, resource_paths, module, *subdirs):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            module (str): name of the module
            *subdirs (str): additional sub directories to descend into
        """
        self._resource_paths = resource_paths
        self._module = module
        self._subdirs = subdirs

    def __repr__(self):
        # Join *all* constructor arguments with the same separator, so that the sub directories do not run into
        # the module name.
        args = [self._resource_paths, self._module, *self._subdirs]
        inner = ", ".join(repr(a) for a in args)
        return f"{type(self).__name__}({inner})"

    def _search(self, name):
        # Function-scope import: only the `glob` function itself is imported at module level.
        from glob import escape

        for base in self._resource_paths:
            sub = os.path.join(base, self._module, *self._subdirs)
            if not os.path.exists(sub):
                continue
            # The directory part is a literal path; escape it so that only `name` is interpreted as a glob.
            literal = escape(sub)
            for pattern in name:
                # sorted: results within one directory and glob come in a reproducible order
                yield from sorted(glob(os.path.join(literal, pattern)))
| 192 | + |
| 193 | + |
class ExecutableResolver(AbstractResolver):
    """
    A resolver for executable scripts.

    Executables are expected to conform to the following format:
        <resource_path>/<module>/bin/run_<code>_<version_string>.<suffix>

    and have the executable bit set.  :meth:`.search` yields tuples of version strings and full paths to the
    executable instead of plain strings.  If the same version string is found more than once, only the first
    executable found is yielded.

    >>> exe = ExecutableResolver(..., "lammps")
    >>> exe.list() # doctest: +SKIP
    [
        ('v1', '/my/resources/lammps/bin/run_lammps_v1.sh'),
        ('v1_mpi', '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'),
        ('v2_default', '/my/resources/lammps/bin/run_lammps_v2_default.sh'),
    ]
    >>> exe.default_version # doctest: +SKIP
    "v2_default"
    >>> exe.dict("v1*") # doctest: +SKIP
    {
        'v1': '/my/resources/lammps/bin/run_lammps_v1.sh',
        'v1_mpi': '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'
    }
    """
    __slots__ = "_regex", "_glob", "_resolver", "_code", "_suffix"

    def __init__(self, resource_paths, code, module=None, suffix=EXE_SUFFIX):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            code (str): name of the simulation code
            module (str): name of the module the code is part of, same as `code` by default
            suffix (str): file ending; 'bat' on Windows 'sh' elsewhere
        """
        # Function-scope import: only the `glob` function itself is imported at module level.
        from glob import escape

        if module is None:
            module = code
        self._code = code
        self._suffix = suffix
        # `code` and `suffix` are literal text: escape them so characters like '+' or '.' are not interpreted as
        # regex or glob syntax.  The regex is applied to the bare file name with `fullmatch`, hence no anchors.
        self._regex = re.compile(rf"run_{re.escape(code)}_(.*)\.{re.escape(suffix)}")
        self._glob = f"run_{escape(code)}_*.{escape(suffix)}"
        self._resolver = ResourceResolver(
            resource_paths,
            module, 'bin',
        )

    def __repr__(self):
        args = (self._resolver._resource_paths, self._code, self._resolver._module, self._suffix)
        inner = ", ".join(repr(a) for a in args)
        return f"{type(self).__name__}({inner})"

    def _search(self, name):
        # effective_ids is not supported on every platform; only request it where it is.
        use_effective_ids = os.access in os.supports_effective_ids
        seen = set()
        for path in self._resolver.search(self._glob):
            if not os.path.isfile(path):
                continue
            if not os.access(path, os.X_OK, effective_ids=use_effective_ids):
                continue
            # Match the file name only, so that directories that happen to contain 'run_<code>_' cannot leak into
            # the version string.
            match = self._regex.fullmatch(os.path.basename(path))
            if match is None:
                # The glob can be more permissive than the regex (e.g. case-insensitive file systems); skip those.
                continue
            version = match.group(1)
            if version in seen:
                continue
            if not any(fnmatch(version, n) for n in name):
                continue
            seen.add(version)
            yield (version, path)

    def dict(self, name="*") -> dict[str, str]:
        """
        Construct dict from :meth:`.search` results.

        Args:
            name (str or list of str): glob(s) to filter the version strings

        Returns:
            dict: mapping version strings to full paths
        """
        return dict(self.search(name=name))

    @property
    def available_versions(self):
        """
        list of str: all found versions
        """
        return [version for version, _ in self.search("*")]

    @property
    def default_version(self):
        """
        str: the first version found in resources

        If a version matching `*default*` exists, the first matching is returned.

        Raises:
            :class:`.ResourceNotFound`: if no executables are found at all
        """
        try:
            return self.first("*default*")[0]
        except ResourceNotFound:
            pass
        # try again outside the except clause to avoid nested error in case this fails as well
        return self.first("*")[0]
0 commit comments