Skip to content

Commit

Permalink
switch Index to be a frozenset
Browse files Browse the repository at this point in the history
  • Loading branch information
rabernat committed Jan 12, 2022
1 parent 4f8c052 commit 0ed0db6
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ repos:
- id: seed-isort-config

- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.910'
rev: 'v0.931'
hooks:
- id: mypy
exclude: tests
Expand Down
34 changes: 11 additions & 23 deletions pangeo_forge_recipes/patterns.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
"""
Filename / URL patterns.
"""
from __future__ import annotations

import inspect
from dataclasses import dataclass, field, replace
from enum import Enum
from itertools import product
from typing import (
TYPE_CHECKING,
Any,
Callable,
ClassVar,
Dict,
Iterable,
FrozenSet,
Iterator,
List,
Optional,
Expand Down Expand Up @@ -68,7 +70,7 @@ class MergeDim:
operation: ClassVar[CombineOp] = CombineOp.MERGE


@dataclass(frozen=True)
@dataclass(frozen=True, order=True)
class DimIndex:
"""Object used to index a single dimension of a FilePattern or Recipe Chunks.
Expand All @@ -92,27 +94,13 @@ def __post_init__(self):
assert self.index < self.sequence_len


class Index(tuple):
"""A tuple of ``DimIndex`` objects.
The order of the indexes doesn't matter for comparison."""

def __new__(self, args: Iterable[DimIndex]):
# This validation really slows things down because we call Index a lot!
# if not all((isinstance(a, DimIndex) for a in args)):
# raise ValueError("All arguments must be DimIndex.")
# args_set = set(args)
# if len(set(args_set)) < len(tuple(args)):
# raise ValueError("Duplicate argument detected.")
return tuple.__new__(Index, args)

def __str__(self):
return ",".join(str(dim) for dim in self)

def __eq__(self, other):
return (set(self) == set(other)) and (len(self) == len(other))

def __hash__(self):
return hash(frozenset(self))
if TYPE_CHECKING:
# If we just do this, we can't initialize an Index by just writing
# Index(list_of_dims)
Index = FrozenSet[DimIndex]
else:
# But if we just do this, it won't pass mypy 😖
Index = frozenset[DimIndex]


CombineDim = Union[MergeDim, ConcatDim]
Expand Down
2 changes: 2 additions & 0 deletions pangeo_forge_recipes/recipes/reference_hdf_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def scan_file(chunk_key: ChunkKey, config: HDFReferenceRecipe):
ref_fname = os.path.basename(fname + ".json")
with file_opener(fname, **config.netcdf_storage_options) as fp:
protocol = getattr(getattr(fp, "fs", None), "protocol", None) # make mypy happy
if protocol is None:
raise ValueError("Couldn't determine protocol")
target_url = unstrip_protocol(fname, protocol)
config.metadata_cache[ref_fname] = create_hdf5_reference(fp, target_url, fname)

Expand Down
4 changes: 2 additions & 2 deletions pangeo_forge_recipes/recipes/xarray_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@


def _input_metadata_fname(input_key: InputKey) -> str:
key_str = "-".join([f"{k.name}_{k.index}" for k in input_key])
key_str = "-".join([f"{k.name}_{k.index}" for k in sorted(input_key)])
return "input-meta-" + key_str + ".json"


def _input_reference_fname(input_key: InputKey) -> str:
key_str = "-".join([f"{k.name}_{k.index}" for k in input_key])
key_str = "-".join([f"{k.name}_{k.index}" for k in sorted(input_key)])
return "input-reference-" + key_str + ".json"


Expand Down

0 comments on commit 0ed0db6

Please sign in to comment.