Skip to content

Commit

Permalink
Make non-sgkit arrays optional
Browse files Browse the repository at this point in the history
  • Loading branch information
benjeffery committed Oct 31, 2022
1 parent 5810cf5 commit 9927560
Showing 1 changed file with 65 additions and 24 deletions.
89 changes: 65 additions & 24 deletions tsinfer/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2232,6 +2232,12 @@ def __init__(self, path):

assert self.ploidy == self.data["call_genotype"].chunks[2]

def __metadata_schema_getter(self, zarr_group):
    """Return the metadata schema recorded on ``self.data[zarr_group]``.

    :param zarr_group: key of the group inside ``self.data`` whose
        ``attrs["metadata_schema"]`` entry should be read.
    :return: the stored schema, or ``{"codec": "json"}`` when either the
        group or its ``metadata_schema`` attribute raises ``KeyError``.
    """
    try:
        # Both lookups sit inside the try on purpose: a missing group and a
        # missing attribute are handled the same way.
        return self.data[zarr_group].attrs["metadata_schema"]
    except KeyError:
        return {"codec": "json"}

@property
def format_name(self):
    """Return the ``FORMAT_NAME`` constant of this object."""
    return self.FORMAT_NAME
Expand All @@ -2246,23 +2252,34 @@ def finalised(self):

@property
def sequence_length(self):
    """Return the sequence length of the dataset.

    Uses the ``sequence_length`` attribute stored on ``self.data`` when it
    exists; otherwise falls back to one more than the largest value in
    ``self.data["variant_position"]``.
    """
    try:
        return self.data.attrs["sequence_length"]
    except KeyError:
        # No explicit length recorded: derive it from the last position.
        return int(np.max(self.data["variant_position"])) + 1

@property
def num_sites(self):
    """Return the cached site count held in ``self._num_sites``."""
    return self._num_sites

@property
def sites_metadata_schema(self):
    """Return the metadata schema for the ``"sites"`` group.

    Delegates to ``__metadata_schema_getter``, so the result is whatever
    that helper returns for the ``"sites"`` key.
    """
    return self.__metadata_schema_getter("sites")

@property
def sites_metadata(self):
    """Return per-site metadata.

    Reads ``self.data["sites/metadata"]`` when present. If that key is
    missing, a JSON-encoded zarr array of empty dicts is returned instead,
    one entry per site.
    """
    try:
        return self.data["sites/metadata"]
    except KeyError:
        # The placeholder must be sized by the number of sites (not the
        # number of individuals) so it lines up with the other sites_* arrays.
        return zarr.array([{}] * self.num_sites, object_codec=numcodecs.JSON())

@property
def sites_time(self):
    """Return per-site times.

    Reads ``self.data["sites/time"]`` when present; otherwise returns an
    array shaped like ``self.data["variant_position"]`` filled with
    ``tskit.UNKNOWN_TIME``.
    """
    try:
        return self.data["sites/time"]
    except KeyError:
        return np.full(self.data["variant_position"].shape, tskit.UNKNOWN_TIME)

@property
def sites_position(self):
Expand All @@ -2279,11 +2296,17 @@ def sites_genotypes(self):

@property
def provenances_timestamp(self):
    """Return the provenance timestamps.

    Reads ``self.data["provenances_timestamp"]`` when present; otherwise
    returns an empty object-dtype numpy array.
    """
    try:
        return self.data["provenances_timestamp"]
    except KeyError:
        return np.array([], dtype=object)

@property
def provenances_record(self):
    """Return the provenance records.

    Reads ``self.data["provenances_record"]`` when present; otherwise
    returns an empty object-dtype numpy array.
    """
    try:
        return self.data["provenances_record"]
    except KeyError:
        return np.array([], dtype=object)

@property
def num_samples(self):
Expand All @@ -2298,55 +2321,73 @@ def samples_individual(self):

@property
def metadata_schema(self):
    """Return the top-level metadata schema, or ``None`` if none is stored.

    The schema is read from ``self.data.attrs["metadata_schema"]``.
    """
    try:
        return self.data.attrs["metadata_schema"]
    except KeyError:
        # Explicit return: a bare ``None`` expression here would only work
        # by accident of the implicit function return value.
        return None

@property
def metadata(self):
    """Return the top-level metadata, or ``b""`` if none is stored.

    The value is read from ``self.data.attrs["metadata"]``. (Reading the
    ``"metadata_schema"`` attribute here would hand callers the schema
    instead of the metadata itself.)
    """
    try:
        return self.data.attrs["metadata"]
    except KeyError:
        return b""

# NOTE(review): the commit view this section was recovered from appears to
# drop ``populations`` and ``num_populations``. They are kept unchanged here
# so existing callers keep working -- confirm whether a base class provides
# them before removing, and whether a constant 0 is still right now that
# ``populations_metadata`` can come from stored data.
@property
def populations(self):
    """Return an empty zarr array (``zarr.empty(0)``)."""
    return zarr.empty(0)

@property
def num_populations(self):
    """Return the number of populations; always 0 in this implementation."""
    return 0

@property
def populations_metadata(self):
    """Return per-population metadata.

    Reads ``self.data["populations/metadata"]`` when present; otherwise
    returns an empty object-dtype numpy array.
    """
    try:
        return self.data["populations/metadata"]
    except KeyError:
        return np.array([], dtype=object)

@property
def populations_metadata_schema(self):
    """Return the metadata schema for the ``"populations"`` group.

    Delegates to ``__metadata_schema_getter``, so the result is whatever
    that helper returns for the ``"populations"`` key.
    """
    return self.__metadata_schema_getter("populations")

@property
def num_individuals(self):
    """Return the cached individual count held in ``self._num_individuals``."""
    return self._num_individuals

@property
def individuals_time(self):
    """Return per-individual times.

    Reads ``self.data["individuals/time"]`` when present; otherwise returns
    an array of length ``self.num_individuals`` filled with
    ``tskit.UNKNOWN_TIME``.
    """
    try:
        return self.data["individuals/time"]
    except KeyError:
        return np.full(self.num_individuals, tskit.UNKNOWN_TIME)

@property
def individuals_metadata_schema(self):
    """Return the metadata schema for the ``"individuals"`` group.

    Delegates to ``__metadata_schema_getter``, so the result is whatever
    that helper returns for the ``"individuals"`` key.
    """
    return self.__metadata_schema_getter("individuals")

@property
def individuals_metadata(self):
    """Return per-individual metadata.

    Reads ``self.data["individuals/metadata"]`` when present. If that key
    is missing, a JSON-encoded zarr array of ``self.num_individuals`` empty
    dicts is returned instead.
    """
    try:
        return self.data["individuals/metadata"]
    except KeyError:
        return zarr.array(
            [{}] * self.num_individuals, object_codec=numcodecs.JSON()
        )

@property
def individuals_location(self):
    """Return per-individual locations.

    Reads ``self.data["individuals/location"]`` when present; otherwise
    returns a float zarr array built from ``self.num_individuals`` empty
    rows.
    """
    try:
        return self.data["individuals/location"]
    except KeyError:
        return zarr.array([[]] * self.num_individuals, dtype=float)

@property
def individuals_population(self):
    """Return the population reference of every individual.

    Reads ``self.data["individuals/population"]`` when present; otherwise
    returns an ``int32`` array of length ``self.num_individuals`` filled
    with ``tskit.NULL``.
    """
    try:
        return self.data["individuals/population"]
    except KeyError:
        return np.full((self.num_individuals), tskit.NULL, dtype=np.int32)

@property
def individuals_flags(self):
    """Return the flags of every individual.

    Reads ``self.data["individuals/flags"]`` when present; otherwise
    returns an ``int32`` array of ``self.num_individuals`` zeros.
    """
    try:
        # Must read the flags array; "individuals/population" belongs to
        # ``individuals_population`` and would return population ids here.
        return self.data["individuals/flags"]
    except KeyError:
        return np.full((self.num_individuals), 0, dtype=np.int32)

def variants(self, sites=None, recode_ancestral=None):
"""
Expand Down

0 comments on commit 9927560

Please sign in to comment.