Skip to content

Commit

Permalink
Merge pull request #67 from axelboc/empty
Browse files Browse the repository at this point in the history
Support reading empty datasets and attributes
  • Loading branch information
bmaranville authored Mar 14, 2024
2 parents 790e673 + 19e5cc7 commit 5f92764
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 50 deletions.
28 changes: 16 additions & 12 deletions src/hdf5_hl.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ export declare class Attribute {
name: string;
metadata: Metadata;
dtype: Dtype;
shape: number[];
shape: number[] | null;
private _value?;
private _json_value?;
constructor(file_id: bigint, path: string, name: string);
get value(): OutputData;
get json_value(): JSONCompatibleOutputData;
to_array(): string | number | boolean | JSONCompatibleOutputData[];
get value(): OutputData | null;
get json_value(): JSONCompatibleOutputData | null;
to_array(): JSONCompatibleOutputData | null;
}
declare abstract class HasAttrs {
file_id: bigint;
Expand Down Expand Up @@ -132,14 +132,14 @@ export declare class Dataset extends HasAttrs {
refresh(): void;
get metadata(): Metadata;
get dtype(): Dtype;
get shape(): number[];
get shape(): number[] | null;
get filters(): Filter[];
get value(): OutputData;
get json_value(): JSONCompatibleOutputData;
slice(ranges: Slice[]): OutputData;
get value(): OutputData | null;
get json_value(): JSONCompatibleOutputData | null;
slice(ranges: Slice[]): OutputData | null;
write_slice(ranges: Slice[], data: any): void;
create_region_reference(ranges: Slice[]): RegionReference;
to_array(): string | number | boolean | JSONCompatibleOutputData[];
to_array(): JSONCompatibleOutputData | null;
resize(new_shape: number[]): number;
make_scale(scale_name?: string): void;
attach_scale(index: number, scale_dset_path: string): void;
Expand All @@ -148,16 +148,20 @@ export declare class Dataset extends HasAttrs {
get_scale_name(): string | null;
set_dimension_label(index: number, label: string): void;
get_dimension_labels(): (string | null)[];
_value_getter(json_compatible?: boolean): OutputData;
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
}
/**
 * Type declarations for a region of a dataset selected through a region reference.
 */
export declare class DatasetRegion {
    source_dataset: Dataset;
    region_reference: RegionReference;
    private _metadata?;
    constructor(source_dataset: Dataset, region_reference: RegionReference);
    get metadata(): Metadata;
    /** Region data; `null` when the region's metadata carries no shape. */
    get value(): OutputData | null;
    /** Omitted or `false` flag: yields `OutputData` (or `null`). */
    _value_getter(json_compatible?: false): OutputData | null;
    /** `true` flag: yields `JSONCompatibleOutputData` (or `null`). */
    _value_getter(json_compatible: true): JSONCompatibleOutputData | null;
    /** Flag not known statically: result may be either representation (or `null`). */
    _value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
}
export declare const h5wasm: {
File: typeof File;
Expand Down
82 changes: 56 additions & 26 deletions src/hdf5_hl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,15 @@ function dirname(path: string) {
return head;
}

function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: true): JSONCompatibleOutputData;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: false): OutputData;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean): OutputData | JSONCompatibleOutputData;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean = false) {
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: true): JSONCompatibleOutputData | null;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: false): OutputData | null;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean = false): OutputData | JSONCompatibleOutputData | null {
let metadata = Module.get_attribute_metadata(file_id, obj_name, attr_name);
if (!metadata.shape) {
return null;
}

let nbytes = metadata.size * metadata.total_size;
let data_ptr = Module._malloc(nbytes);
var processed;
Expand Down Expand Up @@ -106,6 +110,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
// but otherwise returns Uint8Array raw bytes as loaded.
let output_data: OutputData;
let { shape, type } = metadata;

let known_type = true;
// let length: number;
if (type === Module.H5T_class_t.H5T_STRING.value) {
Expand Down Expand Up @@ -153,9 +158,9 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
else if (type === Module.H5T_class_t.H5T_COMPOUND.value) {
const { size, compound_type } = <{size: Metadata["size"], compound_type: CompoundTypeMetadata}>metadata;
let n = Math.floor(data.byteLength / size);
let output: OutputData[] = [];
let output: (OutputData | JSONCompatibleOutputData)[] = [];
for (let i = 0; i < n; i++) {
let row: OutputData = [];
let row: (OutputData | JSONCompatibleOutputData)[] = [];
let row_data = data.slice(i * size, (i + 1) * size);
for (let member of compound_type.members) {
let member_data = row_data.slice(member.offset, member.offset + member.size);
Expand All @@ -168,7 +173,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo

else if (type === Module.H5T_class_t.H5T_ARRAY.value) {
const { array_type } = <{array_type: Metadata}>metadata;
shape = shape.concat(array_type.shape);
shape = (<number[]>shape).concat(<number[]>array_type.shape);
array_type.shape = shape;
// always convert ARRAY types to base JS types:
output_data = process_data(data, array_type, true);
Expand Down Expand Up @@ -220,7 +225,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
}

/**
 * Type guard: reports whether `x` is a non-null object exposing `Symbol.iterator`.
 *
 * `typeof null` is `'object'`, and applying the `in` operator to `null` throws a
 * TypeError, so `null` has to be ruled out before probing for `Symbol.iterator`.
 * Primitive strings are reported as not iterable because `typeof` yields `'string'`
 * for them.
 *
 * @param x - Any value, including `null` and `undefined`.
 * @returns `true` when `x` is a non-null object that has a `Symbol.iterator` property.
 */
function isIterable(x: any): x is Iterable<unknown> {
  return typeof x === 'object' && x !== null && Symbol.iterator in x;
}

function isH5PYBooleanEnum(enum_type: EnumTypeMetadata) {
Expand Down Expand Up @@ -515,9 +520,9 @@ export class Attribute {
name: string;
metadata: Metadata;
dtype: Dtype;
shape: number[];
private _value?: OutputData;
private _json_value?: JSONCompatibleOutputData;
shape: number[] | null;
private _value?: OutputData | null;
private _json_value?: JSONCompatibleOutputData | null;

constructor(file_id: bigint, path: string, name: string) {
this.file_id = file_id;
Expand All @@ -529,27 +534,27 @@ export class Attribute {
this.shape = metadata.shape;
}

/**
 * Attribute value, fetched with `get_attr` on first access and cached in `_value`.
 *
 * `null` is a legitimate result (returned by `get_attr` when the attribute's
 * metadata has no shape), so only `undefined` is treated as "not loaded yet".
 */
get value(): OutputData | null {
  if (typeof this._value === "undefined") {
    this._value = get_attr(this.file_id, this.path, this.name, false);
  }
  return this._value;
}

/**
 * JSON-compatible form of the attribute value, fetched with `get_attr` on first
 * access and cached in `_json_value`.
 *
 * `null` is a legitimate result (returned by `get_attr` when the attribute's
 * metadata has no shape), so only `undefined` is treated as "not loaded yet".
 */
get json_value(): JSONCompatibleOutputData | null {
  if (typeof this._json_value === "undefined") {
    this._json_value = get_attr(this.file_id, this.path, this.name, true);
  }
  return this._json_value;
}

/**
 * Returns the attribute's JSON-compatible value arranged by `metadata.shape`.
 *
 * A value that is not iterable, or that is a string, is returned unchanged.
 * Anything else is handed to `create_nested_array` together with the shape.
 */
to_array(): JSONCompatibleOutputData | null {
  const { json_value, metadata } = this;
  const { shape } = metadata;
  if (!isIterable(json_value) || typeof json_value === "string") {
    return json_value;
  }
  // NOTE(review): the cast assumes shape is non-null whenever json_value is
  // iterable (get_attr returns null for shapeless metadata) — confirm.
  return create_nested_array(json_value, <number[]>shape);
}
}

Expand Down Expand Up @@ -916,24 +921,28 @@ export class Dataset extends HasAttrs {
return Module.get_dataset_filters(this.file_id, this.path);
}

/**
 * Dataset contents as produced by `_value_getter(false)`; `null` when the
 * dataset's metadata has no shape.
 */
get value(): OutputData | null {
  return this._value_getter(false);
}

/**
 * Dataset contents in JSON-compatible form; `null` when the dataset's metadata
 * has no shape.
 *
 * No type assertion is needed: the `true` overload of `_value_getter` already
 * returns `JSONCompatibleOutputData | null`.
 */
get json_value(): JSONCompatibleOutputData | null {
  return this._value_getter(true);
}

slice(ranges: Slice[]) {
slice(ranges: Slice[]): OutputData | null {
// interpret ranges as [start, stop], with one per dim.
const metadata = this.metadata;
// if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id;
const { shape } = metadata;
if (!shape) {
return null;
}

const {strides, count, offset} = calculateHyperslabParams(shape, ranges);
const total_size = count.reduce((previous, current) => current * previous, 1n);
const nbytes = metadata.size * Number(total_size);
const data_ptr = Module._malloc(nbytes);
let processed;
let processed: OutputData;
try {
Module.get_dataset_data(this.file_id, this.path, count, offset, strides, BigInt(data_ptr));
let data = Module.HEAPU8.slice(data_ptr, data_ptr + nbytes);
Expand All @@ -950,6 +959,9 @@ export class Dataset extends HasAttrs {
write_slice(ranges: Slice[], data: any) {
// interpret ranges as [start, stop], with one per dim.
let metadata = this.metadata;
if (!metadata.shape) {
throw new Error("cannot write to a slice of an empty dataset");
}
if (metadata.vlen) {
throw new Error("writing to a slice of vlen dtype is not implemented");
}
Expand All @@ -971,20 +983,24 @@ export class Dataset extends HasAttrs {

/**
 * Builds a `RegionReference` for the selection described by `ranges`.
 *
 * @param ranges - interpreted as [start, stop], with one entry per dimension.
 * @returns a `RegionReference` wrapping the data returned by
 *   `Module.create_region_reference`.
 * @throws Error when the dataset's metadata has no shape.
 */
create_region_reference(ranges: Slice[]) {
  const { shape } = this.metadata;
  if (!shape) {
    throw new Error("cannot create region reference from empty dataset");
  }

  // Convert the per-dimension ranges into the hyperslab description passed to the module call.
  const hyperslab = calculateHyperslabParams(shape, ranges);
  const reference_bytes = Module.create_region_reference(
    this.file_id,
    this.path,
    hyperslab.count,
    hyperslab.offset,
    hyperslab.strides
  );
  return new RegionReference(reference_bytes);
}

/**
 * Returns the dataset's JSON-compatible value arranged by `metadata.shape`.
 *
 * A value that is not iterable, or that is a string, is returned unchanged.
 * Anything else is handed to `create_nested_array` together with the shape.
 */
to_array(): JSONCompatibleOutputData | null {
  const { json_value, metadata } = this;
  const { shape } = metadata;
  if (!isIterable(json_value) || typeof json_value === "string") {
    return json_value;
  }
  // NOTE(review): the cast assumes shape is non-null whenever json_value is
  // iterable (_value_getter returns null for shapeless metadata) — confirm.
  return create_nested_array(json_value, <number[]>shape);
}

Expand Down Expand Up @@ -1033,8 +1049,15 @@ export class Dataset extends HasAttrs {
return Module.get_dimension_labels(this.file_id, this.path);
}

_value_getter(json_compatible=false) {
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
_value_getter(json_compatible=false): OutputData | JSONCompatibleOutputData | null {
let metadata = this.metadata;
if (!metadata.shape) {
return null
}

// if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id;
let nbytes = metadata.size * metadata.total_size;
let data_ptr = Module._malloc(nbytes);
Expand Down Expand Up @@ -1071,12 +1094,19 @@ export class DatasetRegion {
return this._metadata;
}

/**
 * Region contents as produced by `_value_getter(false)`; `null` when the
 * region's metadata has no shape.
 */
get value(): OutputData | null {
  return this._value_getter(false);
}

_value_getter(json_compatible=false) {
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
_value_getter(json_compatible=false): OutputData | JSONCompatibleOutputData | null {
let metadata = this.metadata;
if (!metadata.shape) {
return null;
}

// if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id;
let nbytes = metadata.size * metadata.total_size;
let data_ptr = Module._malloc(nbytes);
Expand Down Expand Up @@ -1108,7 +1138,7 @@ function create_nested_array(value: JSONCompatibleOutputData[], shape: number[])
const subdims = shape.slice(1).reverse();
for (let dim of subdims) {
// in each pass, replace input with array of slices of input
const new_output: JSONCompatibleOutputData = [];
const new_output: JSONCompatibleOutputData[][] = [];
const { length } = output;
let cursor = 0;
while (cursor < length) {
Expand Down
24 changes: 18 additions & 6 deletions src/hdf5_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,15 +345,26 @@ val get_abstractDS_metadata(hid_t dspace, hid_t dtype, hid_t dcpl)
{
val attr = get_dtype_metadata(dtype);

int rank = H5Sget_simple_extent_ndims(dspace);
int type = H5Sget_simple_extent_type(dspace);
int total_size = H5Sget_simple_extent_npoints(dspace);
attr.set("total_size", total_size);

if (type == H5S_NULL) {
attr.set("shape", val::null());
attr.set("maxshape", val::null());
attr.set("chunks", val::null());
return attr;
}

int rank = H5Sget_simple_extent_ndims(dspace);
std::vector<hsize_t> dims_out(rank);
std::vector<hsize_t> maxdims_out(rank);

int ndims = H5Sget_simple_extent_dims(dspace, dims_out.data(), maxdims_out.data());

val shape = val::array();
val maxshape = val::array();
for (int d = 0; d < ndims; d++)
{
for (int d = 0; d < ndims; d++) {
shape.set(d, (uint)dims_out.at(d));
maxshape.set(d, (uint)maxdims_out.at(d));
}
Expand All @@ -364,19 +375,20 @@ val get_abstractDS_metadata(hid_t dspace, hid_t dtype, hid_t dcpl)

if (dcpl) {
H5D_layout_t layout = H5Pget_layout(dcpl);

if (layout == H5D_CHUNKED) {
std::vector<hsize_t> chunk_dims_out(ndims);
H5Pget_chunk(dcpl, ndims, chunk_dims_out.data());

val chunks = val::array();
for (int c = 0; c < ndims; c++)
{
for (int c = 0; c < ndims; c++) {
chunks.set(c, (uint)chunk_dims_out.at(c));
}

attr.set("chunks", chunks);
}
}

attr.set("total_size", total_size);
return attr;
}

Expand Down
12 changes: 6 additions & 6 deletions src/hdf5_util_helpers.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ export interface H5T_class_t {

export interface Metadata {
array_type?: Metadata,
chunks: Array<number> | null,
chunks: number[] | null,
compound_type?: CompoundTypeMetadata,
cset: number,
enum_type?: EnumTypeMetadata,
littleEndian: boolean,
maxshape: Array<number> | null,
maxshape: number[] | null,
ref_type?: 'object' | 'region',
shape: Array<number>,
shape: number[] | null,
signed: boolean,
size: number,
total_size: number,
Expand All @@ -40,7 +40,7 @@ export interface CompoundMember extends Metadata {
}

/** Describes the members of a compound datatype. */
export interface CompoundTypeMetadata {
  /** Metadata for each member of the compound type. */
  members: CompoundMember[];
  /** Member count — presumably equal to `members.length`; confirm against the native metadata producer. */
  nmembers: number;
}

Expand Down Expand Up @@ -105,7 +105,7 @@ export interface H5Module extends EmscriptenModule {
reclaim_vlen_memory(file_id: BigInt, obj_name: string, attr_name: string, data_ptr: bigint): Status;
get_attribute_data(file_id: BigInt, obj_name: string, attr_name: string, arg3: bigint): Status;
FS: FS.FileSystemType,
get_keys_vector(group_id: bigint, H5_index_t: number): Array<string>,
get_keys_vector(group_id: bigint, H5_index_t: number): string[],
get_attribute_metadata(loc_id: bigint, group_name_string: string, attribute_name_string: string): Metadata,
get_plugin_search_paths(): string[],
insert_plugin_search_path(search_path: string, index: number): number,
Expand All @@ -116,7 +116,7 @@ export interface H5Module extends EmscriptenModule {
get_scale_name(loc_id: bigint, dimscale_dset_name: string): string | null,
get_attached_scales(loc_id: bigint, target_dset_name: string, index: number): string[],
set_dimension_label(loc_id: bigint, target_dset_name: string, index: number, label: string): number,
get_dimension_labels(loc_id: bigint, target_dset_name: string): Array<string | null>,
get_dimension_labels(loc_id: bigint, target_dset_name: string): (string | null)[],
create_object_reference(loc_id: bigint, target_name: string): Uint8Array,
create_region_reference(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null): Uint8Array,
get_referenced_name(loc_id: bigint, ref_ptr: Uint8Array, is_object: boolean): string;
Expand Down

0 comments on commit 5f92764

Please sign in to comment.