-
Notifications
You must be signed in to change notification settings - Fork 0
/
h5recompress.py
103 lines (99 loc) · 3.29 KB
/
h5recompress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import sys
import numpy
import h5py
import hdf5plugin
import shutil
import time
import os
import subprocess
# --- Command-line arguments --------------------------------------------------
#   argv[1]: file basename (WITHOUT the .h5 extension)
#   argv[2]: target compression scheme (case-insensitive)
#   argv[3]: optional compression level, default "2"
xbasename = sys.argv[1]  # the basename without the h5 extension
oldname = xbasename + ".h5"
compression = sys.argv[2]  # the new compression
if len(sys.argv) > 3:
    clevel = sys.argv[3]
else:
    clevel = "2"
# Supported codec names. The original elif chain lowercased the argument and
# re-assigned the identical string six times; a membership test is equivalent.
_VALID_COMPRESSIONS = ("zstd", "bszstd", "lz4", "bslz4", "blosclz4", "bloscbslz4")
if compression.lower() in _VALID_COMPRESSIONS:
    compression = compression.lower()
else:
    # Echo the user's original (un-lowered) spelling, as before.
    print(" invalid compression choice: " + compression)
    exit(1)
# Stage the work in /dev/shm (RAM-backed tmpfs) so the heavy HDF5 rewrite
# does not churn the real filesystem; the source file is never modified.
_stem = f"{xbasename}_{compression}_{clevel}"
newname = os.path.join("/dev/shm", _stem + ".h5")
repackname = os.path.join("/dev/shm", _stem + "_repack.h5")
print(f" converting '{oldname}' to '{newname}'")
# Recompression below edits the copy in place.
shutil.copyfile(oldname, newname)
# Recompress entry/data/data in place inside the copied file.
# The 'with' statement closes the file on every path (including exit()),
# so no explicit hf.close() calls are needed before exiting.
with h5py.File(newname, 'r+') as hf:  # open in read/write mode
    try:
        myentry = hf['entry']
    except KeyError:  # missing group raises KeyError, not a generic error
        print(" failed to find top level entry group ")
        exit(1)
    try:
        mydatagroup = myentry["data"]
    except KeyError:
        print(" failed to find entry/data group ")
        exit(1)
    try:
        mydatadataset = mydatagroup["data"]
        print(mydatadataset)
        # Snapshot the dataset's attributes BEFORE deleting it; the original
        # fetched .attrs into an unused variable and the recreated dataset
        # silently lost them. Re-applied after create_dataset below.
        saved_attrs = dict(mydatadataset.attrs)
        # Pull the full dataset into memory so it survives the deletion.
        mydata = numpy.copy(mydatadataset[:])
        del mydatagroup["data"]  # deleting dataset!
        # hdf5plugin/h5py require an integer compression level; argv supplies
        # a string, which made every Blosc path fail with a TypeError.
        iclevel = int(clevel)
        # Hard-coded chunk shape: one detector frame per chunk.
        # TODO(review): parameterize if other frame sizes are used.
        chunkshape = (1, 3269, 3110)
        # Map codec name -> hdf5plugin filter (replaces the six-way elif chain).
        filters = {
            "zstd": hdf5plugin.Blosc(cname='zstd', clevel=iclevel,
                                     shuffle=hdf5plugin.Blosc.NOSHUFFLE),
            "bszstd": hdf5plugin.Blosc(cname='zstd', clevel=iclevel,
                                       shuffle=hdf5plugin.Blosc.SHUFFLE),
            "lz4": hdf5plugin.LZ4(nbytes=0),
            "bslz4": hdf5plugin.Bitshuffle(nelems=0, lz4=True),
            "blosclz4": hdf5plugin.Blosc(cname='lz4', clevel=iclevel,
                                         shuffle=hdf5plugin.Blosc.NOSHUFFLE),
            "bloscbslz4": hdf5plugin.Blosc(cname='lz4', clevel=iclevel,
                                           shuffle=hdf5plugin.Blosc.SHUFFLE),
        }
        if compression not in filters:
            print(" unrecognized compression on write ")
            exit(1)
        t1 = time.process_time()
        ### write with hdf5plugin
        newdataset = mydatagroup.create_dataset(
            'data',
            data=mydata, chunks=chunkshape,
            **filters[compression])
        # Restore the attributes captured before deletion.
        for key, value in saved_attrs.items():
            newdataset.attrs[key] = value
        t2 = time.process_time()
        print("recompression time: ", t2 - t1)
    except Exception as exc:
        print(" failed to get the data dataset")
        print(" error: ", exc)
        # The data dataset may already be deleted at this point; do not fall
        # through to repack a broken file (the original continued silently).
        exit(1)
# --- Post-process: repack, report compressed sizes, clean up -----------------
# subprocess.run with argument lists (shell=False) replaces os.system string
# concatenation, which breaks on (and is injectable via) filenames containing
# spaces or shell metacharacters. Exit codes stay best-effort, as before.
H5REPACK = "/nsls2/users/hbernstein/bin/h5repack"
H5DUMP = "/nsls2/users/hbernstein/bin/h5dump"
subprocess.run([H5REPACK, newname, repackname], check=False)
os.remove(newname)  # the un-repacked copy is no longer needed
# Equivalent of `h5dump -pH file | grep ' SIZE'`: print storage-size lines.
dump = subprocess.run([H5DUMP, "-pH", repackname],
                      capture_output=True, text=True, check=False)
for line in dump.stdout.splitlines():
    if " SIZE" in line:
        print(line)
subprocess.run(["ls", "-alt", repackname], check=False)
if os.path.exists(repackname):  # repack may have failed; `rm` tolerated that
    os.remove(repackname)