-
Notifications
You must be signed in to change notification settings - Fork 0
/
h5otherrecompress3.py
125 lines (121 loc) · 4.22 KB
/
h5otherrecompress3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import sys
import numpy
import h5py
import hdf5plugin
import shutil
import time
import os
import subprocess
# Recompress the entry/data/data dataset of an HDF5 file with a chosen
# hdf5plugin filter, repack the result with h5repack, and report the
# resulting storage size.
#
# Usage: h5otherrecompress3.py BASENAME COMPRESSION [CLEVEL]
#   BASENAME     input file name without the ".h5" extension
#   COMPRESSION  one of the names in ALL_CHOICES (case-insensitive)
#   CLEVEL       compression level, default "2"; must be 0..9 for the
#                Blosc-based choices
#
# All intermediate files are written to SCRATCH_DIR and removed before exit.

H5REPACK = "/nsls2/users/hbernstein/bin/h5repack"
H5DUMP = "/nsls2/users/hbernstein/bin/h5dump"
SCRATCH_DIR = "/dev/shm"
DEFAULT_CLEVEL = "2"
# Preferred chunk shape; clamped to the dataset's shape in chunk_shape().
DEFAULT_CHUNKS = (1, 3269, 3110)

# Compression name -> (Blosc cname, name of the shuffle constant on
# hdf5plugin.Blosc).  The constant is looked up lazily so this table can be
# built without touching hdf5plugin.
BLOSC_CHOICES = {
    "zstd": ("zstd", "NOSHUFFLE"),
    "bszstd": ("zstd", "BITSHUFFLE"),
    "szstd": ("zstd", "SHUFFLE"),
    "slz4": ("lz4", "SHUFFLE"),
    "blosclz4": ("lz4", "NOSHUFFLE"),
    "bloscbslz4": ("lz4", "BITSHUFFLE"),
    "bloscslz4": ("lz4", "SHUFFLE"),
}
# Choices that use the stand-alone LZ4 / Bitshuffle filters (no clevel).
NATIVE_CHOICES = frozenset(["lz4", "bslz4"])
ALL_CHOICES = frozenset(BLOSC_CHOICES) | NATIVE_CHOICES

USAGE = " usage: h5otherrecompress3.py BASENAME COMPRESSION [CLEVEL]"


class RecompressError(Exception):
    """Raised when the input file cannot be recompressed or repacked."""


def normalize_compression(name):
    """Return the canonical lower-case compression name, or None if unknown."""
    lowered = name.lower()
    return lowered if lowered in ALL_CHOICES else None


def blosc_level(clevel):
    """Return *clevel* as an int in 0..9, or None if it is not a valid level."""
    try:
        level = int(clevel)
    except ValueError:
        return None
    return level if 0 <= level <= 9 else None


def parse_args(argv):
    """Parse the command line into (xbasename, compression, clevel).

    *clevel* is returned as the string the user gave (or DEFAULT_CLEVEL) so
    that output file names are built exactly from the user's spelling.

    Raises:
        ValueError: arguments are missing, the compression name is unknown,
            or a Blosc-based choice was given a level outside 0..9.  The
            exception text is the message to show the user.
    """
    if len(argv) < 3:
        raise ValueError(USAGE)
    xbasename = argv[1]
    requested = argv[2]
    compression = normalize_compression(requested)
    if compression is None:
        raise ValueError(" invalid compression choice: " + requested)
    clevel = argv[3] if len(argv) > 3 else DEFAULT_CLEVEL
    # Only the Blosc-based filters consume clevel; for the others it is
    # merely part of the output file name, so any string stays acceptable.
    if compression in BLOSC_CHOICES and blosc_level(clevel) is None:
        raise ValueError(" invalid compression level (expected 0..9): " + clevel)
    return xbasename, compression, clevel


def scratch_paths(xbasename, compression, clevel):
    """Return (newname, repackname), both located directly in SCRATCH_DIR.

    Only the final path component of *xbasename* is used: os.path.join would
    otherwise discard SCRATCH_DIR for an absolute basename, or point into a
    sub-directory of SCRATCH_DIR that does not exist.
    """
    stem = "{}_{}_{}".format(os.path.basename(xbasename), compression, clevel)
    newname = os.path.join(SCRATCH_DIR, stem + "_other.h5")
    repackname = os.path.join(SCRATCH_DIR, stem + "_repackother.h5")
    return newname, repackname


def chunk_shape(shape, preferred=DEFAULT_CHUNKS):
    """Return the chunks argument for a dataset of the given *shape*.

    The preferred chunk shape is clamped to *shape* so that data smaller than
    the preferred chunk is still accepted.  When the rank differs from the
    preferred chunk's rank, True is returned to request h5py auto-chunking.
    """
    if len(shape) != len(preferred):
        return True
    return tuple(min(want, have) for want, have in zip(preferred, shape))


def filter_kwargs(compression, clevel):
    """Return the hdf5plugin filter mapping for a canonical compression name."""
    if compression == "lz4":
        return hdf5plugin.LZ4(nbytes=0)
    if compression == "bslz4":
        return hdf5plugin.Bitshuffle(nelems=0, lz4=True)
    cname, shuffle = BLOSC_CHOICES[compression]
    return hdf5plugin.Blosc(
        cname=cname,
        clevel=int(clevel),
        shuffle=getattr(hdf5plugin.Blosc, shuffle),
    )


def recompress(path, compression, clevel):
    """Rewrite entry/data/data in the HDF5 file at *path* with a new filter.

    The dataset is read fully into memory, deleted, and re-created under the
    same name with the requested compression.  The CPU time spent creating
    the new dataset is printed.

    Raises:
        RecompressError: a required group is missing or the dataset could
            not be read or rewritten.
    """
    with h5py.File(path, "r+") as hf:
        try:
            entry = hf["entry"]
        except KeyError:
            raise RecompressError(" failed to find to level entry group ")
        try:
            group = entry["data"]
        except KeyError:
            raise RecompressError(" failed to find to entry/data group ")
        try:
            dataset = group["data"]
            print(dataset)
            frames = dataset[...]  # already a fresh in-memory array
            # NOTE(review): attributes attached to the original dataset are
            # not carried over to the rewritten one - confirm whether they
            # should be restored.
            del group["data"]
            print("mydata.shape: ", frames.shape)
            started = time.process_time()
            group.create_dataset(
                "data",
                data=frames,
                chunks=chunk_shape(frames.shape),
                **filter_kwargs(compression, clevel)
            )
            finished = time.process_time()
            print("recompression time: ", finished - started)
        except Exception as err:
            # Exception (not a bare except) so SystemExit/KeyboardInterrupt
            # are never mistaken for a dataset failure.
            raise RecompressError(
                " failed to get the data dataset, compression:  {}  clevel:  {} ({!r})".format(
                    compression, clevel, err
                )
            )


def remove_if_present(path):
    """Delete *path*, ignoring the case where it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def repack_and_report(newname, repackname):
    """Repack *newname* into *repackname* and print its size information.

    Prints the h5dump header lines containing ' SIZE' followed by an
    ``ls -alt`` listing of the repacked file.

    Raises:
        RecompressError: h5repack or h5dump could not be run or failed.
    """
    # Flush our own buffered output so it stays ahead of the child processes'.
    sys.stdout.flush()
    try:
        repack = subprocess.run([H5REPACK, newname, repackname])
        if repack.returncode != 0:
            raise RecompressError(
                " h5repack failed with exit status {}".format(repack.returncode)
            )
        remove_if_present(newname)
        dump = subprocess.run(
            [H5DUMP, "-pH", repackname],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        if dump.returncode != 0:
            raise RecompressError(
                " h5dump failed with exit status {}".format(dump.returncode)
            )
    except OSError as err:
        raise RecompressError(" failed to run HDF5 tool: {}".format(err))
    for line in dump.stdout.splitlines():
        if " SIZE" in line:
            print(line)
    sys.stdout.flush()
    subprocess.run(["ls", "-alt", repackname])


def main(argv):
    """Run the recompression benchmark; return the process exit status."""
    try:
        xbasename, compression, clevel = parse_args(argv)
    except ValueError as err:
        print(err)
        return 1
    oldname = xbasename + ".h5"
    newname, repackname = scratch_paths(xbasename, compression, clevel)
    print(" converting '" + oldname + "' to '" + newname + "'")
    try:
        shutil.copyfile(oldname, newname)
        recompress(newname, compression, clevel)
        repack_and_report(newname, repackname)
    except RecompressError as err:
        print(err)
        return 1
    finally:
        # Never leave scratch files behind, whatever happened above.
        remove_if_present(newname)
        remove_if_present(repackname)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))