forked from Kappie/3d-scattering-transform
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmy_utils.py
244 lines (210 loc) · 8.73 KB
/
my_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import numpy as np
import pyculib.fft
import scipy.fftpack
import time
import math
from numba import cuda, vectorize, jit
def extract_scattering_coefficients_cpu(X, phi, downsampling_resolution):
    """
    CPU version: modulus of the filtered signal, subsampled in real space.

    Assumes X and phi are in Fourier space, so their elementwise product
    corresponds to a convolution in the spatial domain.

    Returns `downsample(|ifftn(X * phi)|, downsampling_resolution)`.
    """
    filtered_fourier = X * phi
    filtered_spatial = scipy.fftpack.ifftn(filtered_fourier)
    modulus = np.abs(filtered_spatial)
    return downsample(modulus, downsampling_resolution)
def extract_scattering_coefficients(X, phi, downsampling_resolution):
    """
    GPU version of `extract_scattering_coefficients_cpu`.

    Assumes X and phi are in Fourier space and that X is 3-dimensional
    (its shape is unpacked into three values).

    The product, inverse FFT and modulus are all computed in place on the
    device copy of X; the result is then copied back to the host, normalised,
    converted to float32 and subsampled with `downsample`.
    """
    dim_x, dim_y, dim_z = X.shape
    signal_dev = cuda.to_device(X)
    filter_dev = cuda.to_device(phi)
    grid, block = get_blocks_and_threads(dim_x, dim_y, dim_z)

    # Elementwise product in Fourier space (a convolution in the spatial
    # domain); the product overwrites `signal_dev`.
    MultiplyInPlace[grid, block](signal_dev, filter_dev)

    # Back to the spatial domain, still in the same device buffer.
    pyculib.fft.ifft_inplace(signal_dev)

    # Modulus of every element.
    ModulusInPlace[grid, block](signal_dev)

    modulus_host = signal_dev.copy_to_host()

    # The inverse transform above is not normalised, so divide by the number
    # of elements here on the host.
    n_elements = np.prod(modulus_host.shape)
    normalised = (modulus_host / n_elements).real.astype(np.float32)
    return downsample(normalised, downsampling_resolution)
def apply_wavelet_cpu(X, psi, downsampling_resolution):
    """
    CPU version: filter X with psi, take the modulus, subsample, and return
    the result transformed back to Fourier space.

    Assumes X and psi are in Fourier space.

    Returns `fftn(downsample(|ifftn(X * psi)|, downsampling_resolution))`.
    """
    # NumPy has no `downsample` function; the real-space subsampling helper
    # is the module-level `downsample` defined in this file.
    modulus = np.abs(scipy.fftpack.ifftn(X * psi))
    transform = downsample(modulus, downsampling_resolution)
    return scipy.fftpack.fftn(transform)
def apply_wavelet(X, psi, downsampling_resolution):
    """
    Filter X with psi, take the modulus, subsample, and return the result
    in Fourier space.

    Implemented partially on GPU: the filtering and modulus are delegated to
    `extract_scattering_coefficients`, the final forward FFT runs through
    scipy on the host.
    Assumes X and psi are in Fourier space.
    """
    modulus_downsampled = extract_scattering_coefficients(
        X, psi, downsampling_resolution
    )
    return scipy.fftpack.fftn(modulus_downsampled)
@jit
def periodize(X, res):
    """
    Periodize a 3D Fourier spectrum by a factor 2**res along each axis.

    Every output bin is the average of the 2**res aliases of that bin along
    each axis:

        result[k, l, m] = 1 / 2**(3*res) * sum over a, b, c of
                          X[k + a*width, l + b*height, m + c*depth]

    where (width, height, depth) is the output shape, i.e. the shape of X
    integer-divided by 2**res. By the DFT aliasing identity this is the
    spectrum of the real-space signal subsampled with stride 2**res,
    provided every dimension of X is divisible by 2**res.

    Returns a new array; the accumulator is complex64.
    """
    downsampling_factor = 2**res
    width = X.shape[0] // downsampling_factor
    height = X.shape[1] // downsampling_factor
    depth = X.shape[2] // downsampling_factor
    result = np.zeros((width, height, depth), dtype=np.complex64)

    for k in range(width):
        for l in range(height):
            for m in range(depth):
                for a in range(downsampling_factor):
                    for b in range(downsampling_factor):
                        for c in range(downsampling_factor):
                            # Aliases are spaced by the *output* size along
                            # each axis (not output size // factor).
                            result[k, l, m] += X[
                                k + a * width,
                                l + b * height,
                                m + c * depth,
                            ]

    # Average over the downsampling_factor**3 aliases summed per bin.
    return result / (downsampling_factor**3)
# def extract_scattering_coefficients(X, phi, downsampling_resolution):
# """
# Phi is already in fourier space. calculate | x \conv filter | downsampled at 2**downsampling_resolution.
# """
# x, y, z = X.shape
# X_gpu = cuda.to_device(X)
# phi_gpu = cuda.to_device(phi)
# result_full_scale = cuda.device_array_like(X)
# result = cuda.device_array((x//2**downsampling_resolution, y//2**downsampling_resolution, z//2**downsampling_resolution), dtype=np.complex64)
#
# # Fourier transform X
# pyculib.fft.fft_inplace(X_gpu)
# X_fourier = X_gpu
# # Multiply in Fourier space
# blockspergrid, threadsperblock = get_blocks_and_threads(x, y, z)
# MultiplyInPlace[blockspergrid, threadsperblock](X_fourier, phi_gpu)
# result_multiplication = X_fourier
#
# # Downsample in Fourier space by cropping the highest frequencies (resolution is inferred by shape of `result`.)
# blockspergrid, threadsperblock = get_blocks_and_threads(result.shape[0], result.shape[1], result.shape[2])
# crop_freq_3d_gpu[blockspergrid, threadsperblock](result_multiplication, result)
# # Transform to real space
# pyculib.fft.ifft_inplace(result)
# # Take absolute value
# ModulusInPlace[blockspergrid, threadsperblock](result)
# result = result.copy_to_host()
# n_elements = np.prod(result.shape)
# # normalise inverse fourier transformation
# return (result / n_elements).real.astype(np.float32)
#
#
# def extract_scattering_coefficients_cpu(X, phi, downsampling_resolution):
# """
# Phi is already in fourier space. calculate | x \conv filter | downsampled at 2**downsampling_resolution.
# """
# # Fourier transform X
# X_fourier = scipy.fftpack.fftn(X)
# # First low-pass filter: Extract zeroth order coefficients
# downsampled_product = crop_freq_3d( X_fourier * phi, downsampling_resolution )
# # Transform back to real space and take modulus.
# result = np.abs( scipy.fftpack.ifftn(downsampled_product) )
# return result
#
#
# def abs_after_convolve(A, B, downsampling_resolution):
# """
# A and B are both in real space. Calculate | A \conv B |.
# """
# x, y, z = A.shape
# A_gpu = cuda.to_device(A)
# B_gpu = cuda.to_device(B)
# result_full_scale = cuda.device_array_like(A)
# result = cuda.device_array((x//2**downsampling_resolution, y//2**downsampling_resolution, z//2**downsampling_resolution), dtype=np.complex64)
#
# # Fourier transform X
# pyculib.fft.fft_inplace(A_gpu)
# A_fourier = A_gpu
# pyculib.fft.fft_inplace(B_gpu)
# B_fourier = B_gpu
#
# # Multiply in Fourier space
# blockspergrid, threadsperblock = get_blocks_and_threads(x, y, z)
# MultiplyInPlace[blockspergrid, threadsperblock](A_fourier, B_fourier)
# result_multiplication = A_fourier
#
# # Downsample in Fourier space by cropping the highest frequencies (resolution is inferred by shape of `result`.)
# blockspergrid, threadsperblock = get_blocks_and_threads(result.shape[0], result.shape[1], result.shape[2])
# crop_freq_3d_gpu[blockspergrid, threadsperblock](result_multiplication, result)
# # Transform to real space
# pyculib.fft.ifft_inplace(result)
# # Take absolute value
# ModulusInPlace[blockspergrid, threadsperblock](result)
# result = result.copy_to_host()
# n_elements = np.prod(result.shape)
# # normalise inverse fourier transformation
# return (result / n_elements).astype(np.complex64)
#
#
# def abs_after_convolve_cpu(A, B, downsampling_resolution):
# """
# A and B are both in real space. Calculate | A \conv B |.
# """
# A_fourier = scipy.fftpack.fftn(A)
# B_fourier = scipy.fftpack.fftn(B)
# downsampled_product = crop_freq_3d(A_fourier * B_fourier, downsampling_resolution)
# return np.abs( scipy.fftpack.ifftn(downsampled_product) ).astype(np.complex64)
def crop_freq_3d(x, res):
    """
    Crop the highest (1 - 2^-res) part of a 3D Fourier spectrum.

    For res = 1 the highest half of the spectrum is dropped, for res = 2 the
    highest 3/4, and so on. Along each of the first three axes only the
    dim / 2**(res + 1) entries at the front and the same number at the end
    are kept, so each axis shrinks from dim to dim / 2**res.

    2D illustration for res = 1 and a 4x4 input (one entry kept at the front
    and one at the back of each axis):
        [[a00, a03], [a30, a33]]

    Expects the dimensions of the array to be powers of 2.
    For res == 0 the input is returned unchanged (same object).
    """
    if res == 0:
        return x

    def kept_indices(dim):
        # Leading entries followed by the trailing ones (negative indices
        # count from the end of the axis).
        count = int(dim * 2 ** (-res - 1))
        return [*range(count), *range(-count, 0)]

    dim_x, dim_y, dim_z = x.shape[0], x.shape[1], x.shape[2]
    selector = np.ix_(
        kept_indices(dim_x), kept_indices(dim_y), kept_indices(dim_z)
    )
    return x[selector]
def get_blocks_and_threads(x, y, z):
    """
    Compute a CUDA launch configuration covering an (x, y, z) array.

    Each block has at most 8 threads per axis (so at most 8*8*8 = 512
    threads, within CUDA's 1024 threads-per-block limit). An axis shorter
    than 8 gets a block exactly as long as the axis. The grid has enough
    blocks per axis to cover the whole array; when a dimension is not a
    multiple of the block size the last block overshoots, so kernels must
    bounds-check their indices.

    Returns (blockspergrid, threadsperblock), both 3-tuples with every
    entry at least 1.
    """
    max_threads_per_axis = 8
    dims = (x, y, z)
    # Clamp per axis: using the full (x, y, z) as the block shape whenever
    # one axis is short could request far more threads than a block allows.
    threadsperblock = tuple(
        max(1, min(dim, max_threads_per_axis)) for dim in dims
    )
    blockspergrid = tuple(
        max(1, math.ceil(dim / threads))
        for dim, threads in zip(dims, threadsperblock)
    )
    return blockspergrid, threadsperblock
def downsample(X, res):
    """
    Downsampling in real space.

    Keeps every (2**res)-th sample along each of the first three axes of X
    and returns the selection as a C-contiguous array.
    """
    stride = 2**res
    every_nth = slice(None, None, stride)
    return np.ascontiguousarray(X[every_nth, every_nth, every_nth])
@cuda.jit()
def crop_freq_3d_gpu(signal_fourier, result):
    """
    CUDA kernel: copy the lowest frequencies of `signal_fourier` into `result`.

    Result needs to be the correct size, i.e.
    (original_width // 2**res, original_height // 2**res, original_depth // 2**res)
    The amount of cropping is inferred from the shape of `result`.

    Along each axis the first half of `result` is read from the front of
    `signal_fourier` and the second half from its end (via negative indices).
    Each thread handles one element of `result`.
    """
    x, y, z = cuda.grid(3)
    width, height, depth = result.shape

    # The launch grid may be larger than `result`; surplus threads must not
    # read or write anything.
    if x >= width or y >= height or z >= depth:
        return

    # Front half maps to the same index, back half to a negative index so it
    # is taken from the end of the (larger) source axis.
    i = x if x < (width // 2) else x - width
    j = y if y < (height // 2) else y - height
    k = z if z < (depth // 2) else z - depth

    result[x, y, z] = signal_fourier[i, j, k]
@cuda.jit()
def MultiplyInPlace(A, B):
    """
    CUDA kernel: elementwise product of two 3D arrays.

    Result is saved in A; B is only read. Each thread handles one element.
    """
    i, j, k = cuda.grid(3)
    # Threads that fall outside A have nothing to do.
    if i >= A.shape[0] or j >= A.shape[1] or k >= A.shape[2]:
        return
    A[i, j, k] = A[i, j, k] * B[i, j, k]
@cuda.jit()
def ModulusInPlace(A):
    """
    CUDA kernel: replace every element of the 3D array A by its absolute value.

    Result is saved in A. Each thread handles one element.
    """
    i, j, k = cuda.grid(3)
    # Threads that fall outside A have nothing to do.
    if i >= A.shape[0] or j >= A.shape[1] or k >= A.shape[2]:
        return
    A[i, j, k] = abs(A[i, j, k])