from tensorflow.python.ops import array_ops


-class QSeparableConv2DTransposeTPU(Conv2DTranspose):
-  """Quantized Separable Conv2DTranspose layer for TPU and GPU."""
+class QSeparableConv2DTranspose(Conv2DTranspose):
+  """Quantized Separable Conv2DTranspose layer."""

  # Most of these parameters follow the implementation of Conv2DTranspose
  # in Keras, with the exception of the following parameters.
@@ -42,17 +42,6 @@ class QSeparableConv2DTransposeTPU(Conv2DTranspose):
  # we refer the reader to the documentation of Conv2DTranspose in Keras for
  # the other parameters.

-  # Important Notes:
-  # This implementation requires the use of grouped convolution, which is
-  # only supported on TPU/GPU, not on CPU.
-  # When running on CPU, it gives the following error:
-  # "Gradients for grouped convolutions are not supported on CPU.
-  # Please file a feature request if you run into this issue."
-  # For now we can train with this implementation on TPU/GPU; for inference
-  # on CPU, we will convert the layer to an equivalent
-  # QSeparableConv2DTransposeCPU layer, which is slow in training,
-  # but should suffice in inference.
-
  def __init__(self,
               filters,
               kernel_size,
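For context on the note removed above: Keras expresses grouped convolution through the `groups` argument of `Conv2D`, and depthwise is the extreme case `groups == input_channels`. A minimal sketch of the limitation being worked around, with made-up shapes (not from this file):

```python
import tensorflow as tf

# Hypothetical shapes, for illustration only.
inputs = tf.random.normal((1, 8, 8, 4))

# Four input channels split into two groups: each half of the filters
# only sees its own half of the input channels.
grouped = tf.keras.layers.Conv2D(filters=8, kernel_size=3, groups=2,
                                 padding="same")
print(grouped(inputs).shape)  # (1, 8, 8, 8)

# The forward pass runs on any device; at the time of this change,
# computing gradients of a grouped convolution on CPU raised the error
# quoted in the removed comment.
```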
@@ -268,21 +257,48 @@ def conv_transpose_op(self, inputs, filters, strides, padding,
    else:
      quantized_kernel = kernel_weights

+    output_filters = 1 if is_depthwise else filters
+
    if self.data_format == "channels_first":
-      output_shape = (batch_size, filters, out_height, out_width)
+      output_shape = (batch_size, output_filters, out_height, out_width)
    else:
-      output_shape = (batch_size, out_height, out_width, filters)
+      output_shape = (batch_size, out_height, out_width, output_filters)

    output_shape_tensor = array_ops.stack(output_shape)

-    outputs = tf.keras.backend.conv2d_transpose(
-        inputs,
-        quantized_kernel,
-        output_shape_tensor,
-        strides=strides,
-        padding=padding,
-        data_format=self.data_format,
-        dilation_rate=dilation_rate)
+    # Split the input channels into groups.
+    x = tf.split(inputs, self._input_shape[-1], axis=-1)
+
+    if is_depthwise:
+      # For depthwise convolution, since CPU doesn't support grouped
+      # convolution, we run convolution on each slice of inputs and concat
+      # the results.
+      outputs = [
+          tf.keras.backend.conv2d_transpose(
+              x=x[i],
+              kernel=quantized_kernel[:, :, :, i:i + 1],
+              output_shape=output_shape_tensor,
+              strides=strides,
+              padding=padding,
+              data_format=self.data_format,
+              dilation_rate=dilation_rate,
+          )
+          for i in range(len(x))
+      ]
+
+      # Concat the channels.
+      outputs = tf.concat(outputs, axis=-1)
+
+    else:
+      outputs = tf.keras.backend.conv2d_transpose(
+          inputs,
+          quantized_kernel,
+          output_shape_tensor,
+          strides=strides,
+          padding=padding,
+          data_format=self.data_format,
+          dilation_rate=dilation_rate,
+      )

    if not context.executing_eagerly():
      # Infer the static output shape:
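The split-and-concat branch added above is a device-portable way to emulate a grouped (here fully depthwise) transposed convolution. A self-contained sketch of the same idea, with made-up shapes; note that `conv2d_transpose` expects its kernel laid out as `(kh, kw, out_channels, in_channels)`, so slicing the last axis picks one input channel at a time:

```python
import tensorflow as tf

batch, h, w, channels = 2, 8, 8, 3
kh, kw, stride = 3, 3, 2

inputs = tf.random.normal((batch, h, w, channels))
# One (kh, kw, 1, 1) kernel per input channel, stacked along the last axis.
kernel = tf.random.normal((kh, kw, 1, channels))

# With "same" padding, a stride-2 transposed conv doubles the spatial size.
out_shape = (batch, h * stride, w * stride, 1)

# Deconvolve each channel slice with its own kernel slice, then concat.
slices = tf.split(inputs, channels, axis=-1)
outputs = tf.concat(
    [
        tf.keras.backend.conv2d_transpose(
            slices[i],
            kernel[:, :, :, i:i + 1],
            out_shape,
            strides=(stride, stride),
            padding="same")
        for i in range(channels)
    ],
    axis=-1)
print(outputs.shape)  # (2, 16, 16, 3)
```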
@@ -386,92 +402,3 @@ def get_prunable_weights(self):
      w.append(self.bias)

    return w
-
-
-class QSeparableConv2DTransposeCPU(QSeparableConv2DTransposeTPU):
-  """CPU version of Quantized Separable Conv2DTranspose layer.
-
-  Important Notes:
-  * This implementation can run on TPU, GPU and CPU, but the training speed
-    can be significantly slower than the TPU/GPU version.
-
-  * QSeparableConv2DTransposeCPU and QSeparableConv2DTransposeTPU layers have
-    the same shapes for kernel and bias variables. With the same input and
-    the same weights, the outputs of the two layers are the same.
-
-  """
-
-  def conv_transpose_op(self, inputs, filters, strides, padding,
-                        output_padding, dilation_rate,
-                        kernel_quantizer, kernel_weights, use_bias,
-                        bias_quantizer, bias, activation, is_depthwise):
-    """Transpose convolution op that is shared by both depthwise and pointwise."""
-
-    batch_size, out_height, out_width, kernel_h, kernel_w = (
-        self._get_output_size(inputs, output_padding, padding, strides,
-                              dilation_rate, kernel_weights))
-
-    if kernel_quantizer:
-      quantized_kernel = kernel_quantizer(kernel_weights)
-    else:
-      quantized_kernel = kernel_weights
-
-    output_filters = 1 if is_depthwise else filters
-
-    if self.data_format == "channels_first":
-      output_shape = (batch_size, output_filters, out_height, out_width)
-    else:
-      output_shape = (batch_size, out_height, out_width, output_filters)
-
-    output_shape_tensor = array_ops.stack(output_shape)
-
-    # Split the input channels into groups.
-    x = tf.split(inputs, self._input_shape[-1], axis=-1)
-
-    if is_depthwise:
-      # For depthwise convolution, since CPU doesn't support grouped
-      # convolution, we run convolution on each slice of inputs and concat
-      # the results.
-      outputs = [
-          tf.keras.backend.conv2d_transpose(
-              x=x[i],
-              kernel=quantized_kernel[:, :, :, i:i + 1],
-              output_shape=output_shape_tensor,
-              strides=strides,
-              padding=padding,
-              data_format=self.data_format,
-              dilation_rate=dilation_rate) for i in range(len(x))]
-
-      # Concat the channels.
-      outputs = tf.concat(outputs, axis=-1)
-
-    else:
-      outputs = tf.keras.backend.conv2d_transpose(
-          inputs,
-          quantized_kernel,
-          output_shape_tensor,
-          strides=strides,
-          padding=padding,
-          data_format=self.data_format,
-          dilation_rate=dilation_rate)
-
-    if not context.executing_eagerly():
-      # Infer the static output shape:
-      out_shape = self.compute_final_output_shape(
-          input_shape=inputs.shape,
-          kernel_size=(kernel_h, kernel_w),
-          strides=strides,
-          is_depthwise=is_depthwise)
-      outputs.set_shape(out_shape)
-
-    if use_bias:
-      quantized_bias = bias_quantizer(bias) if bias_quantizer else bias
-      outputs = tf.keras.backend.bias_add(
-          outputs,
-          quantized_bias,
-          data_format=self.data_format)
-
-    if activation is not None:
-      return activation(outputs)
-
-    return outputs
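The equivalence claim in the removed docstring (same weights in, same outputs out) can be sanity-checked numerically: the per-channel loop is the same linear map as one transposed convolution whose kernel is diagonal in the channel dimension, which is what the grouped TPU/GPU path computed. A hedged sketch with made-up shapes:

```python
import numpy as np
import tensorflow as tf

batch, h, w, channels = 1, 4, 4, 3
kh, kw, stride = 3, 3, 2

inputs = tf.random.normal((batch, h, w, channels))
depthwise_kernel = tf.random.normal((kh, kw, 1, channels))

# Path A: per-channel split and concat, as in the merged layer.
split_out = tf.concat(
    [
        tf.keras.backend.conv2d_transpose(
            tf.split(inputs, channels, axis=-1)[i],
            depthwise_kernel[:, :, :, i:i + 1],
            (batch, h * stride, w * stride, 1),
            strides=(stride, stride),
            padding="same")
        for i in range(channels)
    ],
    axis=-1)

# Path B: one transposed conv with a channel-diagonal kernel, so output
# channel i only connects to input channel i.
dk = depthwise_kernel[:, :, 0, :]                    # (kh, kw, channels)
full_kernel = dk[:, :, :, None] * tf.eye(channels)   # (kh, kw, out, in)
full_out = tf.keras.backend.conv2d_transpose(
    inputs,
    full_kernel,
    (batch, h * stride, w * stride, channels),
    strides=(stride, stride),
    padding="same")

np.testing.assert_allclose(split_out.numpy(), full_out.numpy(),
                           rtol=1e-5, atol=1e-5)
```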