Skip to content

Commit 1308e5d

Browse files
Akshaya Purohit, copybara-github
Akshaya Purohit
authored and committed
Update qseparable_conv2d_transpose to have a single version for all platforms.
Upgrading the CPU version to be used everywhere.
PiperOrigin-RevId: 685794266
Change-Id: I70dd34ee602d862169dbce8e6b7d5ef3ae2ebb9a
1 parent cbcc62e commit 1308e5d

File tree

3 files changed

+83
-181
lines changed

3 files changed

+83
-181
lines changed

qkeras/__init__.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,20 @@
2121

2222
from .b2t import * # pylint: disable=wildcard-import
2323
from .estimate import * # pylint: disable=wildcard-import
24-
from .qlayers import * # pylint: disable=wildcard-import
25-
from .quantizers import * # pylint: disable=wildcard-import
24+
from .qconv2d_batchnorm import QConv2DBatchnorm
2625
from .qconvolutional import * # pylint: disable=wildcard-import
26+
from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm
27+
from .qlayers import * # pylint: disable=wildcard-import
2728
from .qmac import * # pylint: disable=wildcard-import
28-
from .qrecurrent import * # pylint: disable=wildcard-import
2929
from .qnormalization import * # pylint: disable=wildcard-import
3030
from .qoctave import * # pylint: disable=wildcard-import
3131
from .qpooling import * # pylint: disable=wildcard-import
32-
from .safe_eval import * # pylint: disable=wildcard-import
32+
from .qrecurrent import * # pylint: disable=wildcard-import
33+
from .qseparable_conv2d_transpose import QSeparableConv2DTranspose
3334
#from .qtools.run_qtools import QTools
3435
#from .qtools.settings import cfg
35-
from .qconv2d_batchnorm import QConv2DBatchnorm
36-
from .qdepthwiseconv2d_batchnorm import QDepthwiseConv2DBatchnorm
37-
from .qseparable_conv2d_transpose import QSeparableConv2DTransposeTPU
38-
from .qseparable_conv2d_transpose import QSeparableConv2DTransposeCPU
36+
from .quantizers import * # pylint: disable=wildcard-import
37+
from .safe_eval import * # pylint: disable=wildcard-import
3938

4039

4140
assert tf.executing_eagerly(), "QKeras requires TF with eager execution mode on"

qkeras/qseparable_conv2d_transpose.py

Lines changed: 39 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
from tensorflow.python.ops import array_ops
2828

2929

30-
class QSeparableConv2DTransposeTPU(Conv2DTranspose):
31-
"""Quantized Separable Conv2DTranspose layer for TPU and GPU."""
30+
class QSeparableConv2DTranspose(Conv2DTranspose):
31+
"""Quantized Separable Conv2DTranspose layer."""
3232

3333
# Most of these parameters follow the implementation of Conv2DTranspose
3434
# in Keras, with the exception of following parameters.
@@ -42,17 +42,6 @@ class QSeparableConv2DTransposeTPU(Conv2DTranspose):
4242
# we refer the reader to the documentation of Conv2DTranspose in Keras for
4343
# the other parameters.
4444

45-
# Important Notes:
46-
# This implementation requies the use of grouped convolution, which is only
47-
# supported in TPU/GPU, not in CPU.
48-
# When running in CPU, it gives the following error:
49-
# "Gradients for grouped convolutions are not supported on CPU.
50-
# Please file a feature request if you run into this issue."
51-
# For now we can train with this implmentation in TPU/GPU,
52-
# for inference in CPU, we will convert the layer to an equivalent
53-
# QSeparableConv2DTransposeCPU layer, which is slow in training,
54-
# but should suffice in inference.
55-
5645
def __init__(self,
5746
filters,
5847
kernel_size,
@@ -268,21 +257,48 @@ def conv_transpose_op(self, inputs, filters, strides, padding,
268257
else:
269258
quantized_kernel = kernel_weights
270259

260+
output_filters = 1 if is_depthwise else filters
261+
271262
if self.data_format == "channels_first":
272-
output_shape = (batch_size, filters, out_height, out_width)
263+
output_shape = (batch_size, output_filters, out_height, out_width)
273264
else:
274-
output_shape = (batch_size, out_height, out_width, filters)
265+
output_shape = (batch_size, out_height, out_width, output_filters)
275266

276267
output_shape_tensor = array_ops.stack(output_shape)
277268

278-
outputs = tf.keras.backend.conv2d_transpose(
279-
inputs,
280-
quantized_kernel,
281-
output_shape_tensor,
282-
strides=strides,
283-
padding=padding,
284-
data_format=self.data_format,
285-
dilation_rate=dilation_rate)
269+
# Split the input channels into groups.
270+
x = tf.split(inputs, self._input_shape[-1], axis=-1)
271+
272+
if is_depthwise:
273+
# For depthwise convolution, since CPU doesn't support grouped
274+
# convolution, we run convolution on each slice of inputs and concat
275+
# the results.
276+
outputs = [
277+
tf.keras.backend.conv2d_transpose(
278+
x=x[i],
279+
kernel=quantized_kernel[:, :, :, i : i + 1],
280+
output_shape=output_shape_tensor,
281+
strides=strides,
282+
padding=padding,
283+
data_format=self.data_format,
284+
dilation_rate=dilation_rate,
285+
)
286+
for i in range(len(x))
287+
]
288+
289+
# Concat the channels.
290+
outputs = tf.concat(outputs, axis=-1)
291+
292+
else:
293+
outputs = tf.keras.backend.conv2d_transpose(
294+
inputs,
295+
quantized_kernel,
296+
output_shape_tensor,
297+
strides=strides,
298+
padding=padding,
299+
data_format=self.data_format,
300+
dilation_rate=dilation_rate,
301+
)
286302

287303
if not context.executing_eagerly():
288304
# Infer the static output shape:
@@ -386,92 +402,3 @@ def get_prunable_weights(self):
386402
w.append(self.bias)
387403

388404
return w
389-
390-
391-
class QSeparableConv2DTransposeCPU(QSeparableConv2DTransposeTPU):
392-
"""CPU version of Quantized Separable Conv2DTranspose layer.
393-
394-
Important Notes:
395-
* This implementation can run on TPU, GPU and CPU. But the training speed can
396-
be significantly slower than the TPU/GPU version.
397-
398-
* QSeparableConv2DTransposeCPU and QSeparableConv2DTransposeTPU layer have
399-
the same shape on kernel and bias variables. With the same input and the same
400-
weights, the output of the two layers are the same.
401-
402-
"""
403-
404-
def conv_transpose_op(self, inputs, filters, strides, padding,
405-
output_padding, dilation_rate,
406-
kernel_quantizer, kernel_weights, use_bias,
407-
bias_quantizer, bias, activation, is_depthwise):
408-
"""Transpose convolution op that shared by both depthwise and pointwise."""
409-
410-
batch_size, out_height, out_width, kernel_h, kernel_w = (
411-
self._get_output_size(inputs, output_padding, padding, strides,
412-
dilation_rate, kernel_weights))
413-
414-
if kernel_quantizer:
415-
quantized_kernel = kernel_quantizer(kernel_weights)
416-
else:
417-
quantized_kernel = kernel_weights
418-
419-
output_filters = 1 if is_depthwise else filters
420-
421-
if self.data_format == "channels_first":
422-
output_shape = (batch_size, output_filters, out_height, out_width)
423-
else:
424-
output_shape = (batch_size, out_height, out_width, output_filters)
425-
426-
output_shape_tensor = array_ops.stack(output_shape)
427-
428-
# Split the input channels into groups.
429-
x = tf.split(inputs, self._input_shape[-1], axis=-1)
430-
431-
if is_depthwise:
432-
# For depthwise convolution, since CPU doesn't support grouped
433-
# convolution, we run convolution on each slice of inputs and concat
434-
# the results.
435-
outputs = [
436-
tf.keras.backend.conv2d_transpose(
437-
x=x[i],
438-
kernel=quantized_kernel[:, :, :, i : i + 1],
439-
output_shape=output_shape_tensor,
440-
strides=strides,
441-
padding=padding,
442-
data_format=self.data_format,
443-
dilation_rate=dilation_rate) for i in range(len(x))]
444-
445-
# Concat the channels.
446-
outputs = tf.concat(outputs, axis=-1)
447-
448-
else:
449-
outputs = tf.keras.backend.conv2d_transpose(
450-
inputs,
451-
quantized_kernel,
452-
output_shape_tensor,
453-
strides=strides,
454-
padding=padding,
455-
data_format=self.data_format,
456-
dilation_rate=dilation_rate)
457-
458-
if not context.executing_eagerly():
459-
# Infer the static output shape:
460-
out_shape = self.compute_final_output_shape(
461-
input_shape=inputs.shape,
462-
kernel_size=(kernel_h, kernel_w),
463-
strides=strides,
464-
is_depthwise=is_depthwise)
465-
outputs.set_shape(out_shape)
466-
467-
if use_bias:
468-
quantized_bias = bias_quantizer(bias) if bias_quantizer else bias
469-
outputs = tf.keras.backend.bias_add(
470-
outputs,
471-
quantized_bias,
472-
data_format=self.data_format)
473-
474-
if activation is not None:
475-
return activation(outputs)
476-
477-
return outputs

tests/qseparable_conv2d_transpose_test.py

Lines changed: 37 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -17,75 +17,53 @@
1717
from __future__ import absolute_import
1818
from __future__ import division
1919
from __future__ import print_function
20+
21+
import os
22+
import tempfile
23+
2024
import numpy as np
2125
from numpy.testing import assert_allclose, assert_equal
2226
import pytest
23-
import tempfile
24-
import os
25-
2627
import tensorflow as tf
2728

29+
from qkeras import QSeparableConv2DTranspose
2830
from qkeras import quantized_bits
29-
from qkeras import QSeparableConv2DTransposeTPU
30-
from qkeras import QSeparableConv2DTransposeCPU
3131

3232

33-
def create_model(for_tpu=True):
33+
def create_model():
3434
x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
35-
36-
if for_tpu:
37-
x = QSeparableConv2DTransposeTPU(
38-
filters=2, kernel_size=(2, 2),
39-
strides=(2, 2),
40-
padding="same", name="conv2d_tran",
41-
depthwise_activation=None,
42-
pointwise_activation=None,
43-
depthwise_kernel_quantizer=None,
44-
pointwise_kernel_quantizer=None,
45-
bias_quantizer=None,
46-
)(x)
47-
else:
48-
x = QSeparableConv2DTransposeCPU(
49-
filters=2, kernel_size=(2, 2),
50-
strides=(2, 2),
51-
padding="same", name="conv2d_tran",
52-
depthwise_activation=None,
53-
pointwise_activation=None,
54-
depthwise_kernel_quantizer=None,
55-
pointwise_kernel_quantizer=None,
56-
bias_quantizer=None,
57-
)(x)
35+
x = QSeparableConv2DTranspose(
36+
filters=2,
37+
kernel_size=(2, 2),
38+
strides=(2, 2),
39+
padding="same",
40+
name="conv2d_tran",
41+
depthwise_activation=None,
42+
pointwise_activation=None,
43+
depthwise_kernel_quantizer=None,
44+
pointwise_kernel_quantizer=None,
45+
bias_quantizer=None,
46+
)(x)
5847

5948
model = tf.keras.Model(inputs=img_input, outputs=x)
6049

6150
return model
6251

6352

64-
def create_quantized_model(for_tpu=True):
53+
def create_quantized_model():
6554
x = img_input = tf.keras.layers.Input(shape=(4, 4, 3))
66-
67-
if for_tpu:
68-
x = QSeparableConv2DTransposeTPU(
69-
filters=2, kernel_size=(2, 2),
70-
strides=(1, 1),
71-
padding="same", name="conv2d_tran",
72-
depthwise_activation="quantized_bits(10, 6, 1)",
73-
pointwise_activation="quantized_bits(5, 3, 1)",
74-
depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
75-
pointwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
76-
bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0)
77-
)(x)
78-
else:
79-
x = QSeparableConv2DTransposeCPU(
80-
filters=2, kernel_size=(2, 2),
81-
strides=(1, 1),
82-
padding="same", name="conv2d_tran",
83-
depthwise_activation="quantized_bits(10, 6, 1)",
84-
pointwise_activation="quantized_bits(5, 3, 1)",
85-
depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
86-
pointwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
87-
bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0)
88-
)(x)
55+
x = QSeparableConv2DTranspose(
56+
filters=2,
57+
kernel_size=(2, 2),
58+
strides=(1, 1),
59+
padding="same",
60+
name="conv2d_tran",
61+
depthwise_activation="quantized_bits(10, 6, 1)",
62+
pointwise_activation="quantized_bits(5, 3, 1)",
63+
depthwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
64+
pointwise_kernel_quantizer=quantized_bits(1, 0, 1, alpha=1.0),
65+
bias_quantizer=quantized_bits(2, 2, 1, alpha=1.0),
66+
)(x)
8967

9068
model = tf.keras.Model(inputs=img_input, outputs=x)
9169

@@ -102,8 +80,8 @@ def test_qseparable_conv2d_transpose():
10280
# mapped from input channel(3) to output channel (2) by pointwise conv.
10381
# Pointwise conv output is (1, 8, 8, 2).
10482

105-
# Create model using CPU version: QSeparableConv2DTransposeCPU.
106-
model = create_model(for_tpu=False)
83+
# Create model.
84+
model = create_model()
10785

10886
output_shape = model.output_shape
10987
ws = model.layers[1].weights
@@ -161,9 +139,8 @@ def test_qseparable_conv2d_transpose():
161139
def test_quantization_in_separable_conv2d_transpose():
162140
# Test if quantization is applied correctly.
163141

164-
# Create model using CPU version: QSeparableConv2DTransposeCPU
165-
# with quantization.
166-
model = create_quantized_model(for_tpu=False)
142+
# Create model with quantization.
143+
model = create_quantized_model()
167144

168145
x = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]])
169146
inputs = np.concatenate([x, x, x], axis=-1)
@@ -201,14 +178,13 @@ def test_quantization_in_separable_conv2d_transpose():
201178

202179
def test_save_and_load_model():
203180
# Test if the model can be loaded from a saved model.
204-
model = create_quantized_model(for_tpu=True)
181+
model = create_quantized_model()
205182

206183
fd, fname = tempfile.mkstemp(".hdf5")
207184
model.save(fname)
208185

209186
custom_object = {
210-
"QSeparableConv2DTransposeTPU": QSeparableConv2DTransposeTPU,
211-
"QSeparableConv2DTransposeCPU": QSeparableConv2DTransposeCPU,
187+
"QSeparableConv2DTranspose": QSeparableConv2DTranspose,
212188
}
213189

214190
model_loaded = tf.keras.models.load_model(

0 commit comments

Comments
 (0)