Skip to content

Commit e259867

Browse files
authored
Fall back to default quantization if quantization params are not found (#8788)
1 parent 17b0664 commit e259867

File tree

13 files changed

+34
-30
lines changed

13 files changed

+34
-30
lines changed

onnxruntime/python/tools/quantization/onnx_quantizer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ def _get_quantization_params(self, param_name, use_scale=None, use_zeropoint=Non
463463
'''
464464
if use_scale is None or use_zeropoint is None:
465465
if self.quantization_params is None or param_name not in self.quantization_params:
466+
logging.info("Quantization parameters for tensor:\"{}\" not specified".format(param_name))
466467
return False, "", "", "", ""
467468

468469
params = self.quantization_params[param_name]
@@ -517,10 +518,7 @@ def _get_quantize_input_nodes(self, node, input_index, qType, given_scale_name=N
517518
[output_name], ql_node_name)
518519
else:
519520
if self.static:
520-
raise ValueError(
521-
"Quantization parameters are not specified for param {}."
522-
"In static mode quantization params for inputs and outputs of nodes to be quantized are required.".
523-
format(input_name))
521+
return None
524522
# dynamic mode
525523
# Scale and Zero Points not available for this input. Add nodes to dynamically compute it
526524
if self.fuse_dynamic_quant and qType == onnx_proto.TensorProto.UINT8:
@@ -655,6 +653,8 @@ def quantize_inputs(self, node, indices, initializer_use_weight_qType=True, redu
655653
self.model.graph())
656654
if qlinear_node is None:
657655
quantize_input_nodes = self._get_quantize_input_nodes(node, input_index, self.input_qType)
656+
if quantize_input_nodes is None:
657+
return (None, None, None, None)
658658
if from_subgraph:
659659
self.add_new_nodes(quantize_input_nodes)
660660
else:

onnxruntime/python/tools/quantization/operators/activation.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,9 @@ def quantize(self):
4040
# only try to quantize when given quantization parameters for it
4141
data_found, output_scale_name, output_zp_name, _, _ = \
4242
self.quantizer._get_quantization_params(node.output[0], use_scale, use_zeropoint)
43-
if not data_found:
44-
super().quantize()
45-
return
46-
4743
quantized_input_names, zero_point_names, scale_names, nodes = self.quantizer.quantize_inputs(node, [0])
44+
if not data_found or quantized_input_names is None:
45+
return super().quantize()
4846

4947
qlinear_activation_output = node.output[0] + "_quantized"
5048
qlinear_activation_name = ""

onnxruntime/python/tools/quantization/operators/attention.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def quantize(self):
2222

2323
(quantized_input_names, zero_point_names, scale_names, nodes) = \
2424
self.quantizer.quantize_inputs(node, [0, 1], reduce_range=True, op_level_per_channel=True)
25+
if quantized_input_names is None:
26+
return super().quantize()
2527

2628
qattention_name = "" if node.name == "" else node.name + "_quant"
2729

onnxruntime/python/tools/quantization/operators/base_operator.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ def __init__(self, onnx_quantizer, onnx_node):
55

66
def quantize(self):
77
'''
8-
Given a node which does not support quantization(Conv, Matmul, Gather), this method
9-
checks whether the input to this node is quantized and adds a DequantizeLinear node
10-
to dequantize this input back to FP32
8+
Given a node which does not support quantization, this method checks whether the input to
9+
this node is quantized and adds a DequantizeLinear node to dequantize this input back to FP32
1110
parameter node: Current node
1211
parameter new_nodes_list: List of new nodes created before processing current node
1312
return: List of new nodes created
@@ -19,4 +18,4 @@ def quantize(self):
1918
self.quantizer.new_nodes.append(dequantize_node)
2019

2120
# Append the original node
22-
self.quantizer.new_nodes.append(self.node)
21+
self.quantizer.new_nodes.append(self.node)

onnxruntime/python/tools/quantization/operators/binary_op.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ def quantize(self):
1313

1414
data_found, output_scale_name, output_zp_name, _, _ = \
1515
self.quantizer._get_quantization_params(node.output[0])
16-
if (not data_found): # only try to quantize when given quantization parameters for it
17-
return super().quantize()
18-
1916
(quantized_input_names, zero_point_names, scale_names, nodes) = \
2017
self.quantizer.quantize_inputs(node, [0, 1], initializer_use_weight_qType=False)
18+
if not data_found or quantized_input_names is None:
19+
return super().quantize()
2120

2221
qlinear_binary_math_output = node.output[0] + "_quantized"
2322
qlinear_binary_math_name = node.name + "_quant" if node.name != "" else ""

onnxruntime/python/tools/quantization/operators/concat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ def quantize(self):
1212

1313
data_found, output_scale_name, output_zp_name, _, _ = \
1414
self.quantizer._get_quantization_params(node.output[0])
15-
if not data_found:
16-
raise ValueError("Quantization parameters for :\"{}\" of node:\"{}\" not specified".format(node.output[0], node.name))
1715
(q_input_names, zero_point_names, scale_names, nodes) = self.quantizer.quantize_inputs(node, [*range(0, len(node.input))])
16+
if not data_found or q_input_names is None:
17+
return super().quantize()
1818

1919
# Create an entry for output quantized value
2020
quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]

onnxruntime/python/tools/quantization/operators/conv.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ def quantize(self):
106106
node = self.node
107107
assert (node.op_type == "Conv")
108108

109+
data_found, output_scale_name, output_zp_name, _, _ = \
110+
self.quantizer._get_quantization_params(node.output[0])
111+
109112
if self.quantizer.is_input_a_weight(node.input[1]) and self.quantizer.is_per_channel():
110113
(quantized_input_names, zero_point_names, scale_names, nodes) = \
111114
self.quantizer.quantize_inputs(node, [0])
@@ -118,17 +121,14 @@ def quantize(self):
118121
(quantized_input_names, zero_point_names, scale_names, nodes) = \
119122
self.quantizer.quantize_inputs(node, [0, 1])
120123

124+
if not data_found or quantized_input_names is None:
125+
return super().quantize()
126+
121127
quantized_bias_name = ""
122128
bias_present = False
123129
if len(node.input) == 3:
124130
quantized_bias_name = self.quantizer.quantize_bias_static(node.input[2], node.input[0], node.input[1])
125131
bias_present = True
126-
data_found, output_scale_name, output_zp_name, _, _ = \
127-
self.quantizer._get_quantization_params(node.output[0])
128-
129-
if not data_found:
130-
raise ValueError("Quantization parameters for output:\"{}\" of node:\"{}\" not specified".format(
131-
node.output[0], node.name))
132132

133133
qlinear_conv_output = node.output[0] + "_quantized"
134134
qlinear_conv_name = qlinear_conv_name = node.name + "_quant" if node.name != "" else ""

onnxruntime/python/tools/quantization/operators/embed_layernorm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def quantize(self):
3030
'''
3131
(quantized_input_names, zero_point_names, scale_names, nodes) = \
3232
self.quantizer.quantize_inputs(node, [2, 3, 4, 5, 6])
33+
if quantized_input_names is None:
34+
return super().quantize()
3335

3436
qembed_layer_norm_name = "" if node.name == "" else node.name + "_quant"
3537

onnxruntime/python/tools/quantization/operators/gather.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def quantize(self):
2020

2121
(quantized_input_names, zero_point_names, scale_names, nodes) = \
2222
self.quantizer.quantize_inputs(node, [0])
23+
if quantized_input_names is None:
24+
return super().quantize()
2325

2426
gather_new_output = node.output[0] + "_quantized"
2527

onnxruntime/python/tools/quantization/operators/gavgpool.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def quantize(self):
1414
# If input to this node is not quantized then keep this node.
1515
if node.input[0] not in self.quantizer.quantized_value_map:
1616
return super().quantize()
17+
1718
quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]
1819

1920
# Create an entry for output quantized value.

onnxruntime/python/tools/quantization/operators/matmul.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,10 @@ def quantize(self):
6767

6868
(quantized_input_names, zero_point_names, scale_names, nodes) = \
6969
self.quantizer.quantize_inputs(node, [0, 1], reduce_range=True, op_level_per_channel=True)
70-
7170
data_found, output_scale_name, output_zp_name, _, _ = \
7271
self.quantizer._get_quantization_params(node.output[0])
73-
74-
if not data_found:
75-
raise ValueError("Quantization parameters for output:\"{}\" of node:\"{}\" not specified".format(
76-
node.output[0], node.name))
72+
if not data_found or quantized_input_names is None:
73+
return super().quantize()
7774

7875
qlinear_matmul_output = node.output[0] + "_quantized"
7976
qlinear_matmul_name = node.name + "_quant" if node.name != "" else ""

onnxruntime/python/tools/quantization/operators/pooling.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ def quantize(self):
1212
# only try to quantize when given quantization parameters for it
1313
data_found, output_scale_name, output_zp_name, _, _ = \
1414
self.quantizer._get_quantization_params(node.output[0])
15-
if (not data_found):
16-
return super().quantize()
1715

1816
# get quantized input tensor names, quantize input if needed
1917
quantized_input_names, input_zero_point_names, input_scale_names, nodes = self.quantizer.quantize_inputs(node, [0])
2018

19+
if not data_found or quantized_input_names is None:
20+
return super().quantize()
21+
2122
# Create an entry for output quantized value.
2223
qlinear_output_name = node.output[0] + "_quantized"
2324
quantized_output_value = QuantizedValue(

onnxruntime/python/tools/quantization/operators/split.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ def __init__(self, onnx_quantizer, onnx_node):
1111
def quantize(self):
1212
node = self.node
1313
quantized_input_names, zero_point_names, scale_names, nodes = self.quantizer.quantize_inputs(node, [0])
14+
if quantized_input_names is None:
15+
return super().quantize()
16+
1417
quantized_node_name = ""
1518
if node.name != "":
1619
quantized_node_name = node.name + "_quant"

0 commit comments

Comments (0)