Skip to content

Commit e259867

Browse files
authored
Fall back to default quantization if quantization params are not found (#8788)
1 parent 17b0664 commit e259867

File tree

13 files changed

+34
-30
lines changed

13 files changed

+34
-30
lines changed

onnxruntime/python/tools/quantization/onnx_quantizer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ def _get_quantization_params(self, param_name, use_scale=None, use_zeropoint=Non
463463
'''
464464
if use_scale is None or use_zeropoint is None:
465465
if self.quantization_params is None or param_name not in self.quantization_params:
466+
logging.info("Quantization parameters for tensor:\"{}\" not specified".format(param_name))
466467
return False, "", "", "", ""
467468

468469
params = self.quantization_params[param_name]
@@ -517,10 +518,7 @@ def _get_quantize_input_nodes(self, node, input_index, qType, given_scale_name=N
517518
[output_name], ql_node_name)
518519
else:
519520
if self.static:
520-
raise ValueError(
521-
"Quantization parameters are not specified for param {}."
522-
"In static mode quantization params for inputs and outputs of nodes to be quantized are required.".
523-
format(input_name))
521+
return None
524522
# dynamic mode
525523
# Scale and Zero Points not available for this input. Add nodes to dynamically compute it
526524
if self.fuse_dynamic_quant and qType == onnx_proto.TensorProto.UINT8:
@@ -655,6 +653,8 @@ def quantize_inputs(self, node, indices, initializer_use_weight_qType=True, redu
655653
self.model.graph())
656654
if qlinear_node is None:
657655
quantize_input_nodes = self._get_quantize_input_nodes(node, input_index, self.input_qType)
656+
if quantize_input_nodes is None:
657+
return (None, None, None, None)
658658
if from_subgraph:
659659
self.add_new_nodes(quantize_input_nodes)
660660
else:

onnxruntime/python/tools/quantization/operators/activation.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,9 @@ def quantize(self):
4040
# only try to quantize when given quantization parameters for it
4141
data_found, output_scale_name, output_zp_name, _, _ = \
4242
self.quantizer._get_quantization_params(node.output[0], use_scale, use_zeropoint)
43-
if not data_found:
44-
super().quantize()
45-
return
46-
4743
quantized_input_names, zero_point_names, scale_names, nodes = self.quantizer.quantize_inputs(node, [0])
44+
if not data_found or quantized_input_names is None:
45+
return super().quantize()
4846

4947
qlinear_activation_output = node.output[0] + "_quantized"
5048
qlinear_activation_name = ""

onnxruntime/python/tools/quantization/operators/attention.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ def quantize(self):
2222

2323
(quantized_input_names, zero_point_names, scale_names, nodes) = \
2424
self.quantizer.quantize_inputs(node, [0, 1], reduce_range=True, op_level_per_channel=True)
25+
if quantized_input_names is None:
26+
return super().quantize()
2527

2628
qattention_name = "" if node.name == "" else node.name + "_quant"
2729

onnxruntime/python/tools/quantization/operators/base_operator.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ def __init__(self, onnx_quantizer, onnx_node):
55

66
def quantize(self):
77
'''
8-
Given a node which does not support quantization(Conv, Matmul, Gather), this method
9-
checks whether the input to this node is quantized and adds a DequantizeLinear node
10-
to dequantize this input back to FP32
8+
Given a node which does not support quantization, this method checks whether the input to
9+
this node is quantized and adds a DequantizeLinear node to dequantize this input back to FP32
1110
parameter node: Current node
1211
parameter new_nodes_list: List of new nodes created before processing current node
1312
return: List of new nodes created
@@ -19,4 +18,4 @@ def quantize(self):
1918
self.quantizer.new_nodes.append(dequantize_node)
2019

2120
# Append the original node
22-
self.quantizer.new_nodes.append(self.node)
21+
self.quantizer.new_nodes.append(self.node)

onnxruntime/python/tools/quantization/operators/binary_op.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ def quantize(self):
1313

1414
data_found, output_scale_name, output_zp_name, _, _ = \
1515
self.quantizer._get_quantization_params(node.output[0])
16-
if (not data_found): # only try to quantize when given quantization parameters for it
17-
return super().quantize()
18-
1916
(quantized_input_names, zero_point_names, scale_names, nodes) = \
2017
self.quantizer.quantize_inputs(node, [0, 1], initializer_use_weight_qType=False)
18+
if not data_found or quantized_input_names is None:
19+
return super().quantize()
2120

2221
qlinear_binary_math_output = node.output[0] + "_quantized"
2322
qlinear_binary_math_name = node.name + "_quant" if node.name != "" else ""

onnxruntime/python/tools/quantization/operators/concat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ def quantize(self):
1212

1313
data_found, output_scale_name, output_zp_name, _, _ = \
1414
self.quantizer._get_quantization_params(node.output[0])
15-
if not data_found:
16-
raise ValueError("Quantization parameters for :\"{}\" of node:\"{}\" not specified".format(node.output[0], node.name))
1715
(q_input_names, zero_point_names, scale_names, nodes) = self.quantizer.quantize_inputs(node, [*range(0, len(node.input))])
16+
if not data_found or q_input_names is None:
17+
return super().quantize()
1818

1919
# Create an entry for output quantized value
2020
quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]

onnxruntime/python/tools/quantization/operators/conv.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ def quantize(self):
106106
node = self.node
107107
assert (node.op_type == "Conv")
108108

109+
data_found, output_scale_name, output_zp_name, _, _ = \
110+
self.quantizer._get_quantization_params(node.output[0])
111+
109112
if self.quantizer.is_input_a_weight(node.input[1]) and self.quantizer.is_per_channel():
110113
(quantized_input_names, zero_point_names, scale_names, nodes) = \
111114
self.quantizer.quantize_inputs(node, [0])
@@ -118,17 +121,14 @@ def quantize(self):
118121
(quantized_input_names, zero_point_names, scale_names, nodes) = \
119122
self.quantizer.quantize_inputs(node, [0, 1])
120123

124+
if not data_found or quantized_input_names is None:
125+
return super().quantize()
126+
121127
quantized_bias_name = ""
122128
bias_present = False
123129
if len(node.input) == 3:
124130
quantized_bias_name = self.quantizer.quantize_bias_static(node.input[2], node.input[0], node.input[1])
125131
bias_present = True
126-
data_found, output_scale_name, output_zp_name, _, _ = \
127-
self.quantizer._get_quantization_params(node.output[0])
128-
129-
if not data_found:
130-
raise ValueError("Quantization parameters for output:\"{}\" of node:\"{}\" not specified".format(
131-
node.output[0], node.name))
132132

133133
qlinear_conv_output = node.output[0] + "_quantized"
134134
qlinear_conv_name = qlinear_conv_name = node.name + "_quant" if node.name != "" else ""

onnxruntime/python/tools/quantization/operators/embed_layernorm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def quantize(self):
3030
'''
3131
(quantized_input_names, zero_point_names, scale_names, nodes) = \
3232
self.quantizer.quantize_inputs(node, [2, 3, 4, 5, 6])
33+
if quantized_input_names is None:
34+
return super().quantize()
3335

3436
qembed_layer_norm_name = "" if node.name == "" else node.name + "_quant"
3537

onnxruntime/python/tools/quantization/operators/gather.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def quantize(self):
2020

2121
(quantized_input_names, zero_point_names, scale_names, nodes) = \
2222
self.quantizer.quantize_inputs(node, [0])
23+
if quantized_input_names is None:
24+
return super().quantize()
2325

2426
gather_new_output = node.output[0] + "_quantized"
2527

onnxruntime/python/tools/quantization/operators/gavgpool.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def quantize(self):
1414
# If input to this node is not quantized then keep this node.
1515
if node.input[0] not in self.quantizer.quantized_value_map:
1616
return super().quantize()
17+
1718
quantized_input_value = self.quantizer.quantized_value_map[node.input[0]]
1819

1920
# Create an entry for output quantized value.

onnxruntime/python/tools/quantization/operators/matmul.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,10 @@ def quantize(self):
6767

6868
(quantized_input_names, zero_point_names, scale_names, nodes) = \
6969
self.quantizer.quantize_inputs(node, [0, 1], reduce_range=True, op_level_per_channel=True)
70-
7170
data_found, output_scale_name, output_zp_name, _, _ = \
7271
self.quantizer._get_quantization_params(node.output[0])
73-
74-
if not data_found:
75-
raise ValueError("Quantization parameters for output:\"{}\" of node:\"{}\" not specified".format(
76-
node.output[0], node.name))
72+
if not data_found or quantized_input_names is None:
73+
return super().quantize()
7774

7875
qlinear_matmul_output = node.output[0] + "_quantized"
7976
qlinear_matmul_name = node.name + "_quant" if node.name != "" else ""

onnxruntime/python/tools/quantization/operators/pooling.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ def quantize(self):
1212
# only try to quantize when given quantization parameters for it
1313
data_found, output_scale_name, output_zp_name, _, _ = \
1414
self.quantizer._get_quantization_params(node.output[0])
15-
if (not data_found):
16-
return super().quantize()
1715

1816
# get quantized input tensor names, quantize input if needed
1917
quantized_input_names, input_zero_point_names, input_scale_names, nodes = self.quantizer.quantize_inputs(node, [0])
2018

19+
if not data_found or quantized_input_names is None:
20+
return super().quantize()
21+
2122
# Create an entry for output quantized value.
2223
qlinear_output_name = node.output[0] + "_quantized"
2324
quantized_output_value = QuantizedValue(

onnxruntime/python/tools/quantization/operators/split.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ def __init__(self, onnx_quantizer, onnx_node):
1111
def quantize(self):
1212
node = self.node
1313
quantized_input_names, zero_point_names, scale_names, nodes = self.quantizer.quantize_inputs(node, [0])
14+
if quantized_input_names is None:
15+
return super().quantize()
16+
1417
quantized_node_name = ""
1518
if node.name != "":
1619
quantized_node_name = node.name + "_quant"

0 commit comments

Comments (0)