diff --git a/example/auto_compression/pytorch_huggingface/README.md b/example/auto_compression/pytorch_huggingface/README.md
index a7dbff31a..a0949f990 100644
--- a/example/auto_compression/pytorch_huggingface/README.md
+++ b/example/auto_compression/pytorch_huggingface/README.md
@@ -26,35 +26,36 @@
 | bert-base-cased | Base model | 60.06 | 84.31 | 90.68 | 90.84 | 63.53 | 91.63 | 88.46 | 81.35 |
 | bert-base-cased | Pruning + distillation + QAT | 58.69 | 85.05 | 90.74 | 90.42 | 65.34 | 92.08 | 88.22 | 81.51 |
 
-Average accuracy across tasks and the speedup comparison are as follows:
-| Model | Strategy | Accuracy(avg) | FP32 latency | FP16 latency | INT8 latency | Speedup |
-|:-------:|:----------:|:------------:|:------:|:------:|:------:|:------:|
-| bert-base-cased | Base model | 81.35 | 195ms | 51.58ms | - | - |
-| bert-base-cased | Pruning + QAT | 81.51 | - | - | 31.33ms | 6.22x |
+Based on the bert-base-uncased model, accuracy before and after compression is as follows:
+| Model | Strategy | Accuracy(avg) | trtFP32 | trtFP16 | trtINT8 | Speedup | Download |
+|:-------:|:----------:|:------------:|:------:|:------:|:------:|:------:|:------:|
+| bert-base-uncased | Base model | 92.66 | 173.00ms | 38.42ms | - | - | [Model](https://paddle-slim-models.bj.bcebos.com/act/infer_model.zip) |
+| bert-base-uncased | Pruning + QAT | 92.31 | - | - | 33.24ms | 5.20x | [Model](https://paddle-slim-models.bj.bcebos.com/act/unsst2.zip) |
 
 - NVIDIA GPU test environment:
   - Hardware: single NVIDIA Tesla T4
-  - Software: CUDA 11.2, cuDNN 8.1, TensorRT 8.4
+  - Software: CUDA 11.2, cuDNN 8.1, TensorRT 8.6.1.6
   - Test configuration: batch_size: 32, sequence length: 128
+  - Inference script: [paddle_inference_eval.py](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/example/auto_compression/pytorch_huggingface/paddle_inference_eval.py)
 
 ## 3. Auto-Compression Workflow
 
 #### 3.1 Prepare the Environment
 - python >= 3.6
-- PaddlePaddle >= 2.3 (install from the [PaddlePaddle website](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
-- PaddleSlim >= 2.3
+- PaddlePaddle == 2.6 (install from the [PaddlePaddle website](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html))
+- PaddleSlim == 2.6
 - X2Paddle develop branch
 - transformers >= 4.18.0
-- PaddleNLP >= 2.3
+- PaddleNLP == 2.7.2
 - tensorflow == 1.14 (only if compressing TensorFlow models)
-- onnx >= 1.6.0 (only if compressing ONNX models)
-- torch >= 1.5.0 (only if compressing PyTorch models)
+- onnx == 1.15.0 (only if compressing ONNX models)
+- torch == 1.13.1 (only if compressing PyTorch models)
 
 Install PaddlePaddle:
 ```shell
 # CPU
-pip install paddlepaddle
-# GPU
-pip install paddlepaddle-gpu
+python -m pip install paddlepaddle==2.6.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
+# GPU, using CUDA 11.2 as an example
+python -m pip install paddlepaddle-gpu==2.6.0.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
 ```
 
 Install PaddleSlim:
@@ -72,7 +73,7 @@ python setup.py install
 Install PaddleNLP:
 ```shell
-pip install paddlenlp
+pip install --pre --upgrade paddlenlp -f https://www.paddlepaddle.org.cn/whl/paddlenlp.html
 ```
 Note: PaddleNLP is installed only so that the datasets bundled with it can be downloaded.
@@ -99,7 +100,7 @@ attention_msk = torch.zeros([batch_size, max_length]).long()
 from x2paddle.convert import pytorch2paddle
 pytorch2paddle(torch_model,
                save_dir='./x2paddle_cola/',
-               jit_type="trace", 
+               jit_type="trace",
                input_examples=[input_ids, attention_msk, token_type_ids])
 ```
@@ -186,13 +187,36 @@ python run.py --config_path=./configs/cola.yaml --save_dir='./output/cola/'
 export CUDA_VISIBLE_DEVICES=0
 python run.py --config_path=./configs/cola.yaml --eval True
 ```
+[bert-base-uncased model](https://paddle-slim-models.bj.bcebos.com/act/infer_model.zip)
+```shell
+export CUDA_VISIBLE_DEVICES=0
+python run-uncased.py --config_path=./configs/cola-unsst2.yaml --save_dir='./output/unsst2'
+```
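Editor's note: after compression finishes, a quick smoke test confirms the exported program loads and runs. The sketch below is illustrative only (not part of this patch); it assumes the default exported filenames `model.pdmodel`/`model.pdiparams` under `./output/unsst2` and the two int64 inputs declared in `cola-unsst2.yaml`.

```python
# Editor's sketch: smoke-test the exported model with Paddle Inference.
# Assumes ./output/unsst2/model.pdmodel and model.pdiparams exist.
import numpy as np
from paddle import inference

config = inference.Config('./output/unsst2/model.pdmodel',
                          './output/unsst2/model.pdiparams')
config.disable_gpu()  # CPU is enough for a load/run check
predictor = inference.create_predictor(config)

dummy = np.zeros((1, 128), dtype='int64')  # batch 1, max_seq_length 128
for name in predictor.get_input_names():   # ['input_ids', 'token_type_ids'] per the config
    predictor.get_input_handle(name).copy_from_cpu(dummy)
predictor.run()

logits = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
print(logits.shape)  # expected (1, 2): SST-2 is a binary task
```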
 
 ## 4. Inference Deployment
 
 Quantized models can be accelerated with TensorRT on GPU and with MKLDNN on CPU.
-
-- TensorRT inference:
+The following fields configure the inference parameters:
+
+| Parameter | Description |
+|:------:|:------:|
+| model_path | Directory holding the inference model; it must contain the files model.pdmodel and model.pdiparams |
+| model_filename | Name of the model file; defaults to model.pdmodel |
+| params_filename | Name of the parameters file; defaults to model.pdiparams |
+| task_name | Task to run; defaults to cola. Must be one of the tasks in the "METRIC_CLASSES" dictionary |
+| model_type | Model type; defaults to bert-base-cased. Specifies the type or architecture of the pretrained model |
+| model_name_or_path | Model directory or name; defaults to bert-base-cased. Either a local directory of a pretrained model or the name of a HuggingFace pretrained model |
+| device | Device used for inference; defaults to gpu. One of gpu or cpu |
+| batch_size | Batch size for inference; defaults to 32 |
+| max_seq_length | Maximum input sequence length; defaults to 128. Longer sequences are truncated, shorter ones are padded |
+| perf_warmup_steps | Warmup steps for performance testing; defaults to 20. These iterations run before timing so that the measurement is stable |
+| use_trt | Whether to use TensorRT for inference |
+| precision | Inference precision; defaults to fp32. Can be set to fp16 or int8 |
+| use_mkldnn | Whether to use MKLDNN for inference; defaults to False. Enables MKL-DNN acceleration for CPU inference |
+| cpu_threads | Number of CPU threads; defaults to 10. Thread count used for CPU inference |
+
+- Paddle-TensorRT inference:
 
 Environment setup: to use the TensorRT inference engine, install a Paddle build compiled with ```WITH_TRT=ON```, available from the [Python inference library](https://paddleinference.paddlepaddle.org.cn/master/user_guides/download_lib.html#python) download page.
 
@@ -202,6 +226,11 @@
 wget https://bj.bcebos.com/v1/paddle-slim-models/act/x2paddle_cola_new_calib.tar
 tar -xf x2paddle_cola_new_calib.tar
 ```
+
+```shell
+wget https://paddle-slim-models.bj.bcebos.com/act/unsst2.zip
+unzip unsst2.zip
+```
 
 ```shell
 python paddle_inference_eval.py \
     --model_path=x2paddle_cola_new_calib \
@@ -221,6 +250,22 @@ python paddle_inference_eval.py \
     --batch_size=1 \
     --precision=int8
 ```
+bert-base-uncased model:
+```shell
+python paddle_inference_eval.py \
+    --model_path=infer_model \
+    --use_trt \
+    --precision=fp32 \
+    --batch_size=1
+```
+```shell
+python paddle_inference_eval.py \
+    --model_path=output/unsst2 \
+    --use_trt \
+    --precision=int8 \
+    --batch_size=32 \
+    --task_name=sst-2
+```
diff --git a/example/auto_compression/pytorch_huggingface/configs/cola-unsst2.yaml b/example/auto_compression/pytorch_huggingface/configs/cola-unsst2.yaml
new file mode 100644
index 000000000..fcf0d5286
--- /dev/null
+++ b/example/auto_compression/pytorch_huggingface/configs/cola-unsst2.yaml
@@ -0,0 +1,36 @@
+Global:
+  input_names: ['input_ids', 'token_type_ids']
+  model_dir: ./infer_model
+  model_filename: model.pdmodel
+  params_filename: model.pdiparams
+  model_type: bert-base-uncased
+  task_name: sst-2
+  dataset: glue
+  batch_size: 1
+  max_seq_length: 128
+  padding: max_length
+  return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
+TrainConfig:
+  epochs: 3
+  eval_iter: 855
+  learning_rate: 1.0e-6
+  optimizer_builder:
+    optimizer:
+      type: AdamW
+    weight_decay: 0.01
+  origin_metric: 0.9266
+
diff --git a/example/auto_compression/pytorch_huggingface/configs/cola.yaml b/example/auto_compression/pytorch_huggingface/configs/cola.yaml
index d6a06e47f..a681cfdfb 100644
--- a/example/auto_compression/pytorch_huggingface/configs/cola.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/cola.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 855
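Editor's note: the YAML sections above are consumed by `run-uncased.py` via PaddleSlim's config loader, which returns them as a plain dict. A small sanity check (an editorial sketch, not part of this patch) can therefore validate a config before launching a long compression run:

```python
# Editor's sketch: validate a strategy config before training.
from paddleslim.common import load_config

cfg = load_config('./configs/cola-unsst2.yaml')
# The top-level section names double as strategy names for AutoCompression.
assert {'Global', 'Distillation', 'QuantAware', 'TrainConfig'} <= set(cfg)
print(cfg['QuantAware']['quantize_op_types'])  # ['matmul', 'matmul_v2']
```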
diff --git a/example/auto_compression/pytorch_huggingface/configs/mnli.yaml b/example/auto_compression/pytorch_huggingface/configs/mnli.yaml
index 5a1e75154..4d7cde626 100644
--- a/example/auto_compression/pytorch_huggingface/configs/mnli.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/mnli.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 1710
diff --git a/example/auto_compression/pytorch_huggingface/configs/mrpc.yaml b/example/auto_compression/pytorch_huggingface/configs/mrpc.yaml
index 86f997bed..49a130864 100644
--- a/example/auto_compression/pytorch_huggingface/configs/mrpc.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/mrpc.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 915
diff --git a/example/auto_compression/pytorch_huggingface/configs/qnli.yaml b/example/auto_compression/pytorch_huggingface/configs/qnli.yaml
index 321a04631..c18a2164d 100644
--- a/example/auto_compression/pytorch_huggingface/configs/qnli.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/qnli.yaml
@@ -1,6 +1,6 @@
 Global:
   input_names: ['x0', 'x1', 'x2']
-  model_dir: ./x2paddle_qnli
+  model_dir: xqnli
   model_filename: model.pdmodel
   params_filename: model.pdiparams
   model_type: bert-base-cased
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 855
diff --git a/example/auto_compression/pytorch_huggingface/configs/qqp.yaml b/example/auto_compression/pytorch_huggingface/configs/qqp.yaml
index 21676e0a0..d363bc211 100644
--- a/example/auto_compression/pytorch_huggingface/configs/qqp.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/qqp.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 855
diff --git a/example/auto_compression/pytorch_huggingface/configs/rte.yaml b/example/auto_compression/pytorch_huggingface/configs/rte.yaml
index 70879b5d4..8de643137 100644
--- a/example/auto_compression/pytorch_huggingface/configs/rte.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/rte.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 1240
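Editor's note: the hunks above (and the cola/sst2/stsb ones) add an identical `Distillation`/`QuantAware` block to each task config. If the block ever needs to change again, a small script can stamp the copies in sync instead of hand-editing eight files. This is an editorial sketch; PyYAML and the `configs/` layout are assumed, and rewriting the files will drop any YAML comments.

```python
# Editor's sketch: stamp the shared strategy block into every task config.
import yaml

SHARED = {
    'Distillation': {'alpha': 1.0, 'loss': 'soft_label'},
    'QuantAware': {
        'use_pact': True,
        'activation_quantize_type': 'moving_average_abs_max',
        'weight_bits': 8,
        'activation_bits': 8,
        'quantize_op_types': ['matmul', 'matmul_v2'],
    },
}

for task in ('cola', 'mnli', 'mrpc', 'qnli', 'qqp', 'rte', 'sst2', 'stsb'):
    path = 'configs/{}.yaml'.format(task)
    with open(path) as f:
        cfg = yaml.safe_load(f)
    cfg.update(SHARED)  # overwrite or add the shared sections
    with open(path, 'w') as f:
        yaml.safe_dump(cfg, f, sort_keys=False)
```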
diff --git a/example/auto_compression/pytorch_huggingface/configs/sst2.yaml b/example/auto_compression/pytorch_huggingface/configs/sst2.yaml
index 3f9a6f534..4717f58a8 100644
--- a/example/auto_compression/pytorch_huggingface/configs/sst2.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/sst2.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 3367
diff --git a/example/auto_compression/pytorch_huggingface/configs/stsb.yaml b/example/auto_compression/pytorch_huggingface/configs/stsb.yaml
index 2abc207b9..f130ee820 100644
--- a/example/auto_compression/pytorch_huggingface/configs/stsb.yaml
+++ b/example/auto_compression/pytorch_huggingface/configs/stsb.yaml
@@ -10,6 +10,20 @@ Global:
   max_seq_length: 128
   padding: max_length
   return_attention_mask: True
+
+Distillation:
+  alpha: 1.0
+  loss: soft_label
+
+QuantAware:
+  use_pact: true
+  activation_quantize_type: 'moving_average_abs_max'
+  weight_bits: 8
+  activation_bits: 8
+  quantize_op_types:
+  - matmul
+  - matmul_v2
+
 TrainConfig:
   epochs: 3
   eval_iter: 1710
diff --git a/example/auto_compression/pytorch_huggingface/paddle_inference_eval.py b/example/auto_compression/pytorch_huggingface/paddle_inference_eval.py
index d17407ccb..338825a66 100644
--- a/example/auto_compression/pytorch_huggingface/paddle_inference_eval.py
+++ b/example/auto_compression/pytorch_huggingface/paddle_inference_eval.py
@@ -103,7 +103,8 @@ def parse_args():
         "--max_seq_length",
         default=128,
         type=int,
-        help="The maximum total input sequence length after tokenization. Sequences longer "
+        help=
+        "The maximum total input sequence length after tokenization. Sequences longer "
         "than this will be truncated, sequences shorter will be padded.", )
     parser.add_argument(
         "--perf_warmup_steps",
@@ -119,7 +120,8 @@
         type=str,
         default="fp32",
         choices=["fp32", "fp16", "int8"],
-        help="The precision of inference. It can be 'fp32', 'fp16' or 'int8'. Default is 'fp16'.",
+        help=
+        "The precision of inference. It can be 'fp32', 'fp16' or 'int8'. Default is 'fp32'.",
     )
     parser.add_argument(
         "--use_mkldnn",
@@ -189,6 +191,7 @@ def create_predictor(cls, args):
         config = paddle.inference.Config(
             os.path.join(args.model_path, args.model_filename),
             os.path.join(args.model_path, args.params_filename))
+
         if args.device == "gpu":
             # set GPU configs accordingly
             config.enable_use_gpu(100, 0)
@@ -220,8 +223,8 @@
             dynamic_shape_file = os.path.join(args.model_path,
                                               "dynamic_shape.txt")
             if os.path.exists(dynamic_shape_file):
-                config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file,
-                                                           True)
+                config.enable_tuned_tensorrt_dynamic_shape(
+                    dynamic_shape_file, True)
                 print("trt set dynamic shape done!")
             else:
                 config.collect_shape_range_info(dynamic_shape_file)
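Editor's note: the last hunk above touches the tuned dynamic-shape path in `paddle_inference_eval.py`. The underlying pattern is a two-pass flow: the first run records real tensor shape ranges, and later runs feed them to TensorRT. A condensed sketch of that flow (values such as `max_batch_size` and `min_subgraph_size` are illustrative, not the script's exact settings):

```python
# Editor's sketch of the two-pass TensorRT dynamic-shape flow.
import os
from paddle import inference

def build_config(model_dir, precision=inference.PrecisionType.Int8):
    config = inference.Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    config.enable_use_gpu(100, 0)
    shape_file = os.path.join(model_dir, 'dynamic_shape.txt')
    if os.path.exists(shape_file):
        # Pass 2: shape ranges are known, so TensorRT can be enabled.
        config.enable_tensorrt_engine(
            workspace_size=1 << 30,
            max_batch_size=32,
            min_subgraph_size=5,
            precision_mode=precision,
            use_static=False,
            use_calib_mode=False)
        config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
    else:
        # Pass 1: run once without TRT while recording tensor shapes.
        config.collect_shape_range_info(shape_file)
    return config
```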
diff --git a/example/auto_compression/pytorch_huggingface/run-uncased.py b/example/auto_compression/pytorch_huggingface/run-uncased.py
new file mode 100644
index 000000000..e97ea2e1f
--- /dev/null
+++ b/example/auto_compression/pytorch_huggingface/run-uncased.py
@@ -0,0 +1,381 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import numpy as np
+import argparse
+import paddle
+import paddle.nn as nn
+import functools
+from functools import partial
+import shutil
+from paddle.io import Dataset, BatchSampler, DataLoader
+from paddle.metric import Metric, Accuracy
+from paddlenlp.transformers import AutoModelForTokenClassification, AutoTokenizer
+from paddlenlp.datasets import load_dataset
+from paddlenlp.data import Stack, Tuple, Pad
+from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
+from paddleslim.common import load_config as load_slim_config
+from paddleslim.auto_compression.compressor import AutoCompression
+
+
+def argsparser():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--config_path',
+        type=str,
+        default=None,
+        help="path of compression strategy config.",
+        required=True)
+    parser.add_argument(
+        '--save_dir',
+        type=str,
+        default='output',
+        help="directory to save compressed model.")
+    parser.add_argument(
+        '--eval',
+        # argparse's type=bool treats any non-empty string (including
+        # "False") as True, so parse the flag value explicitly.
+        type=lambda s: str(s).lower() in ('true', '1', 'yes'),
+        default=False,
+        help="whether to validate the model only.")
+    return parser
+
+
+METRIC_CLASSES = {
+    "cola": Mcc,
+    "sst-2": Accuracy,
+    "mrpc": AccuracyAndF1,
+    "sts-b": PearsonAndSpearman,
+    "qqp": AccuracyAndF1,
+    "mnli": Accuracy,
+    "qnli": Accuracy,
+    "rte": Accuracy,
+}
+
+task_to_keys = {
+    "cola": ("sentence", None),
+    "mnli": ("sentence1", "sentence2"),
+    "mrpc": ("sentence1", "sentence2"),
+    "qnli": ("sentence1", "sentence2"),
+    "qqp": ("sentence1", "sentence2"),
+    "rte": ("sentence1", "sentence2"),
+    "sst-2": ("sentence", None),
+    "sts-b": ("sentence1", "sentence2"),
+    "wnli": ("sentence1", "sentence2"),
+}
+
+
+def convert_example(example,
+                    tokenizer,
+                    label_list,
+                    max_seq_length=512,
+                    is_test=False,
+                    padding='max_length',
+                    return_attention_mask=False):
+    if not is_test:
+        # `label_list == None` is for regression task
+        label_dtype = "int64" if label_list else "float32"
+        # Get the label
+        label = example['labels']
+        label = np.array([label], dtype=label_dtype)
+    # Convert raw text to feature
+    sentence1_key, sentence2_key = task_to_keys[global_config['task_name']]
+    texts = ((example[sentence1_key], ) if sentence2_key is None else
+             (example[sentence1_key], example[sentence2_key]))
+    example = tokenizer(
+        *texts,
+        max_seq_len=max_seq_length,
+        padding=padding,
+        return_attention_mask=return_attention_mask,
+        truncation='longest_first')
+    # The tokenizer output is consumed positionally downstream, so the
+    # returned tuple is the same whether or not an attention mask was
+    # requested.
+    if not is_test:
+        return example['input_ids'], example['token_type_ids'], label
+    return example['input_ids'], example['token_type_ids']
+
+
+def create_data_holder(task_name, input_names):
+    """
+    Define the input data holder for the glue task.
+    """
+    inputs = []
+    for name in input_names:
+        inputs.append(
+            paddle.static.data(name=name, shape=[-1, -1], dtype="int64"))
+
+    if task_name == "sts-b":
+        inputs.append(
+            paddle.static.data(name="label", shape=[-1, 1], dtype="float32"))
+    else:
+        inputs.append(
+            paddle.static.data(name="label", shape=[-1, 1], dtype="int64"))
+
+    return inputs
+
+
+def reader():
+    # Create the tokenizer and dataset
+    tokenizer = AutoTokenizer.from_pretrained(
+        global_config['model_dir'], use_fast=False)
+    train_ds = load_dataset(
+        global_config['dataset'], global_config['task_name'], splits="train")
+
+    trans_func = partial(
+        convert_example,
+        tokenizer=tokenizer,
+        label_list=train_ds.label_list,
+        max_seq_length=global_config['max_seq_length'],
+        is_test=True,
+        padding=global_config['padding'],
+        return_attention_mask=global_config['return_attention_mask'])
+
+    train_ds = train_ds.map(trans_func, lazy=True)
+    if global_config['return_attention_mask']:
+        batchify_fn = lambda samples, fn=Tuple(
+            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
+            # Pad(axis=0, pad_val=0),  # attention_mask
+            Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type
+        ): fn(samples)
+    else:
+        batchify_fn = lambda samples, fn=Tuple(
+            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
+            Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type
+        ): fn(samples)
+
+    train_batch_sampler = paddle.io.DistributedBatchSampler(
+        train_ds,
+        batch_size=global_config['batch_size'],
+        shuffle=True,
+        drop_last=True)
+
+    feed_list = create_data_holder(global_config['task_name'],
+                                   global_config['input_names'])
+    train_data_loader = DataLoader(
+        dataset=train_ds,
+        feed_list=feed_list[:-1],
+        batch_sampler=train_batch_sampler,
+        collate_fn=batchify_fn,
+        num_workers=0,
+        return_list=False)
+
+    dev_trans_func = partial(
+        convert_example,
+        tokenizer=tokenizer,
+        label_list=train_ds.label_list,
+        max_seq_length=global_config['max_seq_length'],
+        padding=global_config['padding'],
+        return_attention_mask=global_config['return_attention_mask'])
+
+    if global_config['return_attention_mask']:
+        dev_batchify_fn = lambda samples, fn=Tuple(
+            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
+            # Pad(axis=0, pad_val=0),  # attention_mask
+            Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type
+            Stack(dtype="int64" if train_ds.label_list else "float32")  # label
+        ): fn(samples)
+    else:
+        dev_batchify_fn = lambda samples, fn=Tuple(
+            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
+            Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type
+            Stack(dtype="int64" if train_ds.label_list else "float32")  # label
+        ): fn(samples)
+
+    if global_config['task_name'] == "mnli":
+        dev_ds_matched, dev_ds_mismatched = load_dataset(
+            global_config['dataset'],
+            global_config['task_name'],
+            splits=["dev_matched", "dev_mismatched"])
+        dev_ds_matched = dev_ds_matched.map(dev_trans_func, lazy=True)
+        dev_ds_mismatched = dev_ds_mismatched.map(dev_trans_func, lazy=True)
+        dev_batch_sampler_matched = paddle.io.BatchSampler(
+            dev_ds_matched,
+            batch_size=global_config['batch_size'],
+            shuffle=False,
+            drop_last=True)
+
+        dev_data_loader_matched = DataLoader(
+            dataset=dev_ds_matched,
+            batch_sampler=dev_batch_sampler_matched,
+            # Dev batches carry labels, so use the label-aware collate fn
+            # (the train-side batchify_fn has no label field).
+            collate_fn=dev_batchify_fn,
+            feed_list=feed_list,
+            num_workers=0,
+            return_list=False)
+        dev_batch_sampler_mismatched = paddle.io.BatchSampler(
+            dev_ds_mismatched,
+            batch_size=global_config['batch_size'],
+            shuffle=False,
+            drop_last=True)
+        dev_data_loader_mismatched = DataLoader(
+            dataset=dev_ds_mismatched,
+            batch_sampler=dev_batch_sampler_mismatched,
+            collate_fn=dev_batchify_fn,
+            num_workers=0,
+            feed_list=feed_list,
+            # drop_last is already handled by the batch sampler;
+            # paddle.io.DataLoader rejects drop_last when batch_sampler is given.
+            return_list=False)
+        return train_data_loader, dev_data_loader_matched, dev_data_loader_mismatched
+    else:
+        dev_ds = load_dataset(
+            global_config['dataset'], global_config['task_name'], splits='dev')
+        dev_ds = dev_ds.map(dev_trans_func, lazy=True)
+        dev_batch_sampler = paddle.io.BatchSampler(
+            dev_ds,
+            batch_size=global_config['batch_size'],
+            shuffle=False,
+            drop_last=True)
+        dev_data_loader = DataLoader(
+            dataset=dev_ds,
+            batch_sampler=dev_batch_sampler,
+            collate_fn=dev_batchify_fn,
+            num_workers=0,
+            feed_list=feed_list,
+            return_list=False)
+        return train_data_loader, dev_data_loader
+
+
+def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
+    metric.reset()
+    for data in eval_dataloader():
+        logits = exe.run(
+            compiled_test_program,
+            feed={
+                test_feed_names[0]: data[0]['input_ids'],
+                test_feed_names[1]: data[0]['token_type_ids'],
+                # test_feed_names[2]: data[0]['position_ids']
+            },
+            fetch_list=test_fetch_list)
+        paddle.disable_static()
+        if isinstance(metric, PearsonAndSpearman):
+            labels_pd = paddle.to_tensor(np.array(data[0]['label'])).reshape(
+                (-1, 1))
+            logits_pd = paddle.to_tensor(logits[0]).reshape((-1, 1))
+            metric.update((logits_pd, labels_pd))
+        else:
+            labels_pd = paddle.to_tensor(np.array(data[0]['label']).flatten())
+            logits_pd = paddle.to_tensor(logits[0])
+            correct = metric.compute(logits_pd, labels_pd)
+            metric.update(correct)
+        paddle.enable_static()
+    res = metric.accumulate()
+
+    return res[0] if isinstance(res, list) or isinstance(res, tuple) else res
+
+
+def eval():
+    devices = paddle.device.get_device().split(':')[0]
+    places = paddle.device._convert_to_place(devices)
+    exe = paddle.static.Executor(places)
+    val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model(
+        global_config["model_dir"],
+        exe,
+        model_filename=global_config["model_filename"],
+        params_filename=global_config["params_filename"])
+    print('Loaded model from: {}'.format(global_config["model_dir"]))
+
+    metric.reset()
+    print('Evaluating...')
+    for data in eval_dataloader():
+        logits = exe.run(
+            val_program,
+            feed={
+                feed_target_names[0]: data[0]['input_ids'],
+                feed_target_names[1]: data[0]['token_type_ids'],
+            },
+            fetch_list=fetch_targets)
+        paddle.disable_static()
+        if isinstance(metric, PearsonAndSpearman):
+            labels_pd = paddle.to_tensor(np.array(data[0]['label'])).reshape(
+                (-1, 1))
+            logits_pd = paddle.to_tensor(logits[0]).reshape((-1, 1))
+            metric.update((logits_pd, labels_pd))
+        else:
+            labels_pd = paddle.to_tensor(np.array(data[0]['label']).flatten())
+            logits_pd = paddle.to_tensor(logits[0])
+            correct = metric.compute(logits_pd, labels_pd)
+            metric.update(correct)
+        paddle.enable_static()
+    res = metric.accumulate()
+    return res[0] if isinstance(res, list) or isinstance(res, tuple) else res
+
+
+def apply_decay_param_fun(name):
+    if name.find("bias") > -1:
+        return True
+    elif name.find("b_0") > -1:
+        return True
+    elif name.find("norm") > -1:
+        return True
+    else:
+        return False
+
+
+def main():
+    all_config = load_slim_config(args.config_path)
+
+    global global_config
+    assert "Global" in all_config, "Key Global not found in config file."
+ global_config = all_config["Global"] + + if 'TrainConfig' in all_config: + all_config['TrainConfig']['optimizer_builder'][ + 'apply_decay_param_fun'] = apply_decay_param_fun + + global train_dataloader, eval_dataloader + train_dataloader, eval_dataloader = reader() + + global metric + metric_class = METRIC_CLASSES[global_config['task_name']] + metric = metric_class() + + if args.eval: + result = eval() + print('Eval metric:', result) + sys.exit(0) + + ac = AutoCompression( + model_dir=global_config['model_dir'], + model_filename=global_config['model_filename'], + params_filename=global_config['params_filename'], + save_dir=args.save_dir, + config=all_config, + train_dataloader=train_dataloader, + eval_callback=eval_function if + (len(list(all_config.keys())) == 2 and 'TrainConfig' in all_config) or + len(list(all_config.keys())) == 1 or + 'HyperParameterOptimization' not in all_config else eval_dataloader, + eval_dataloader=eval_dataloader) + + if not os.path.exists(args.save_dir): + os.makedirs(args.save_dir) + + for file_name in os.listdir(global_config['model_dir']): + if 'json' in file_name or 'txt' in file_name: + shutil.copy( + os.path.join(global_config['model_dir'], file_name), + args.save_dir) + + ac.compress() + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + args = parser.parse_args() + main() diff --git a/example/auto_compression/pytorch_huggingface/run.py b/example/auto_compression/pytorch_huggingface/run.py index afa0d9862..a4dab8f7d 100644 --- a/example/auto_compression/pytorch_huggingface/run.py +++ b/example/auto_compression/pytorch_huggingface/run.py @@ -275,6 +275,7 @@ def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list): metric.update(correct) paddle.enable_static() res = metric.accumulate() + return res[0] if isinstance(res, list) or isinstance(res, tuple) else res
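Editor's note on the final `run.py` hunk: it makes `eval_function` return a scalar regardless of the metric. Several of PaddleNLP's GLUE metrics (for example `AccuracyAndF1` and `PearsonAndSpearman`) return a tuple from `accumulate()`, while others return a plain float, so the added line picks the headline value. A self-contained sketch of the same normalization:

```python
# Editor's sketch: normalize metric.accumulate() output to one scalar.
def first_scalar(res):
    """Return res itself for scalar metrics, or its first entry for tuples."""
    return res[0] if isinstance(res, (list, tuple)) else res

assert first_scalar(0.9266) == 0.9266            # scalar-returning metrics
assert first_scalar((0.91, 0.89, 0.90)) == 0.91  # tuple-returning metrics
```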