-
Notifications
You must be signed in to change notification settings - Fork 23
all parameters
bugface edited this page Aug 2, 2021
·
2 revisions
- "model_type": "deberta" => what type of transformer architecture you will use e.g., bert, roberta, xlnet
- "data_format_mode": 0, => 0 is for sep mode - [CLS]S1[SEP]S2[SEP]; 1 is for uni mode - [CLS]S1S2[SEP], we recommend 0
- "classification_scheme": 2, => which tokens will be used for classification, 0 will only use [CLS]; 1 will use [CLS], [S1], and [S2]; 2 will use [CLS], [S1], [S2], [E1], [E2]; 3 will use [S1], [S2]
- "pretrained_model": "microsoft/deberta-base", => actual model pretrained weights, you can use models from huggingface repo or our mimic pretrained models
- "data_dir": "../sample_data", => the directory for your data, should have train.tsv, test.tsv, dev.tsv (optional)
- "new_model_dir": "../deberta_re_model", => where to save your fine-tuned checkpoints
- "predict_output_file": "../deberta_re_predict.txt",
- "overwrite_model_dir": true,
- "seed": 1234,
- "max_seq_length": 128,
- "cache_data": false,
- "data_file_header": true,
- "do_train": true,
- "do_eval": false, => if do_eval is set, you need to provide dev.tsv, and model selection will be based on performance on dev.tsv
- "do_predict": true,
- "do_lower_case": true,
- "train_batch_size": 2,
- "eval_batch_size": 32,
- "learning_rate": 1e-05,
- "num_train_epochs": 5,
- "gradient_accumulation_steps": 1,
- "do_warmup": true,
- "warmup_ratio": 0.1,
- "weight_decay": 0.0,
- "adam_epsilon": 1e-08,
- "max_grad_norm": 1.0,
- "max_num_checkpoints": 0, => the maximum number of checkpoints that can be saved; if more than the maximum number are saved, the oldest will be deleted
- "log_file": null, => where to save the log information; if null, logs are printed to the console only
- "log_lvl": "I", => log level; i for info, w for warn, e for error, d for debug
- "log_step": 2,
- "num_core": 4, => how many CPU cores will be used for data processing (tokenization and conversion to transformer-compatible codes)
- "non_relation_label": "nonRel",
- "progress_bar": false,
- "fp16": false,
- "fp16_opt_level": "O1",
- "use_focal_loss": false, => whether to use the focal loss function; the default loss function is cross entropy
- "focal_loss_gamma": 2,
- "use_binary_classification_mode": false, => whether to use the binary classification loss function; if yes, your labels must have only two categories
- "balance_sample_weights": false => we will add the sample weights into the loss function