From 215beeef9f1e380d035647ffa3d01ceabeff2982 Mon Sep 17 00:00:00 2001
From: "Blok, Pieter" <pieter.blok@wur.nl>
Date: Sat, 29 Jan 2022 12:54:35 +0900
Subject: [PATCH] option to do a comparative experiment on the same initial
 dataset with the same initial model

---
 MISC_SETTINGS.md                              |  4 +-
 README.md                                     |  2 +-
 .../config/maskAL_experiment_random.yaml      | 63 ++++++++++++++
 .../config/maskAL_experiment_uncertainty.yaml | 63 ++++++++++++++
 active_learning/sampling/prepare_dataset.py   | 42 +++++++++-
 maskAL.py                                     | 83 +++++++++----------
 maskAL.yaml                                   |  8 +-
 types.yaml                                    |  8 +-
 8 files changed, 222 insertions(+), 51 deletions(-)
 create mode 100644 active_learning/config/maskAL_experiment_random.yaml
 create mode 100644 active_learning/config/maskAL_experiment_uncertainty.yaml
diff --git a/MISC_SETTINGS.md b/MISC_SETTINGS.md
index 3913974..84d5f77 100644
--- a/MISC_SETTINGS.md
+++ b/MISC_SETTINGS.md
@@ -2,6 +2,8 @@ The following settings can probably stay unchanged: <br/> <br/>
 
 | Setting        			| Description        													|
 | --------------------------------------|-----------------------------------------------------------------------------------------------------------------------|
+| duplicate_initial_model_and_data	| Experimental mode: set this to **True** when you want to duplicate a previously trained model and dataset with a new settings-file. Default **False**	|
+| initial_train_file			| When **duplicate_initial_model_and_data** is set to **True**: specify the txt-file that lists the images of the initial dataset (for example the **trainfiles_iteration000.txt** of the previous experiment) 		|
 | transfer_learning_on_previous_models	| Whether to use the weight-files of the previous trainings for transfer-learning					|
 | warmup_iterations			| The number of warmup-iterations that can be used to stabilize the training process 			 		|
 | train_iterations_base			| The number of training iterations to start the training with (this number of training iterations is used when the total number of training images is below the value of **step_image_number**)								 			 		|
@@ -19,7 +21,7 @@ The following settings can probably stay unchanged: <br/> <br/>
 | minority_classes 			| Only when the **"RepeatFactorTrainingSampler"** is used: specify the minority-classes that have to be repeated	|
 | repeat_factor_smallest_class		| Only when the **"RepeatFactorTrainingSampler"** is used: specify the repeat-factor of the smallest class (use a value higher than 1.0 to repeat the minority classes)																	|
 | experiment_name			| Specify the name of your experiment											|
-| strategies				| Use **'uncertainty'** to select the most uncertain images for the active learning. Other options are **'random'** and **'certainty'** |
+| strategy				| Use **'uncertainty'** to select the most uncertain images for the active learning. Other options are **'random'** and **'certainty'** |
 | mode					| Uncertainty sampling method. Use **'mean'** when you want to sample the most uncertain images, use **'min'** when you want to sample the most uncertain instances																	|
 | equal_pool_size			| When **True** this will sample the same **pool_size** for every sampling iteration. When **False**, an unequal **pool_size** will be sampled for the specified number of loops															|
 | dropout_probability			| Specify the dropout probability between 0.1 and 0.9. Our experiments indicated that **0.25** is a good value		|
diff --git a/README.md b/README.md
index 2169678..888511a 100644
--- a/README.md
+++ b/README.md
@@ -51,11 +51,11 @@ Change the following settings in the maskAL.yaml file: <br/>
 | weightsroot	        | The file directory where the weight-files are stored											|
 | resultsroot		| The file directory where the result-files are stored 											|
 | dataroot	 	| The root directory where all image-files are stored											|
-| use_initial_train_dir | Set this to **True** when you want to start the active-learning from an initial training dataset. When **False**, the initial dataset of size **initial_datasize** is randomly sampled from the **traindir** 																	|
 | initial_train_dir     | When use_initial_train_dir is activated: the file directory where the initial training images and annotations are stored		|
 | traindir	 	| The file directory where the training images and annotations are stored								|
 | valdir	 	| The file directory where the validation images and annotations are stored								|
 | testdir	 	| The file directory where the test images and annotations are stored									|
+| use_initial_train_dir | Set this to **True** when you want to start the active-learning from an initial training dataset. When **False**, the initial dataset of size **initial_datasize** is randomly sampled from the **traindir** 																	|
 | network_config	| The Mask R-CNN configuration-file (.yaml) file (see the folder './configs')								|
 | pretrained_weights	| The pretrained weights to start the active-learning. Either specify the **network_config** (.yaml) or a custom weights-file (.pth or .pkl)|
 | cuda_visible_devices 	| The identifiers of the CUDA device(s) you want to use for training and sampling (in string format, for example: '0,1')		|
diff --git a/active_learning/config/maskAL_experiment_random.yaml b/active_learning/config/maskAL_experiment_random.yaml
new file mode 100644
index 0000000..6362981
--- /dev/null
+++ b/active_learning/config/maskAL_experiment_random.yaml
@@ -0,0 +1,63 @@
+# Parameters for maskAL
+# python maskAL.py --config active_learning/config/maskAL_experiment_random.yaml
+# explanation can be found here: https://git.wur.nl/blok012/maskAL
+
+# folders
+weightsroot: "./weights"
+resultsroot: "./results"
+dataroot: "./datasets/experiment/random"
+initial_train_dir: "./datasets"
+traindir: "./datasets/train"
+valdir: "./datasets/val"
+testdir: "./datasets/test03"
+
+# data options
+use_initial_train_dir: False
+duplicate_initial_model_and_data: True
+initial_train_file: "./results/experiment/uncertainty/trainfiles_iteration000.txt"
+
+# network parameters
+network_config: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
+pretrained_weights: "./weights/experiment/uncertainty/best_model_000.pth"
+
+# training-parameters
+cuda_visible_devices: '1'
+classes: ['healthy', 'damaged', 'matured', 'cateye', 'headrot']
+transfer_learning_on_previous_models: True
+learning_rate: 0.005
+warmup_iterations: 500
+train_iterations_base: 2500
+train_iterations_step_size: 2500
+step_image_number: 500
+step_ratios: [0.5, 0.8]
+eval_period: 500
+checkpoint_period: -1
+weight_decay: 0.0001
+learning_policy: 'steps_with_decay'
+gamma: 0.1
+train_batch_size: 1
+num_workers: 2
+
+# train-sampler
+train_sampler: "RepeatFactorTrainingSampler"
+minority_classes: ['damaged', 'matured', 'cateye', 'headrot']
+repeat_factor_smallest_class: 1.5
+
+# evaluation-parameters
+confidence_threshold: 0.5
+nms_threshold: 0.01
+
+# active-learning sampling
+experiment_name: 'experiment'
+strategy: 'random'
+mode: 'mean'
+initial_datasize: 100
+pool_size: 200
+equal_pool_size: True
+loops: 5
+dropout_probability: 0.25
+mcd_iterations: 20
+iou_thres: 0.5
+auto_annotate: False
+export_format: labelme
+supervisely_meta_json: "./datasets/meta.json"
diff --git a/active_learning/config/maskAL_experiment_uncertainty.yaml b/active_learning/config/maskAL_experiment_uncertainty.yaml
new file mode 100644
index 0000000..ce195ff
--- /dev/null
+++ b/active_learning/config/maskAL_experiment_uncertainty.yaml
@@ -0,0 +1,63 @@
+# Parameters for maskAL
+# python maskAL.py --config active_learning/config/maskAL_experiment_uncertainty.yaml
+# explanation can be found here: https://git.wur.nl/blok012/maskAL
+
+# folders
+weightsroot: "./weights"
+resultsroot: "./results"
+dataroot: "./datasets/experiment/uncertainty"
+initial_train_dir: "./datasets"
+traindir: "./datasets/train"
+valdir: "./datasets/val"
+testdir: "./datasets/test03"
+
+# data options
+use_initial_train_dir: False
+duplicate_initial_model_and_data: False
+initial_train_file: ""
+
+# network parameters
+network_config: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
+pretrained_weights: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
+
+# training-parameters
+cuda_visible_devices: '0'
+classes: ['healthy', 'damaged', 'matured', 'cateye', 'headrot']
+transfer_learning_on_previous_models: True
+learning_rate: 0.005
+warmup_iterations: 500
+train_iterations_base: 2500
+train_iterations_step_size: 2500
+step_image_number: 500
+step_ratios: [0.5, 0.8]
+eval_period: 500
+checkpoint_period: -1
+weight_decay: 0.0001
+learning_policy: 'steps_with_decay'
+gamma: 0.1
+train_batch_size: 1
+num_workers: 2
+
+# train-sampler
+train_sampler: "RepeatFactorTrainingSampler"
+minority_classes: ['damaged', 'matured', 'cateye', 'headrot']
+repeat_factor_smallest_class: 1.5
+
+# evaluation-parameters
+confidence_threshold: 0.5
+nms_threshold: 0.01
+
+# active-learning sampling
+experiment_name: 'experiment'
+strategy: 'uncertainty'
+mode: 'mean'
+initial_datasize: 100
+pool_size: 200
+equal_pool_size: True
+loops: 5
+dropout_probability: 0.25
+mcd_iterations: 20
+iou_thres: 0.5
+auto_annotate: False
+export_format: labelme
+supervisely_meta_json: "./datasets/meta.json"
diff --git a/active_learning/sampling/prepare_dataset.py b/active_learning/sampling/prepare_dataset.py
index cf99a45..d9d039b 100644
--- a/active_learning/sampling/prepare_dataset.py
+++ b/active_learning/sampling/prepare_dataset.py
@@ -1,7 +1,7 @@
 # @Author: Pieter Blok
 # @Date:   2021-03-26 14:30:31
 # @Last Modified by:   Pieter Blok
-# @Last Modified time: 2021-12-09 13:55:46
+# @Last Modified time: 2022-01-27 15:00:20
 
 import sys
 import io
@@ -1394,6 +1394,20 @@ def calculate_iterations(config, dataset_dicts_train):
         max_iterations = config['train_iterations_base'] + ((div_factor - 1) * config['train_iterations_step_size'])
     steps = [int(s * max_iterations) for s in config['step_ratios']]
     return int(max_iterations), steps
+
+
+def read_train_file(txt_file):
+    """Return the first comma-separated field of each non-blank line of txt_file."""
+    initial_train_files = []
+    # the with-statement closes the file, also when reading raises an exception
+    with open(txt_file, 'r') as txtfile:
+        for line in txtfile:
+            full_line = line.strip()
+            # whitespace-only lines would otherwise end up as '' in the list
+            if full_line:
+                train_file = full_line.split(",")[0]
+                initial_train_files.append(train_file)
+    return initial_train_files
     
 
 def prepare_all_dataset_randomly(rootdir, imgdir, classes, train_val_test_split, initial_datasize):
@@ -1477,7 +1491,31 @@ def prepare_initial_dataset(config):
     except Exception as e:
         print_exception(e)
         logger.error("Cannot create initial-datasets")
-        sys.exit("Closing application")     
+        sys.exit("Closing application")
+
+
+def prepare_initial_dataset_from_list(config, train_list):
+    """Process traindir, valdir and testdir; for 'train' the json-helpers receive train_list instead of all images. Logs and exits on any exception."""
+    try:
+        for imgdir, name in zip([config['traindir'], config['valdir'], config['testdir']], ['train', 'val', 'test']):
+            print("")
+            print("Processing {:s}-dataset: {:s}".format(name, imgdir))
+            rename_xml_files(imgdir)
+            images, annotations = list_files(imgdir)
+            print("{:d} images found!".format(len(images)))
+            print("{:d} annotations found!".format(len(annotations)))
+            write_file(config['dataroot'], images, name)
+
+            # only the train-split is restricted to the supplied list,
+            # the val- and test-splits use every image that was listed in their folder
+            selected_images = train_list if name == "train" else images
+            check_json_presence(config, imgdir, selected_images, name)
+            create_json(config['dataroot'], imgdir, selected_images, config['classes'], name)
+
+    except Exception as e:
+        print_exception(e)
+        logger.error("Cannot create initial-datasets")
+        sys.exit("Closing application")
 
 
 def update_train_dataset(config, cfg, train_list):
diff --git a/maskAL.py b/maskAL.py
index 6b517d6..dfe8c78 100644
--- a/maskAL.py
+++ b/maskAL.py
@@ -1,7 +1,7 @@
 # @Author: Pieter Blok
 # @Date:   2021-03-25 18:48:22
 # @Last Modified by:   Pieter Blok
-# @Last Modified time: 2021-11-22 16:11:35
+# @Last Modified time: 2022-01-27 18:00:39
 
 ## Active learning with Mask R-CNN
 
@@ -45,7 +45,7 @@
 
 ## libraries that are specific for dropout training
 from active_learning.strategies.dropout import FastRCNNConvFCHeadDropout, FastRCNNOutputLayersDropout, MaskRCNNConvUpsampleHeadDropout, Res5ROIHeadsDropout
-from active_learning.sampling import observations, prepare_initial_dataset, prepare_initial_dataset_randomly, update_train_dataset, prepare_complete_dataset, calculate_repeat_threshold, calculate_iterations
+from active_learning.sampling import observations, prepare_initial_dataset, prepare_initial_dataset_from_list, prepare_initial_dataset_randomly, update_train_dataset, prepare_complete_dataset, calculate_repeat_threshold, calculate_iterations, read_train_file
 from active_learning.sampling.montecarlo_dropout import MonteCarloDropout, MonteCarloDropoutHead
 from active_learning.heuristics import uncertainty
 
@@ -105,7 +105,7 @@ def check_pretrained_weights(field, value, error):
                     schema[key] = {'type': value1, 'allowed': ['steps_with_lrs', 'steps_with_decay', 'step', 'cosine_decay', 'exp_decay']}
                 elif key == "train_sampler":
                     schema[key] = {'type': value1, 'allowed': ['TrainingSampler', 'RepeatFactorTrainingSampler']}
-                elif key == "strategies":
+                elif key == "strategy":
                     schema[key] = {'type': value1, 'allowed': ['uncertainty', 'certainty', 'random']}
                 elif key == "mode":
                     schema[key] = {'type': value1, 'allowed': ['mean', 'min']}
@@ -132,7 +132,7 @@ def check_pretrained_weights(field, value, error):
                     if isinstance(value1, list):
                         if not all(isinstance(v, known_types[value1[0]]) for v in value):
                             error_list.update({key: ["not all items are of type: " + str(value1[0])]})
-                        if key == "strategies":
+                        if key == "strategy":
                             if not all(v in ['uncertainty', 'certainty', 'random'] for v in value):
                                 error_list.update({key: ["choose 1 of these 3 options: 'uncertainty', 'certainty', 'random'"]})
                         if key == "mode":
@@ -187,12 +187,12 @@ def init_folders_and_files(config):
     resultsfolders = []
     csv_names = []
 
-    counts = Counter(config['strategies'])
+    counts = Counter(config['strategy'])
     counts = list(counts.values())
     duplicates = any(x > 1 for x in counts)
     hybrid_count = 0
         
-    for s, (strategy, mode, dropout_probability, mcd_iterations, pool_size) in enumerate(zip(config['strategies'], config['mode'], config['dropout_probability'], config['mcd_iterations'], config['pool_size'])):
+    for strategy, mode, dropout_probability, mcd_iterations, pool_size in zip(config['strategy'], config['mode'], config['dropout_probability'], config['mcd_iterations'], config['pool_size']):
         if duplicates:
             if isinstance(pool_size, list):
                 hybrid_count += 1
@@ -226,25 +226,15 @@ def remove_initial_training_set(dataroot):
         os.remove(os.path.join(dataroot, "initial_train.txt"))
 
 
-def store_initial_files(cfg, config, dataset_dicts_train_init, val_value_init, weightsfolders):
-    for wf in range(len(weightsfolders)):
-        with open(os.path.join(weightsfolders[wf], "cfg_init.yaml"), "w") as f1:
-            f1.write(cfg.dump())
-        with open(os.path.join(weightsfolders[wf], 'val_value_init.pkl'), 'wb') as f2:
-            pickle.dump(val_value_init, f2)
+def store_initial_val_value(val_value_init, weightsfolder):
+    with open(os.path.join(weightsfolder, 'val_value_init.pkl'), 'wb') as f1:
+        pickle.dump(val_value_init, f1)
 
-    with open(os.path.join(config['dataroot'], 'dataset_dicts_train_init.pkl'), 'wb') as f3:
-        pickle.dump(dataset_dicts_train_init, f3)
 
-
-def load_initial_files(config, weightsfolders):
-    cfg = get_cfg()
-    cfg.merge_from_file(os.path.join(weightsfolders[0], "cfg_init.yaml"))
-    with open(os.path.join(weightsfolders[0], 'val_value_init.pkl'), 'rb') as f1:
+def load_initial_val_value(weightsfolder):
+    with open(os.path.join(weightsfolder, 'val_value_init.pkl'), 'rb') as f1:
         val_value_init = pickle.load(f1)
-    with open(os.path.join(config['dataroot'], 'dataset_dicts_train_init.pkl'), 'rb') as f2:
-        dataset_dicts_train_init = pickle.load(f2)
-    return cfg, dataset_dicts_train_init, val_value_init
+    return val_value_init
     
 
 def calculate_max_entropy(classes):
@@ -309,13 +299,11 @@ def move_initial_train_dir(initial_train_dir, traindir, export):
             copyfile(os.path.join(initial_train_dir, cur_file), os.path.join(traindir, cur_file))
 
 
-def copy_initial_weight_file(read_folder, weightsfolders, iter):
+def copy_initial_weight_file(read_folder, weightsfolder, iter):
     weight_file = "best_model_{:s}.pth".format(str(iter).zfill(3))
-    for wf in range(1, len(weightsfolders)):
-        write_folder = weightsfolders[wf]
-        check_direxcist(write_folder)
-        if os.path.exists(os.path.join(read_folder, weight_file)):
-            copyfile(os.path.join(read_folder, weight_file), os.path.join(write_folder, weight_file))
+    check_direxcist(weightsfolder)
+    if os.path.exists(os.path.join(read_folder, weight_file)):
+        copyfile(os.path.join(read_folder, weight_file), os.path.join(weightsfolder, weight_file))
         
 
 def copy_previous_weights(weights_folder, iteration):
@@ -326,7 +314,7 @@ def copy_previous_weights(weights_folder, iteration):
         copyfile(previous_weights_file, next_weights_file)
     
 
-def Train_MaskRCNN(config, weightsfolder, gpu_num, iter, val_value, dropout_probability, init):    
+def Train_MaskRCNN(config, weightsfolder, gpu_num, iter, val_value, dropout_probability, init=False, skip_training=False):    
     ## Hook to automatically save the best checkpoint
     class BestCheckpointer(HookBase):
         def __init__(self, iter, eval_period, val_value, metric):
@@ -459,9 +447,11 @@ def build_hooks(self):
     cfg.TEST.EVAL_PERIOD = config['eval_period']
     cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(config['classes'])
     os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
-    trainer = CustomTrainer(cfg)
-    trainer.resume_or_load(resume=False)
-    trainer.train()
+
+    if not skip_training:
+        trainer = CustomTrainer(cfg)
+        trainer.resume_or_load(resume=False)
+        trainer.train()
 
     try:
         val_value_output = trainer.storage._latest_scalars['highest_value'][0]
@@ -471,7 +461,7 @@ def build_hooks(self):
     return cfg, dataset_dicts_train, val_value_output
 
 
-def Eval_MaskRCNN(cfg, config, dataset_dicts_train, weightsfolder, resultsfolder, csv_name, iter, init):      
+def Eval_MaskRCNN(cfg, config, dataset_dicts_train, weightsfolder, resultsfolder, csv_name, iter, init=False):      
     if init:
         register_coco_instances("test", {}, os.path.join(config['dataroot'], "test.json"), config['testdir'])
         test_metadata = MetadataCatalog.get("test")
@@ -621,7 +611,7 @@ def random_pooling(pool_list, pool_size, cfg, config, max_entropy, mcd_iteration
     if not config_ok: 
         sys.exit("Closing application")
 
-    config = process_config_file(config, ['strategies', 'mode', 'equal_pool_size', 'pool_size', 'dropout_probability', 'mcd_iterations', 'loops'])
+    config = process_config_file(config, ['strategy', 'mode', 'equal_pool_size', 'pool_size', 'dropout_probability', 'mcd_iterations', 'loops'])
     os.environ["CUDA_VISIBLE_DEVICES"] = config['cuda_visible_devices']
     gpu_num = len(config['cuda_visible_devices'])
     check_direxcist(config['dataroot'])
@@ -633,20 +623,27 @@ def random_pooling(pool_list, pool_size, cfg, config, max_entropy, mcd_iteration
         move_initial_train_dir(config['initial_train_dir'], config['traindir'], "images")
         prepare_initial_dataset(config)
         move_initial_train_dir(config['initial_train_dir'], config['traindir'], "annotations")
+    elif config['duplicate_initial_model_and_data']:
+        initial_train_files = read_train_file(config['initial_train_file'])
+        prepare_initial_dataset_from_list(config, initial_train_files)
     else:
         prepare_initial_dataset_randomly(config)
         
     ## active-learning
-    for i, (strategy, equal_pool_size, pool_size, mcd_iterations, mode, dropout_probability, loops, weightsfolder, resultsfolder, csv_name) in enumerate(zip(config['strategies'], config['equal_pool_size'], config['pool_size'], config['mcd_iterations'], config['mode'], config['dropout_probability'], config['loops'], weightsfolders, resultsfolders, csv_names)):
-        ## train and evaluate Mask R-CNN on the initial dataset
-        if i == 0:
-            cfg, dataset_dicts_train, val_value = Train_MaskRCNN(config, weightsfolder, gpu_num, 0, 0, dropout_probability, init=True)
-            cfg = Eval_MaskRCNN(cfg, config, dataset_dicts_train, weightsfolder, resultsfolder, csv_name, 0, init=True)
+    for strategy, equal_pool_size, pool_size, mcd_iterations, mode, dropout_probability, loops, weightsfolder, resultsfolder, csv_name in zip(config['strategy'], config['equal_pool_size'], config['pool_size'], config['mcd_iterations'], config['mode'], config['dropout_probability'], config['loops'], weightsfolders, resultsfolders, csv_names):
+        ## duplicate the initial model, when comparing the uncertainty sampling with the random sampling
+        if config['duplicate_initial_model_and_data']:
+            duplicated_weightsfolder = os.path.dirname(config['pretrained_weights'])
+            copy_initial_weight_file(duplicated_weightsfolder, weightsfolder, 0)
+            val_value_init = load_initial_val_value(duplicated_weightsfolder)
+            cfg, dataset_dicts_train, val_value = Train_MaskRCNN(config, weightsfolder, gpu_num, 1, val_value_init, dropout_probability, init=True, skip_training=True)
+        
+        ## train and evaluate Mask R-CNN on the randomly sampled initial dataset
         else:
-            initial_train_names = get_initial_train_names(config)
-            update_train_dataset(config, cfg, initial_train_names)
-            cfg, dataset_dicts_train, val_value = Train_MaskRCNN(config, weightsfolder, gpu_num, 0, 0, dropout_probability, init=False)
-            cfg = Eval_MaskRCNN(cfg, config, dataset_dicts_train, weightsfolder, resultsfolder, csv_name, 0, init=False)
+            cfg, dataset_dicts_train, val_value = Train_MaskRCNN(config, weightsfolder, gpu_num, 0, 0, dropout_probability, init=True)
+            store_initial_val_value(val_value, weightsfolder)
+        
+        cfg = Eval_MaskRCNN(cfg, config, dataset_dicts_train, weightsfolder, resultsfolder, csv_name, 0, init=True)
         train_names = get_train_names(dataset_dicts_train, config['traindir'])
         write_train_files(train_names, resultsfolder, 0)
         
diff --git a/maskAL.yaml b/maskAL.yaml
index c6d0f19..ca82bbb 100644
--- a/maskAL.yaml
+++ b/maskAL.yaml
@@ -6,12 +6,16 @@
 weightsroot: "./weights"
 resultsroot: "./results"
 dataroot: "./datasets"
-use_initial_train_dir: False
 initial_train_dir: "./datasets/initial_train"
 traindir: "./datasets/train"
 valdir: "./datasets/val"
 testdir: "./datasets/test"
 
+# data options
+use_initial_train_dir: False
+duplicate_initial_model_and_data: False
+initial_train_file: ""
+
 # network parameters
 network_config: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
 pretrained_weights: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
@@ -45,7 +49,7 @@ nms_threshold: 0.2
 
 # active-learning sampling
 experiment_name: 'exp1'
-strategies: 'uncertainty'
+strategy: 'uncertainty'
 mode: 'mean'
 initial_datasize: 100
 pool_size: 200
diff --git a/types.yaml b/types.yaml
index 1bd1d43..102e504 100644
--- a/types.yaml
+++ b/types.yaml
@@ -4,12 +4,16 @@
 weightsroot: 'string'
 resultsroot: 'string'
 dataroot: 'string'
-use_initial_train_dir: 'boolean'
 initial_train_dir: 'string'
 traindir: 'string'
 valdir: 'string'
 testdir: 'string'
 
+# data options
+use_initial_train_dir: 'boolean'
+duplicate_initial_model_and_data: 'boolean'
+initial_train_file: 'string'
+
 # network parameters
 network_config: 'string'
 pretrained_weights: 'string'
@@ -43,7 +47,7 @@ nms_threshold: 'float'
 
 # active-learning sampling
 experiment_name: 'string'
-strategies: ['string', 'list']
+strategy: ['string', 'list']
 mode: ['string', 'list']
 initial_datasize: 'integer'
 pool_size: ['integer', 'list']