
Commit

option to do a comparative experiment on the same initial dataset with the same initial model
Blok, Pieter committed Jan 29, 2022
1 parent 9534b15 commit 215beee
Showing 8 changed files with 222 additions and 51 deletions.
4 changes: 3 additions & 1 deletion MISC_SETTINGS.md
@@ -2,6 +2,8 @@ The following settings can probably stay unchanged: <br/> <br/>

| Setting | Description |
| --------------------------------------|-----------------------------------------------------------------------------------------------------------------------|
| duplicate_initial_model_and_data | Experimental mode: set this to **True** to duplicate a previously trained model and its dataset under a new settings-file, so that two sampling strategies can be compared from the same starting point. Default **False** |
| initial_train_file | Only when **duplicate_initial_model_and_data** is set to **True**: specify the txt-file that lists the initial training images |
| transfer_learning_on_previous_models | Whether to use the weight-files of the previous trainings for transfer-learning |
| warmup_iterations | The number of warmup-iterations that can be used to stabilize the training process |
| train_iterations_base | The number of training iterations to start the training with (this number of training iterations is used when the total number of training images is below the value of **step_image_number**) |
@@ -19,7 +21,7 @@ The following settings can probably stay unchanged: <br/> <br/>
| minority_classes | Only when the **"RepeatFactorTrainingSampler"** is used: specify the minority-classes that have to be repeated |
| repeat_factor_smallest_class | Only when the **"RepeatFactorTrainingSampler"** is used: specify the repeat-factor of the smallest class (use a value higher than 1.0 to repeat the minority classes) |
| experiment_name | Specify the name of your experiment |
-| strategies | Use **'uncertainty'** to select the most uncertain images for the active learning. Other options are **'random'** and **'certainty'** |
+| strategy | Use **'uncertainty'** to select the most uncertain images for the active learning. Other options are **'random'** and **'certainty'** |
| mode | Uncertainty sampling method. Use **'mean'** when you want to sample the most uncertain images, use **'min'** when you want to sample the most uncertain instances |
| equal_pool_size | When **True** this will sample the same **pool_size** for every sampling iteration. When **False**, an unequal **pool_size** will be sampled for the specified number of loops |
| dropout_probability | Specify the dropout probability between 0.1 and 0.9. Our experiments indicated that **0.25** is a good value |
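Taken together, the two new settings enable a paired comparison run. A minimal maskAL.yaml fragment sketching this, using the example paths from the configs added in this commit:

```yaml
# Comparative run: reuse the initial dataset and initial model produced by
# a previously finished 'uncertainty' experiment, then sample randomly.
duplicate_initial_model_and_data: True
initial_train_file: "./results/experiment/uncertainty/trainfiles_iteration000.txt"
pretrained_weights: "./weights/experiment/uncertainty/best_model_000.pth"
strategy: 'random'
```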
2 changes: 1 addition & 1 deletion README.md
@@ -51,11 +51,11 @@ Change the following settings in the maskAL.yaml file: <br/>
| weightsroot | The file directory where the weight-files are stored |
| resultsroot | The file directory where the result-files are stored |
| dataroot | The root directory where all image-files are stored |
-| use_initial_train_dir | Set this to **True** when you want to start the active-learning from an initial training dataset. When **False**, the initial dataset of size **initial_datasize** is randomly sampled from the **traindir** |
| initial_train_dir | When use_initial_train_dir is activated: the file directory where the initial training images and annotations are stored |
| traindir | The file directory where the training images and annotations are stored |
| valdir | The file directory where the validation images and annotations are stored |
| testdir | The file directory where the test images and annotations are stored |
+| use_initial_train_dir | Set this to **True** when you want to start the active-learning from an initial training dataset. When **False**, the initial dataset of size **initial_datasize** is randomly sampled from the **traindir** |
| network_config | The Mask R-CNN configuration-file (.yaml) file (see the folder './configs') |
| pretrained_weights | The pretrained weights to start the active-learning. Either specify the **network_config** (.yaml) or a custom weights-file (.pth or .pkl)|
| cuda_visible_devices | The identifiers of the CUDA device(s) you want to use for training and sampling (in string format, for example: '0,1') |
63 changes: 63 additions & 0 deletions active_learning/config/maskAL_experiment_random.yaml
@@ -0,0 +1,63 @@
# Parameters for maskAL
# python maskAL.py --config maskAL.yaml
# explanation can be found here: https://git.wur.nl/blok012/maskAL

# folders
weightsroot: "./weights"
resultsroot: "./results"
dataroot: "./datasets/experiment/random"
initial_train_dir: "./datasets"
traindir: "./datasets/train"
valdir: "./datasets/val"
testdir: "./datasets/test03"

# data options
use_initial_train_dir: False
duplicate_initial_model_and_data: True
initial_train_file: "./results/experiment/uncertainty/trainfiles_iteration000.txt"

# network parameters
network_config: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
pretrained_weights: "./weights/experiment/uncertainty/best_model_000.pth"

# training-parameters
cuda_visible_devices: '1'
classes: ['healthy', 'damaged', 'matured', 'cateye', 'headrot']
transfer_learning_on_previous_models: True
learning_rate: 0.005
warmup_iterations: 500
train_iterations_base: 2500
train_iterations_step_size: 2500
step_image_number: 500
step_ratios: [0.5, 0.8]
eval_period: 500
checkpoint_period: -1
weight_decay: 0.0001
learning_policy: 'steps_with_decay'
gamma: 0.1
train_batch_size: 1
num_workers: 2

# train-sampler
train_sampler: "RepeatFactorTrainingSampler"
minority_classes: ['damaged', 'matured', 'cateye', 'headrot']
repeat_factor_smallest_class: 1.5

# evaluation-parameters
confidence_threshold: 0.5
nms_threshold: 0.01

# active-learning sampling
experiment_name: 'experiment'
strategy: 'random'
mode: 'mean'
initial_datasize: 100
pool_size: 200
equal_pool_size: True
loops: 5
dropout_probability: 0.25
mcd_iterations: 20
iou_thres: 0.5
auto_annotate: False
export_format: labelme
supervisely_meta_json: "./datasets/meta.json"
63 changes: 63 additions & 0 deletions active_learning/config/maskAL_experiment_uncertainty.yaml
@@ -0,0 +1,63 @@
# Parameters for maskAL
# python maskAL.py --config maskAL.yaml
# explanation can be found here: https://git.wur.nl/blok012/maskAL

# folders
weightsroot: "./weights"
resultsroot: "./results"
dataroot: "./datasets/experiment/uncertainty"
initial_train_dir: "./datasets"
traindir: "./datasets/train"
valdir: "./datasets/val"
testdir: "./datasets/test03"

# data options
use_initial_train_dir: False
duplicate_initial_model_and_data: False
initial_train_file: ""

# network parameters
network_config: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml
pretrained_weights: COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml

# training-parameters
cuda_visible_devices: '0'
classes: ['healthy', 'damaged', 'matured', 'cateye', 'headrot']
transfer_learning_on_previous_models: True
learning_rate: 0.005
warmup_iterations: 500
train_iterations_base: 2500
train_iterations_step_size: 2500
step_image_number: 500
step_ratios: [0.5, 0.8]
eval_period: 500
checkpoint_period: -1
weight_decay: 0.0001
learning_policy: 'steps_with_decay'
gamma: 0.1
train_batch_size: 1
num_workers: 2

# train-sampler
train_sampler: "RepeatFactorTrainingSampler"
minority_classes: ['damaged', 'matured', 'cateye', 'headrot']
repeat_factor_smallest_class: 1.5

# evaluation-parameters
confidence_threshold: 0.5
nms_threshold: 0.01

# active-learning sampling
experiment_name: 'experiment'
strategy: 'uncertainty'
mode: 'mean'
initial_datasize: 100
pool_size: 200
equal_pool_size: True
loops: 5
dropout_probability: 0.25
mcd_iterations: 20
iou_thres: 0.5
auto_annotate: False
export_format: labelme
supervisely_meta_json: "./datasets/meta.json"
42 changes: 40 additions & 2 deletions active_learning/sampling/prepare_dataset.py
@@ -1,7 +1,7 @@
# @Author: Pieter Blok
# @Date: 2021-03-26 14:30:31
# @Last Modified by: Pieter Blok
-# @Last Modified time: 2021-12-09 13:55:46
+# @Last Modified time: 2022-01-27 15:00:20

import sys
import io
@@ -1394,6 +1394,20 @@ def calculate_iterations(config, dataset_dicts_train):
    max_iterations = config['train_iterations_base'] + ((div_factor - 1) * config['train_iterations_step_size'])
    steps = [int(s * max_iterations) for s in config['step_ratios']]
    return int(max_iterations), steps
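The hunk above computes the training schedule from the config values used in the two new YAML files. A self-contained sketch of that arithmetic, assuming `div_factor` counts how many chunks of **step_image_number** images the training set contains (its actual computation sits above this hunk, so that part is an assumption):

```python
import math

def iteration_schedule(config, num_train_images):
    # Assumption: div_factor grows by one for every step_image_number images
    div_factor = max(1, math.ceil(num_train_images / config['step_image_number']))
    max_iterations = config['train_iterations_base'] + ((div_factor - 1) * config['train_iterations_step_size'])
    # Learning-rate decay points, as fractions of the total iterations
    steps = [int(s * max_iterations) for s in config['step_ratios']]
    return int(max_iterations), steps

config = {'train_iterations_base': 2500, 'train_iterations_step_size': 2500,
          'step_image_number': 500, 'step_ratios': [0.5, 0.8]}
print(iteration_schedule(config, 300))  # 300 images fit in one chunk
print(iteration_schedule(config, 700))  # 700 images span two chunks
```

With the commit's config values, 300 images give 2500 iterations (decay at 1250 and 2000), while 700 images give 5000 iterations (decay at 2500 and 4000).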


def read_train_file(txt_file):
    initial_train_files = []
    txtfile = open(txt_file, 'r')
    lines = txtfile.readlines()

    for line in lines:
        if line != "\n":
            full_line = line.strip()
            train_file = full_line.split(",")[0]
            initial_train_files.append(train_file)
    txtfile.close()
    return initial_train_files
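`read_train_file` only relies on the image name being the first comma-separated field of each non-blank line; any further fields (an uncertainty value in the hypothetical file below is an assumption) are discarded. A runnable sketch of the same parsing logic:

```python
import os
import tempfile

# Hypothetical contents of a trainfiles_iteration000.txt
sample = "img_0001.jpg,0.43\nimg_0002.jpg,0.19\n\nimg_0003.jpg,0.57\n"

path = os.path.join(tempfile.mkdtemp(), "trainfiles_iteration000.txt")
with open(path, "w") as f:
    f.write(sample)

# Same logic as read_train_file: skip blank lines, keep the first field
initial_train_files = []
with open(path) as f:
    for line in f:
        if line != "\n":
            initial_train_files.append(line.strip().split(",")[0])

print(initial_train_files)  # ['img_0001.jpg', 'img_0002.jpg', 'img_0003.jpg']
```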


def prepare_all_dataset_randomly(rootdir, imgdir, classes, train_val_test_split, initial_datasize):
@@ -1477,7 +1491,31 @@ def prepare_initial_dataset(config):
    except Exception as e:
        print_exception(e)
        logger.error("Cannot create initial-datasets")
-        sys.exit("Closing application")
+        sys.exit("Closing application")


def prepare_initial_dataset_from_list(config, train_list):
    try:
        for i, (imgdir, name) in enumerate(zip([config['traindir'], config['valdir'], config['testdir']], ['train', 'val', 'test'])):
            print("")
            print("Processing {:s}-dataset: {:s}".format(name, imgdir))
            rename_xml_files(imgdir)
            images, annotations = list_files(imgdir)
            print("{:d} images found!".format(len(images)))
            print("{:d} annotations found!".format(len(annotations)))
            write_file(config['dataroot'], images, name)

            if name == "train":
                check_json_presence(config, imgdir, train_list, name)
                create_json(config['dataroot'], imgdir, train_list, config['classes'], name)
            else:
                check_json_presence(config, imgdir, images, name)
                create_json(config['dataroot'], imgdir, images, config['classes'], name)

    except Exception as e:
        print_exception(e)
        logger.error("Cannot create initial-datasets")
        sys.exit("Closing application")
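How the new functions are assumed to plug together: when **duplicate_initial_model_and_data** is enabled, `read_train_file` recovers the earlier experiment's training list and `prepare_initial_dataset_from_list` builds the train JSON from it; otherwise the existing `prepare_initial_dataset` path runs. The real wiring lives in maskAL.py, outside this diff, so the stubs below only illustrate the assumed call order:

```python
def read_train_file(txt_file):
    # Stub: the real function parses the txt-file (see prepare_dataset.py)
    return ["img_0001.jpg", "img_0002.jpg"]

def prepare_initial_dataset(config):
    # Stub standing in for the existing random-sampling path
    return "randomly sampled initial dataset"

def prepare_initial_dataset_from_list(config, train_list):
    # Stub standing in for the new duplication path
    return "duplicated initial dataset of {} images".format(len(train_list))

def prepare(config):
    # Assumed dispatch between the two paths
    if config.get('duplicate_initial_model_and_data', False):
        train_list = read_train_file(config['initial_train_file'])
        return prepare_initial_dataset_from_list(config, train_list)
    return prepare_initial_dataset(config)

print(prepare({'duplicate_initial_model_and_data': False}))
```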


def update_train_dataset(config, cfg, train_list):
