Smarter prediction #21

Open

wants to merge 2 commits into base: main
1 change: 1 addition & 0 deletions bayesian_cnn_prometheus/evaluation/utils.py
@@ -19,6 +19,7 @@ def get_lungs_bounding_box_coords(mask: np.ndarray) -> Tuple[slice, slice, slice]:

def load_lungs_mask(path: str) -> np.ndarray:
    image = load_nifti_file(path)

    return image.astype(bool).astype('int16')


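As a reading aid (not part of the diff, just a minimal sketch): load_lungs_mask collapses any multi-label NIfTI segmentation into a binary {0, 1} mask, since every nonzero label maps to True and then to 1.

import numpy as np

# Hypothetical mask with labels 0 (background), 1 (left lung), 2 (right lung).
mask = np.array([[0, 1], [2, 0]])
binary = mask.astype(bool).astype('int16')  # nonzero -> 1, zero -> 0
print(binary)  # [[0 1]
               #  [1 0]]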
16 changes: 16 additions & 0 deletions bayesian_cnn_prometheus/preprocessing/data_generator.py
@@ -29,6 +29,7 @@ class DataGeneratorConfig:
    chunk_size: Tuple[int, int, int]
    should_shuffle: bool
    should_augment: bool = True
    cutoff_threshold: float = 0.9


class DataGenerator:
@@ -45,6 +46,8 @@ def __init__(self, preprocessing_config: Dict, batch_size: int):
        self.config = config = DataGeneratorConfig(
            **preprocessing_config['create_chunks'])

        self.threshold = preprocessing_config.get('cutoff_threshold', None) or 0.9

        self.data_splitter = DataSplitter(preprocessing_config['create_data_structure'],
                                          preprocessing_config['update_healthy_patients_indices'])
        self.dataset_structure = self.data_splitter.split_indices()
@@ -90,6 +93,10 @@ def _generate_data(self, dataset_type: str, batch_size: int):

        for x_chunk, y_chunk in zip(self._generate_chunks(x_npy_norm, self.config.chunk_size, self.config.stride),
                                    self._generate_chunks(y_npy, self.config.chunk_size, self.config.stride)):

            if not self._is_mostly_lung_chunk(y_chunk):
                continue

            x_chunk = x_chunk.reshape((*x_chunk.shape, 1))
            y_chunk = y_chunk.reshape((*y_chunk.shape, 1))

@@ -101,6 +108,15 @@ def _generate_data(self, dataset_type: str, batch_size: int):
            images_chunks, targets_chunks = self._shuffle_chunks(images_chunks, targets_chunks)
            yield np.array(images_chunks), np.array(targets_chunks)


    def _is_mostly_lung_chunk(self, chunk_mask: np.array):
        labels_sum_if_lung = np.ones(chunk_mask.shape).sum()
        actual_sum = chunk_mask.sum()
        return (actual_sum / labels_sum_if_lung) > self.threshold

    def _image_flow(self, dataset_type: str):
        for image_index in self.dataset_structure[dataset_type]:
            x_npy, y_npy = self.image_loader.load(image_index)
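For context (an illustrative sketch, not part of the diff): _is_mostly_lung_chunk computes the fraction of lung voxels in the chunk's mask, and the generator skips chunks whose fraction does not exceed cutoff_threshold. Note that np.ones(chunk_mask.shape).sum() is simply the chunk's voxel count, i.e. chunk_mask.size. Assuming a binary mask, the behaviour is roughly:

import numpy as np

def is_mostly_lung_chunk(chunk_mask: np.ndarray, threshold: float = 0.9) -> bool:
    # Fraction of voxels labelled as lung; chunk_mask is assumed to be binary (0/1).
    lung_fraction = chunk_mask.sum() / chunk_mask.size
    return lung_fraction > threshold

# Hypothetical 4x4x4 chunk that is all lung except a single voxel.
chunk = np.ones((4, 4, 4), dtype='int16')
chunk[0, 0, 0] = 0
print(is_mostly_lung_chunk(chunk))  # True: 63/64 ≈ 0.98 > 0.9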
@@ -44,4 +44,4 @@
"0140",
"0600"
]
}
}
11 changes: 7 additions & 4 deletions bayesian_cnn_prometheus/preprocessing/image_loader.py
@@ -19,12 +19,13 @@ def load(self, image_index: str) -> Tuple[np.ndarray, np.ndarray]:
        :param image_index: index of image to be transformed
        :return: image and target as numpy arrays
        """
        image_file_path, target_file_path = self._get_files_names(
        image_file_path, target_file_path, lesion_mask_path = self._get_files_names(
            image_index, 'nii.gz')
        image = self._load_image(image_file_path)
        mask = load_lungs_mask(str(target_file_path))
        lungs_bounding_box = get_lungs_bounding_box_coords(mask)
        return image[lungs_bounding_box], mask[lungs_bounding_box]
        lesion_mask = self._load_nifti_as_npy(lesion_mask_path)
        return image[lungs_bounding_box], (mask[lungs_bounding_box] - lesion_mask[lungs_bounding_box])

    def _load_image(self, path):
        if self.extension == 'nii.gz':
@@ -49,7 +50,7 @@ def _load_nifti_as_npy(nifti_file_path: Path) -> np.ndarray:
        raise Exception(f'File {nifti_file_path} does not exist!')

    @staticmethod
    def _get_files_names(image_index: str, file_format: str) -> Tuple[Path, Path]:
    def _get_files_names(image_index: str, file_format: str) -> Tuple[Path, Path, Path]:
        """
        On the base of the image index generates paths to image and target arrays.
        :param image_index: index of the image to be transformed
@@ -60,4 +61,6 @@ def _get_files_names(image_index: str, file_format: str) -> Tuple[Path, Path]:
            f'{image_index:0>4}', file_format)
        target_file_path = str(Paths.REFERENCE_SEGMENTATION_FILE_PATTERN_PATH).format(
            f'{image_index:0>4}', file_format)
        return Path(image_file_path), Path(target_file_path)
        lesion_mask_path = str(Paths.MASK_FILE_PATTERN_PATH).format(
            f'{image_index:0>4}', file_format)
        return Path(image_file_path), Path(target_file_path), Path(lesion_mask_path)
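For context (illustrative only, assuming both masks are binary and the lesion mask lies inside the lung mask): subtracting the lesion mask from the lung mask zeroes out lesion voxels, so the returned target is "lung without lesions". If the lesion mask could extend outside the lungs, the difference would contain -1 values and might need clipping.

import numpy as np

lung_mask = np.array([0, 1, 1, 1, 0])    # hypothetical 1-D slice of the lung mask
lesion_mask = np.array([0, 0, 1, 0, 0])  # hypothetical lesion contained in the lung
target = lung_mask - lesion_mask         # lesion voxels become 0 in the target
print(target)  # [0 1 0 1 0]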
44 changes: 14 additions & 30 deletions bayesian_cnn_prometheus/run_training_for_experiments.py
@@ -13,7 +13,7 @@
from itertools import chain


EXPERIMENTS_DIR = Path('experiments')/'control_group'
EXPERIMENTS_DIR = Path('experiments')/'correct_data_augmentation'
EXPERIMENTS_DIR = str(EXPERIMENTS_DIR)


@@ -146,49 +146,33 @@ def parse():


if __name__ == '__main__':
    stride_exp = {
        'name': 'stride_change',  # Assumption: smaller stride will improve model quality because model will see more data,
        # and more importantly it will see similar data in different contexts

    exp = {
        'name': 'chunk_change',  # Assumption: bigger window will see more context and be able to get more precise results
        'overrides': [
            {
                'alias': 's',
                'key': 'preprocessing.create_chunks.stride',
                'key': 'preprocessing.create_chunks.chunk_size',
                'values': [
                    [64, 8, 8],
                    [64, 16, 16],
                    [128, 16, 16],
                    [128, 32, 32]
                    [128, 16, 16]
                ]
            },
            {
                'alias': 'cs',
                'key': 'preprocessing.create_chunks.chunk_size',
                'values': [[128, 16, 16]]
            }
        ],
    }

    chunk_exp = {
        'name': 'chunk_change',  # Assumption: bigger window will see more context and be able to get more precise results
        'overrides': [
            {
                'alias': 's',
                'key': 'preprocessing.create_chunks.chunk_size',
                'key': 'preprocessing.create_chunks.stride',
                'values': [
                    [4, 256, 4],
                    [8, 256, 8],
                    [32, 64, 32],
                    [8, 128, 32]
                    [64, 64, 32],
                    # [128,16,16]
                ]
            },
            {
                'alias': 'cs',
                'key': 'preprocessing.create_chunks.stride',
                'values': [[16, 64, 16]]
            }
                'alias': 'th',
                'key': 'preprocessing.create_chunks.cutoff_threshold',
                'values': [0.9]
            },
        ],
    }
    experiments = [chunk_exp]
    experiments = [exp]
    args = Args.parse()
    experiments = list(chain(*[ExperimentSetup.from_accumulated_dict(e) for e in experiments]))
    run_tests(experiments, args.is_local_execution)
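The overrides list is expanded by ExperimentSetup.from_accumulated_dict, which is not shown in this diff; presumably each combination of the listed values becomes one experiment. A hypothetical sketch of that expansion (names and behaviour assumed, not taken from the repository):

from itertools import product

overrides = [
    {'alias': 's', 'key': 'preprocessing.create_chunks.chunk_size',
     'values': [[64, 16, 16], [128, 16, 16]]},
    {'alias': 'th', 'key': 'preprocessing.create_chunks.cutoff_threshold',
     'values': [0.9]},
]

# Cartesian product over the value lists -> one override set per run.
runs = [dict(zip((o['key'] for o in overrides), combo))
        for combo in product(*(o['values'] for o in overrides))]
print(len(runs))  # 2 runs in this sketch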
4 changes: 2 additions & 2 deletions run_python_script_TEMPLATE.sh
@@ -13,13 +13,13 @@
#SBATCH --mem-per-cpu=15GB

## Maximum job duration (format HH:MM:SS)
#SBATCH --time=12:00:00
#SBATCH --time=70:00:00

## Name of the grant to be charged for resource usage
#SBATCH -A plgonwelo

## Partition specification
#SBATCH -p plgrid-gpu-v100
#SBATCH -p plgrid-gpu
#SBATCH --gres=gpu

## Standard output file