Smarter prediction #21

Open

wants to merge 2 commits into base: main
1 change: 1 addition & 0 deletions bayesian_cnn_prometheus/evaluation/utils.py
@@ -19,6 +19,7 @@ def get_lungs_bounding_box_coords(mask: np.ndarray) -> Tuple[slice, slice, slice]:

def load_lungs_mask(path: str) -> np.ndarray:
    image = load_nifti_file(path)

    return image.astype(bool).astype('int16')


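As a reading aid (not part of the diff, just a minimal sketch): load_lungs_mask collapses any multi-label NIfTI segmentation into a binary {0, 1} mask, since every nonzero label maps to True and then to 1.

import numpy as np

# Hypothetical mask with labels 0 (background), 1 (left lung), 2 (right lung).
mask = np.array([[0, 1], [2, 0]])
binary = mask.astype(bool).astype('int16')  # nonzero -> 1, zero -> 0
print(binary)  # [[0 1]
               #  [1 0]]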
16 changes: 16 additions & 0 deletions bayesian_cnn_prometheus/preprocessing/data_generator.py
@@ -29,6 +29,7 @@ class DataGeneratorConfig:
    chunk_size: Tuple[int, int, int]
    should_shuffle: bool
    should_augment: bool = True
    cutoff_threshold: float = 0.9


class DataGenerator:
@@ -45,6 +46,8 @@ def __init__(self, preprocessing_config: Dict, batch_size: int):
        self.config = config = DataGeneratorConfig(
            **preprocessing_config['create_chunks'])

        self.threshold = preprocessing_config.get('cutoff_threshold', None) or 0.9

        self.data_splitter = DataSplitter(preprocessing_config['create_data_structure'],
                                          preprocessing_config['update_healthy_patients_indices'])
        self.dataset_structure = self.data_splitter.split_indices()
@@ -90,6 +93,10 @@ def _generate_data(self, dataset_type: str, batch_size: int):

        for x_chunk, y_chunk in zip(self._generate_chunks(x_npy_norm, self.config.chunk_size, self.config.stride),
                                    self._generate_chunks(y_npy, self.config.chunk_size, self.config.stride)):

            if not self._is_mostly_lung_chunk(y_chunk):
                continue

            x_chunk = x_chunk.reshape((*x_chunk.shape, 1))
            y_chunk = y_chunk.reshape((*y_chunk.shape, 1))

@@ -101,6 +108,15 @@ def _generate_data(self, dataset_type: str, batch_size: int):
            images_chunks, targets_chunks = self._shuffle_chunks(images_chunks, targets_chunks)
            yield np.array(images_chunks), np.array(targets_chunks)


    def _is_mostly_lung_chunk(self, chunk_mask: np.array):
        labels_sum_if_lung = np.ones(chunk_mask.shape).sum()
        actual_sum = chunk_mask.sum()
        return (actual_sum / labels_sum_if_lung) > self.threshold

    def _image_flow(self, dataset_type: str):
        for image_index in self.dataset_structure[dataset_type]:
            x_npy, y_npy = self.image_loader.load(image_index)
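For context (an illustrative sketch, not part of the diff): _is_mostly_lung_chunk computes the fraction of lung voxels in the chunk's mask, and the generator skips chunks whose fraction does not exceed cutoff_threshold. Note that np.ones(chunk_mask.shape).sum() is simply the chunk's voxel count, i.e. chunk_mask.size. Assuming a binary mask, the behaviour is roughly:

import numpy as np

def is_mostly_lung_chunk(chunk_mask: np.ndarray, threshold: float = 0.9) -> bool:
    # Fraction of voxels labelled as lung; chunk_mask is assumed to be binary (0/1).
    lung_fraction = chunk_mask.sum() / chunk_mask.size
    return lung_fraction > threshold

# Hypothetical 4x4x4 chunk that is all lung except a single voxel.
chunk = np.ones((4, 4, 4), dtype='int16')
chunk[0, 0, 0] = 0
print(is_mostly_lung_chunk(chunk))  # True: 63/64 ≈ 0.98 > 0.9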
@@ -44,4 +44,4 @@
"0140",
"0600"
]
}
}
11 changes: 7 additions & 4 deletions bayesian_cnn_prometheus/preprocessing/image_loader.py
@@ -19,12 +19,13 @@ def load(self, image_index: str) -> Tuple[np.ndarray, np.ndarray]:
        :param image_index: index of image to be transformed
        :return: image and target as numpy arrays
        """
        image_file_path, target_file_path = self._get_files_names(
        image_file_path, target_file_path, lesion_mask_path = self._get_files_names(
            image_index, 'nii.gz')
        image = self._load_image(image_file_path)
        mask = load_lungs_mask(str(target_file_path))
        lungs_bounding_box = get_lungs_bounding_box_coords(mask)
        return image[lungs_bounding_box], mask[lungs_bounding_box]
        lesion_mask = self._load_nifti_as_npy(lesion_mask_path)
        return image[lungs_bounding_box], (mask[lungs_bounding_box] - lesion_mask[lungs_bounding_box])

    def _load_image(self, path):
        if self.extension == 'nii.gz':
@@ -49,7 +50,7 @@ def _load_nifti_as_npy(nifti_file_path: Path) -> np.ndarray:
        raise Exception(f'File {nifti_file_path} does not exist!')

    @staticmethod
    def _get_files_names(image_index: str, file_format: str) -> Tuple[Path, Path]:
    def _get_files_names(image_index: str, file_format: str) -> Tuple[Path, Path, Path]:
        """
        On the base of the image index generates paths to image and target arrays.
        :param image_index: index of the image to be transformed
@@ -60,4 +61,6 @@ def _get_files_names(image_index: str, file_format: str) -> Tuple[Path, Path]:
            f'{image_index:0>4}', file_format)
        target_file_path = str(Paths.REFERENCE_SEGMENTATION_FILE_PATTERN_PATH).format(
            f'{image_index:0>4}', file_format)
        return Path(image_file_path), Path(target_file_path)
        lesion_mask_path = str(Paths.MASK_FILE_PATTERN_PATH).format(
            f'{image_index:0>4}', file_format)
        return Path(image_file_path), Path(target_file_path), Path(lesion_mask_path)
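For context (illustrative only, assuming both masks are binary and the lesion mask lies inside the lung mask): subtracting the lesion mask from the lung mask zeroes out lesion voxels, so the returned target is "lung without lesions". If the lesion mask could extend outside the lungs, the difference would contain -1 values and might need clipping.

import numpy as np

lung_mask = np.array([0, 1, 1, 1, 0])    # hypothetical 1-D slice of the lung mask
lesion_mask = np.array([0, 0, 1, 0, 0])  # hypothetical lesion contained in the lung
target = lung_mask - lesion_mask         # lesion voxels become 0 in the target
print(target)  # [0 1 0 1 0]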
44 changes: 14 additions & 30 deletions bayesian_cnn_prometheus/run_training_for_experiments.py
@@ -13,7 +13,7 @@
from itertools import chain


EXPERIMENTS_DIR = Path('experiments')/'control_group'
EXPERIMENTS_DIR = Path('experiments')/'correct_data_augmentation'
EXPERIMENTS_DIR = str(EXPERIMENTS_DIR)


@@ -146,49 +146,33 @@ def parse():


if __name__ == '__main__':
    stride_exp = {
        'name': 'stride_change',  # Assumption: smaller stride will improve model quality because model will see more data,
        # and more importantly it will see similar data in different contexts

    exp = {
        'name': 'chunk_change',  # Assumption: bigger window will see more context and be able to get more precise results
        'overrides': [
            {
                'alias': 's',
                'key': 'preprocessing.create_chunks.stride',
                'key': 'preprocessing.create_chunks.chunk_size',
                'values': [
                    [64, 8, 8],
                    [64, 16, 16],
                    [128, 16, 16],
                    [128, 32, 32]
                    [128, 16, 16]
                ]
            },
            {
                'alias': 'cs',
                'key': 'preprocessing.create_chunks.chunk_size',
                'values': [[128, 16, 16]]
            }
        ],
    }

    chunk_exp = {
        'name': 'chunk_change',  # Assumption: bigger window will see more context and be able to get more precise results
        'overrides': [
            {
                'alias': 's',
                'key': 'preprocessing.create_chunks.chunk_size',
                'key': 'preprocessing.create_chunks.stride',
                'values': [
                    [4, 256, 4],
                    [8, 256, 8],
                    [32, 64, 32],
                    [8, 128, 32]
                    [64, 64, 32],
                    # [128,16,16]
                ]
            },
            {
                'alias': 'cs',
                'key': 'preprocessing.create_chunks.stride',
                'values': [[16, 64, 16]]
            }
                'alias': 'th',
                'key': 'preprocessing.create_chunks.cutoff_threshold',
                'values': [0.9]
            },
        ],
    }
    experiments = [chunk_exp]
    experiments = [exp]
    args = Args.parse()
    experiments = list(chain(*[ExperimentSetup.from_accumulated_dict(e) for e in experiments]))
    run_tests(experiments, args.is_local_execution)
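The overrides list is expanded by ExperimentSetup.from_accumulated_dict, which is not shown in this diff; presumably each combination of the listed values becomes one experiment. A hypothetical sketch of that expansion (names and behaviour assumed, not taken from the repository):

from itertools import product

overrides = [
    {'alias': 's', 'key': 'preprocessing.create_chunks.chunk_size',
     'values': [[64, 16, 16], [128, 16, 16]]},
    {'alias': 'th', 'key': 'preprocessing.create_chunks.cutoff_threshold',
     'values': [0.9]},
]

# Cartesian product over the value lists -> one override set per run.
runs = [dict(zip((o['key'] for o in overrides), combo))
        for combo in product(*(o['values'] for o in overrides))]
print(len(runs))  # 2 runs in this sketch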
4 changes: 2 additions & 2 deletions run_python_script_TEMPLATE.sh
@@ -13,13 +13,13 @@
#SBATCH --mem-per-cpu=15GB

## Maximum job duration (format HH:MM:SS)
#SBATCH --time=12:00:00
#SBATCH --time=70:00:00

## Name of the grant to be charged for resource usage
#SBATCH -A plgonwelo

## Partition specification
#SBATCH -p plgrid-gpu-v100
#SBATCH -p plgrid-gpu
#SBATCH --gres=gpu

## Standard output file