Skip to content
This repository has been archived by the owner on Jan 7, 2025. It is now read-only.

Supporting DICOM via pydicom #1136

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions digits/dataset/images/classification/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def json_dict(self, verbose=False):
"image_width": t.image_dims[0],
"image_height": t.image_dims[1],
"image_channels": t.image_dims[2],
"image_bpp": t.resize_bpp,
"backend": t.backend,
"encoding": t.encoding,
"compression": t.compression,
Expand Down
9 changes: 8 additions & 1 deletion digits/dataset/images/classification/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def from_folders(job, form):
backend = backend,
image_dims = job.image_dims,
resize_mode = job.resize_mode,
resize_bpp = job.resize_bpp,
encoding = encoding,
compression = compression,
mean_file = utils.constants.MEAN_FILE_CAFFE,
Expand All @@ -126,6 +127,7 @@ def from_folders(job, form):
backend = backend,
image_dims = job.image_dims,
resize_mode = job.resize_mode,
resize_bpp = job.resize_bpp,
encoding = encoding,
compression = compression,
labels_file = job.labels_file,
Expand All @@ -142,6 +144,7 @@ def from_folders(job, form):
backend = backend,
image_dims = job.image_dims,
resize_mode = job.resize_mode,
resize_bpp = job.resize_bpp,
encoding = encoding,
compression = compression,
labels_file = job.labels_file,
Expand Down Expand Up @@ -188,6 +191,7 @@ def from_files(job, form):
image_dims = job.image_dims,
image_folder= image_folder,
resize_mode = job.resize_mode,
resize_bpp = job.resize_bpp,
encoding = encoding,
compression = compression,
mean_file = utils.constants.MEAN_FILE_CAFFE,
Expand Down Expand Up @@ -220,6 +224,7 @@ def from_files(job, form):
image_dims = job.image_dims,
image_folder= image_folder,
resize_mode = job.resize_mode,
resize_bpp = job.resize_bpp,
encoding = encoding,
compression = compression,
labels_file = job.labels_file,
Expand Down Expand Up @@ -251,6 +256,7 @@ def from_files(job, form):
image_dims = job.image_dims,
image_folder= image_folder,
resize_mode = job.resize_mode,
resize_bpp = job.resize_bpp,
encoding = encoding,
compression = compression,
labels_file = job.labels_file,
Expand Down Expand Up @@ -303,7 +309,8 @@ def create():
int(form.resize_width.data),
int(form.resize_channels.data),
),
resize_mode = form.resize_mode.data
resize_mode = form.resize_mode.data,
resize_bpp = int(form.resize_bpp.data)
)

if form.method.data == 'folder':
Expand Down
5 changes: 5 additions & 0 deletions digits/dataset/images/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,8 @@ class ImageDatasetForm(DatasetForm):
choices=ImageDatasetJob.resize_mode_choices(),
tooltip = "Options for dealing with aspect ratio changes during resize. See examples below."
)
resize_bpp = utils.forms.SelectField(u'Bits per pixel',
default='8',
choices=[('8', '8-bit (color or grayscale)'), ('32', '32-bit floating point (grayscale only)')],
tooltip="Storing 32-bit floating point for certain medical images."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be useful to have a more verbose tooltip stating that the bit depth is only checked, not enforced through conversion.

)
1 change: 1 addition & 0 deletions digits/dataset/images/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(self, **kwargs):
"""
self.image_dims = kwargs.pop('image_dims', None)
self.resize_mode = kwargs.pop('resize_mode', None)
self.resize_bpp = kwargs.pop('resize_bpp', None)

super(ImageDatasetJob, self).__init__(**kwargs)
self.pickver_job_dataset_image = PICKLE_VERSION
Expand Down
2 changes: 2 additions & 0 deletions digits/dataset/tasks/create_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def __init__(self, input_file, db_name, backend, image_dims, **kwargs):
self.shuffle = kwargs.pop('shuffle', True)
self.resize_mode = kwargs.pop('resize_mode' , None)
self.encoding = kwargs.pop('encoding', None)
self.resize_bpp = kwargs.pop('resize_bpp', None)
self.compression = kwargs.pop('compression', None)
self.mean_file = kwargs.pop('mean_file', None)
self.labels_file = kwargs.pop('labels_file', None)
Expand Down Expand Up @@ -147,6 +148,7 @@ def task_arguments(self, resources, env):
'--backend=%s' % self.backend,
'--channels=%s' % self.image_dims[2],
'--resize_mode=%s' % self.resize_mode,
'--resize_bpp=%s' % self.resize_bpp
]

if self.mean_file is not None:
Expand Down
7 changes: 7 additions & 0 deletions digits/templates/datasets/images/classification/new.html
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ <h1>New Image Classification Dataset</h1>
{{ form.resize_mode(class='form-control') }}
</div>
</div>
<div class="row">
<div class="form-group{{mark_errors([form.resize_bpp])}}">
{{ form.resize_bpp.label }}
{{ form.resize_bpp.tooltip }}
{{ form.resize_bpp(class='form-control') }}
</div>
</div>
<div class="row">
<a class="btn btn-info" href="#" onClick="return showResizeExample();">See example</a>
<br>
Expand Down
2 changes: 2 additions & 0 deletions digits/templates/datasets/images/classification/show.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ <h4>Job Information</h4>
<dd>{{'Color' if job.image_dims[2] == 3 else 'Grayscale'}}</dd>
<dt>Resize Transformation</dt>
<dd>{{ job.resize_mode_name() }}</dd>
<dt>Image bit-depth</dt>
<dd>{{ job.resize_bpp }}</dd>
<dt>DB Backend</dt>
<dd>{{job.get_backend()}}</dd>
<dt>Image Encoding</dt>
Expand Down
2 changes: 2 additions & 0 deletions digits/templates/datasets/images/classification/summary.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ <h4>
GRAYSCALE
{% endif %}
</dd>
<dt>Image bit-depth</dt>
<dd>{{dataset.resize_bpp}}</dd>
<dt>DB backend</dt>
<dd>{{dataset.get_backend()}}</dd>
{% for task in dataset.create_db_tasks() %}
Expand Down
1 change: 1 addition & 0 deletions digits/templates/datasets/images/generic/summary.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ <h4>
<li><b>Image Count</b> - {{task.image_count}}</li>
<li><b>Image Dimensions</b> -
{{task.image_width}}x{{task.image_height}}x{{task.image_channels}}</li>
<li><b>Image bit-depth</b></li>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this useful?

Copy link
Contributor Author

@IsaacYangSLA IsaacYangSLA Oct 6, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed your point. That line of code is complete nonsense. I removed it in the latest commit.

</ul>
{% endfor %}
</ul>
Expand Down
70 changes: 60 additions & 10 deletions digits/tools/create_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def create_db(input_file, output_dir,
image_width, image_height, image_channels,
backend,
resize_mode = None,
resize_bpp = None,
image_folder = None,
shuffle = True,
mean_files = None,
Expand Down Expand Up @@ -241,6 +242,8 @@ def create_db(input_file, output_dir,
raise ValueError('invalid number of channels')
if resize_mode not in [None, 'crop', 'squash', 'fill', 'half_crop']:
raise ValueError('invalid resize_mode')
if resize_bpp not in [None, '8', '32']:
raise ValueError('invalid resize_bpp')
if image_folder is not None and not os.path.exists(image_folder):
raise ValueError('image_folder does not exist')
if mean_files:
Expand Down Expand Up @@ -269,11 +272,14 @@ def create_db(input_file, output_dir,
write_queue = Queue.Queue(2*batch_size)
summary_queue = Queue.Queue()

# Init helper function for notification between threads
_notification(reset=True)

for _ in xrange(num_threads):
p = threading.Thread(target=_load_thread,
args=(load_queue, write_queue, summary_queue,
image_width, image_height, image_channels,
resize_mode, image_folder, compute_mean),
resize_mode, image_folder, compute_mean, resize_bpp),
kwargs={'backend': backend,
'encoding': kwargs.get('encoding', None)},
)
Expand Down Expand Up @@ -331,6 +337,9 @@ def _create_lmdb(image_count, write_queue, batch_size, output_dir,

processed_something = False

if _notification():
break

if not summary_queue.empty():
result_count, result_sum = summary_queue.get()
images_loaded += result_count
Expand Down Expand Up @@ -360,6 +369,9 @@ def _create_lmdb(image_count, write_queue, batch_size, output_dir,
_write_batch_lmdb(db, batch, images_written)
images_written += len(batch)

if _notification():
raise WriteError('. '.join(_notification()))

if images_loaded == 0:
raise LoadError('no images loaded from input file')
logger.debug('%s images loaded' % images_loaded)
Expand Down Expand Up @@ -498,6 +510,28 @@ def _fill_load_queue(filename, queue, shuffle):

return valid_lines

def _notification(reset=False, message=None):
    """
    Store and retrieve error messages passed between threads.

    The messages are kept in a list stored as an attribute on this
    function, so every caller shares the same list.

    Args:
    reset: clear the message list if True (any message passed is ignored)
    message: the error message to append to the list

    Returns:
    False: if queried (no reset, no message) and no message is stored
    The messages (a list): if queried and some messages are stored
    None: after a reset or after storing a message
    """
    if reset:
        _notification.messages = []
        return None

    # Create the list on first use so a call made before any reset does not
    # fail with AttributeError; setdefault keeps an already-existing list.
    messages = vars(_notification).setdefault('messages', [])

    if message is not None:
        messages.append(message)
        return None

    return messages if messages else False

def _parse_line(line, distribution):
"""
Parse a line in the input file into (path, label)
Expand Down Expand Up @@ -541,7 +575,7 @@ def _calculate_num_threads(batch_size, shuffle):

def _load_thread(load_queue, write_queue, summary_queue,
image_width, image_height, image_channels,
resize_mode, image_folder, compute_mean,
resize_mode, image_folder, compute_mean, resize_bpp,
backend=None, encoding=None):
"""
Consumes items in load_queue
Expand Down Expand Up @@ -574,17 +608,21 @@ def _load_thread(load_queue, write_queue, summary_queue,
image_height, image_width,
channels = image_channels,
resize_mode = resize_mode,
resize_bpp = resize_bpp
)

if compute_mean:
image_sum += image

if backend == 'lmdb':
datum = _array_to_datum(image, label, encoding)
write_queue.put(datum)
else:
write_queue.put((image, label))

try:
if backend == 'lmdb':
datum = _array_to_datum(image, label, encoding)
write_queue.put(datum)
else:
write_queue.put((image, label))
except IOError as e: # report error to user (possibly save 16-bit image to PNG/JPG)
_notification(message=e.message)
break
images_added += 1

summary_queue.put((images_added, image_sum))
Expand Down Expand Up @@ -616,6 +654,8 @@ def _array_to_datum(image, label, encoding):
image = image[np.newaxis,:,:]
else:
raise Exception('Image has unrecognized shape: "%s"' % image.shape)
if np.issubdtype(image.dtype, float):
image = image.astype(float)
datum = caffe.io.array_to_datum(image, label)
else:
datum = caffe_pb2.Datum()
Expand Down Expand Up @@ -667,7 +707,7 @@ def _save_means(image_sum, image_count, mean_files):
"""
Save mean[s] to file
"""
mean = np.around(image_sum / image_count).astype(np.uint8)
mean = np.around(image_sum / image_count).astype(np.float)
for mean_file in mean_files:
if mean_file.lower().endswith('.npy'):
np.save(mean_file, mean)
Expand All @@ -693,7 +733,13 @@ def _save_means(image_sum, image_count, mean_files):
with open(mean_file, 'wb') as outfile:
outfile.write(blob.SerializeToString())
elif mean_file.lower().endswith(('.jpg', '.jpeg', '.png')):
image = PIL.Image.fromarray(mean)
#ensure pixel range is within supported format
if mean.max() < 256: # works for three formats
image = PIL.Image.fromarray(mean.astype(np.uint8))
elif mean_file.lower().endswith('.png'): # png supports higher color depth
image = PIL.Image.fromarray(mean).convert('I')
else: # reduce color depth for jpg or jpeg
image = PIL.Image.fromarray(mean*255/mean.max()).convert('L')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can't we resort to 16-bit png in this case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. I don't think any pixel in mean can be greater than 65535. We should be able to do that for PNG case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gheinrich, it looks like if the max value is greater than 255 but much smaller than 65536 (for example, around 800) and we save it to a PNG file, the image is almost black. The reason, I believe, is that the dynamic range of 16-bit PNG is much higher than the mean pixel values. We can either scale it up to the full dynamic range of 16-bit PNG, or scale it down to 8-bit PNG/JPG. Both look the same on screen.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, it's impossible to know how best to visualize data on the server side. This is something we discussed with @jmancewicz. Joe thinks the best way to deal with this is to have the user choose the amount of contrast, etc. through sliders on the client side. I agree with him.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like a good idea.

image.save(mean_file)
else:
logger.warning('Unrecognized file extension for mean file: "%s"' % mean_file)
Expand Down Expand Up @@ -730,6 +776,9 @@ def _save_means(image_sum, image_count, mean_files):
parser.add_argument('-r', '--resize_mode',
help='resize mode for images (must be "crop", "squash" [default], "fill" or "half_crop")'
)
# Bit depth of the stored images; create_db() accepts only the strings '8' and '32'.
parser.add_argument('--resize_bpp',
    help='bits per pixel for resized images (must be "8" (color/grayscale) or "32" (grayscale only))'
)
parser.add_argument('-m', '--mean_file', action='append',
help="location to output the image mean (doesn't save mean if not specified)")
parser.add_argument('-f', '--image_folder',
Expand Down Expand Up @@ -766,6 +815,7 @@ def _save_means(image_sum, image_count, mean_files):
args['width'], args['height'], args['channels'],
args['backend'],
resize_mode = args['resize_mode'],
resize_bpp = args['resize_bpp'],
image_folder = args['image_folder'],
shuffle = args['shuffle'],
mean_files = args['mean_file'],
Expand Down
Loading