Skip to content

Commit

Permalink
compress DICOM using dcmtk
Browse files Browse the repository at this point in the history
  • Loading branch information
Sandip117 committed Sep 6, 2024
1 parent 8c5fa3e commit eae6f9c
Show file tree
Hide file tree
Showing 5 changed files with 262 additions and 24 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ WORKDIR /usr/local/src/pl-dicommake

COPY requirements.txt .
RUN pip install -r requirements.txt
# dcmtk provides the `dcmcjpeg` binary that dicommake.py shells out to for
# JPEG-lossless compression. Update, install and clean up in a single layer
# so the apt package lists are not baked into the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends dcmtk \
    && rm -rf /var/lib/apt/lists/*


COPY . .
ARG extras_require=none
Expand Down
59 changes: 41 additions & 18 deletions dicommake.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

from email.mime import image
from jobController import jobber
from pathlib import Path
from argparse import ArgumentParser, Namespace, ArgumentDefaultsHelpFormatter

Expand All @@ -19,7 +19,7 @@
from PIL import Image
import numpy as np
from loguru import logger
from pydicom.uid import ExplicitVRLittleEndian,RLELossless
from pydicom.uid import ExplicitVRLittleEndian

LOG = logger.debug
logger_format = (
Expand All @@ -36,7 +36,7 @@



__version__ = '2.3.4'
__version__ = '2.3.6'

DISPLAY_TITLE = r"""
_ _ _ _
Expand Down Expand Up @@ -103,7 +103,7 @@ def preamble_show(options: Namespace) -> None:
LOG("%25s: [%s]" % (k, v))
LOG("")

def image_intoDICOMinsert(image: Image.Image, ds: pydicom.Dataset, compress: bool) -> pydicom.Dataset:
def image_intoDICOMinsert(image: Image.Image, ds: pydicom.Dataset) -> pydicom.Dataset:
"""
Insert the "image" into the DICOM chassis "ds" and update/adapt
DICOM tags where necessary. Also create a new
Expand Down Expand Up @@ -159,11 +159,6 @@ def npimage_get(image):

# NB! If this is not set, images will not render properly in Cornerstone
ds.PlanarConfiguration = 0

if compress:
# Compress the pixel data
ds.compress(RLELossless)
ds.file_meta.TransferSyntaxUID = RLELossless
return ds

def doubly_map(x: PathMapper, y: PathMapper) -> Iterable[tuple[Path, Path, Path, Path]]:
Expand Down Expand Up @@ -345,6 +340,28 @@ def imageNames_areSame(imgfile:Path, dcmfile:Path) -> bool:
"""
return True if imgfile.stem == dcmfile.stem else False

def compress_DICOM(image: Image.Image, ds: pydicom.Dataset, op_path: str):
    """
    Compress the final DICOM to JPEG lossless encoding using
    `dcmcjpeg`, which is a tool available in the `dcmtk` package.

    The image is first inserted into the DICOM chassis and saved,
    uncompressed, to a scratch file; `dcmcjpeg` then reads that scratch
    file and writes the compressed result to `op_path`.

    Args:
        image (Image.Image): the image to insert into the DICOM.
        ds (pydicom.Dataset): the DICOM "chassis" that receives the image.
        op_path (str): where the compressed DICOM is written.

    Raises:
        RuntimeError: if `dcmcjpeg` exits with a non-zero return code; the
                      message is the tool's stderr.
    """
    import os
    import tempfile

    # Each call gets its own scratch file. A single fixed name would be
    # overwritten by any other call running at the same time, and would be
    # left behind after the run. The file is kept under /tmp because the
    # command line below is split on whitespace by jobber.job_run.
    # NOTE(review): for the same reason an `op_path` containing whitespace
    # cannot be passed to dcmcjpeg correctly.
    fd, tmp_path = tempfile.mkstemp(
        prefix='uncompressed-', suffix='.dcm', dir='/tmp'
    )
    os.close(fd)
    try:
        image_intoDICOMinsert(image, ds).save_as(tmp_path)
        LOG(f"Compressing final DICOM as {op_path}")
        shell = jobber({'verbosity': 1, 'noJobLogging': True})
        str_cmd = f"dcmcjpeg {tmp_path} {op_path}"

        d_response = shell.job_run(str_cmd)
        LOG(f"Command: {d_response['cmd']}")
        if d_response['returncode']:
            LOG(f"Error: {d_response['stderr']}")
            raise RuntimeError(d_response["stderr"])
        LOG("Response: File compressed successfully.")
    finally:
        # Always remove the uncompressed intermediate, success or failure.
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass

def imagePaths_process(*args) -> None:
"""
The input *args is a tuple that contains three
Expand All @@ -354,23 +371,29 @@ def imagePaths_process(*args) -> None:
unpack the arguments in either case.
"""
try:
dcm_in:Path = args[0][0]
img_in:Path = args[0][1]
dcm_out:Path = args[0][2]
dcm_in:Path = args[0][0]
img_in:Path = args[0][1]
dcm_out:Path = args[0][2]
b_compress:bool = args[0][3]
except:
dcm_in:Path = args[0]
img_in:Path = args[1]
dcm_out:Path = args[2]
dcm_in:Path = args[0]
img_in:Path = args[1]
dcm_out:Path = args[2]
b_compress: bool = args[3]

if imageNames_areSame(img_in, dcm_in):
image:Image.Image = Image.open(str(img_in))
DICOM:pydicom.Dataset = pydicom.dcmread(str(dcm_in))
LOG("Processing %s using %s" % (dcm_in.name, img_in.name))
image_intoDICOMinsert(image, DICOM, b_compress).save_as(str(dcm_out))

if b_compress:
compress_DICOM(image, DICOM, str(dcm_out))
else:
image_intoDICOMinsert(image, DICOM).save_as(str(dcm_out))

LOG("Saved %s" % dcm_out)


@chris_plugin(
parser = parser,
title = 'DICOM image make',
Expand Down Expand Up @@ -413,8 +436,8 @@ def main(options: Namespace, inputdir: Path, outputdir: Path) -> int:
for _ in results:
pass
else:
for dcm_in, img_in, dcm_out in mapper:
imagePaths_process(dcm_in, img_in, dcm_out, options.compress)
for dcm_in, img_in, dcm_out, compress in mapper:
imagePaths_process(dcm_in, img_in, dcm_out, compress)

return 0

Expand Down
216 changes: 216 additions & 0 deletions jobController.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
str_description = """
This module provides some very simple shell-based job running
methods.
"""


import subprocess
import os
import pudb
import json
import time
from pathlib import Path
from datetime import datetime
import uuid
import ast

class jobber:
    """
    A very small helper for running shell commands from python.

    The object is configured with a dictionary of "arguments". Two keys
    are read by the methods below:

        'verbosity'    -- non-zero echoes child output to the console
        'noJobLogging' -- True disables the file writes of `job_stdwrite`
    """

    def __init__(self, d_args : dict):
        """Constructor for the jobber class.

        Args:
            d_args (dict): a dictionary of "arguments" (parameters) for the
                           object. The dictionary is copied, so the caller's
                           object is never modified.
        """
        self.args = d_args.copy()
        self.execCmd: Path = Path('somefile.cmd')
        self.histlogPath: Path = Path('/tmp')
        # Fill in defaults only where the caller did not supply a value.
        self.args.setdefault('verbosity', 0)
        self.args.setdefault('noJobLogging', False)

    def v2JSONcli(self, v: str) -> str:
        """
        Attempt to convert a JSON string serialization explicitly into a
        string with enclosed single quotes. If the input is not a valid
        JSON string, simply return it unchanged.

        An input string of

            '{ "key1": "value1", "key2": N, "key3": true }'

        is explicitly enclosed in embedded single quotes:

            '\'{ "key1": "value1", "key2": N, "key3": true }\''

        Args:
            v (str): a value to process

        Returns:
            str: cli equivalent string.
        """
        try:
            json.loads(v)
        except (TypeError, ValueError):
            # Not parseable as JSON (json.JSONDecodeError is a ValueError):
            # hand the value back as plain text.
            return f"{v}"
        return f"'{v}'"

    def dict2cli(self, d_dict : dict) -> str:
        """Convert a dictionary into a cli conformant string.

        An input dictionary of

            {
                'key1': 'value1',
                'key2': 'value2',
                'key3': True,
                'key4': False
            }

        is converted to a string (note the trailing space after each
        entry, including the last):

            "--key1 value1 --key2 value2 --key3 "

        Boolean values become bare flags (emitted only when True). Empty
        values and None are omitted. Values that have no length (such as
        numbers) are rendered with str().

        Args:
            d_dict (dict): a python dictionary to convert

        Returns:
            str: cli equivalent string.
        """
        l_cli: list = []
        for k, v in d_dict.items():
            if isinstance(v, bool):
                if v:
                    l_cli.append(f'--{k} ')
                continue
            if v is None:
                continue
            try:
                b_empty = len(v) == 0
            except TypeError:
                # Unsized scalar, e.g. an int or float.
                l_cli.append(f'--{k} {v} ')
                continue
            if not b_empty:
                l_cli.append(f'--{k} {self.v2JSONcli(v)} ')
        return ''.join(l_cli)

    def job_run(self, str_cmd: str) -> dict:
        """
        Run `str_cmd` as a child process and wait for it to finish.

        Running some cli process via python is cumbersome. The typical/easy
        path of

            os.system(str_cmd)

        is deprecated and prone to hidden complexity. The preferred
        method is via subprocess, which has a cumbersome processing
        syntax. Still, this method runs the `str_cmd` and returns the
        stderr and stdout strings as well as a returncode.

        Providing realtime output of both stdout and stderr seems
        problematic. The approach here is to provide realtime
        output on stdout and only provide stderr on process completion.

        The command string is split on whitespace and executed without a
        shell, so arguments that themselves contain whitespace cannot be
        expressed.

        Args:
            str_cmd (str): the command line to run

        Returns:
            dict: with keys 'stdout', 'stderr', 'cmd', 'cwd', 'returncode'
        """
        import threading

        b_verbose: bool = bool(int(self.args['verbosity']))
        l_stdout: list = []
        l_stderr: list = []

        with subprocess.Popen(
            str_cmd.split(),
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE,
        ) as p:

            def stderr_drain() -> None:
                l_stderr.append(p.stderr.read())

            # stderr is consumed concurrently: if it were only read after
            # stdout is exhausted, a child that fills the stderr pipe would
            # block forever while we are still waiting on its stdout.
            t_stderr = threading.Thread(target = stderr_drain, daemon = True)
            t_stderr.start()

            # Read stdout until EOF (not until the process is seen to have
            # exited) so that output still buffered in the pipe when the
            # child terminates is not lost.
            for raw in p.stdout:
                str_stdoutline: str = raw.decode(errors = 'replace')
                if b_verbose:
                    print(str_stdoutline, end = '')
                l_stdout.append(str_stdoutline)

            p.wait()
            t_stderr.join()

        d_ret: dict = {
            'stdout':       ''.join(l_stdout),
            'stderr':       b''.join(l_stderr).decode(errors = 'replace'),
            'cmd':          str_cmd,
            'cwd':          os.getcwd(),
            'returncode':   p.returncode
        }
        if b_verbose and len(d_ret['stderr']):
            print('\nstderr: \n%s' % d_ret['stderr'])
        return d_ret

    def job_runbg(self, str_cmd : str) -> dict:
        """Run a job in the background.

        After much (probably unnecessary) pain the best solution seemed to
        be:

            * create a shell script on the fs that contains the
              <str_cmd> and a "&"
            * run the shell script in subprocess.Popen

        The script is created in `self.histlogPath` and is left on disk
        after launch.

        Args:
            str_cmd (str): cli string to run

        Returns:
            dict: a dictionary of exec state with keys 'uid', 'cmd', 'cwd'
                  and 'script' (the Path of the generated script).
        """
        # The script lives under the object's history/log directory.
        self.histlogPath.mkdir(parents = True, exist_ok = True)
        self.execCmd = self.histlogPath / f"job-{uuid.uuid4().hex}.sh"

        str_cmd += " &"
        str_script: str = f"#!/bin/bash\n{str_cmd}\n"
        # NOTE(review): this strips the literal two-character sequence
        # backslash + "r" from the script text, exactly as before; it may
        # have been meant to strip carriage returns -- confirm.
        str_script = str_script.replace(r'\r', '')
        with open(self.execCmd, "w") as f:
            f.write(str_script)
        self.execCmd.chmod(0o755)

        # The script path is passed as a single argv entry so that
        # directories containing spaces work. The child's output is sent to
        # /dev/null: nothing ever reads it, and an unread pipe would either
        # fill up or be closed underneath the still-running job.
        subprocess.Popen(
            [str(self.execCmd)],
            stdout      = subprocess.DEVNULL,
            stderr      = subprocess.DEVNULL,
            close_fds   = True
        )
        return {
            'uid':      str(os.getuid()),
            'cmd':      str_cmd,
            'cwd':      os.getcwd(),
            'script':   self.execCmd
        }

    def job_stdwrite(self, d_job : dict, str_outputDir : str, str_prefix : str = "") -> dict:
        """
        Capture the d_job entries to respective files.

        Unless the 'noJobLogging' argument is set, each key of `d_job` is
        written to the file `<str_outputDir>/<str_prefix><key>` containing
        the str() of its value.

        Args:
            d_job (dict): the job dictionary to save
            str_outputDir (str): directory in which the files are created
            str_prefix (str): optional prefix for every file name

        Returns:
            dict: {'status': True}
        """
        if not self.args['noJobLogging']:
            for key, value in d_job.items():
                with open(
                    '%s/%s%s' % (str_outputDir, str_prefix, key), "w"
                ) as f:
                    f.write(str(value))
        return {
            'status': True
        }
6 changes: 1 addition & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,4 @@ numpy
loguru
pftag==1.2.22
pflog==1.2.26
pftel-client
# for bug fix on transfer syntax errors
pylibjpeg
pylibjpeg-libjpeg
python-gdcm
pftel-client
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_version(rel_path: str) -> str:
author='FNNDSC',
author_email='[email protected]',
url='https://github.com/FNNDSC/pl-dicommake',
py_modules=['dicommake'],
py_modules=['dicommake','jobController'],
install_requires=['chris_plugin'],
license='MIT',
entry_points={
Expand Down

0 comments on commit eae6f9c

Please sign in to comment.