From 77e3aad9e2b8bbf8b60de3f4c720b89e8aed70a2 Mon Sep 17 00:00:00 2001
From: Dylan Hall
Date: Fri, 13 Sep 2024 13:38:55 -0400
Subject: [PATCH] complete run through of run_coherent_eyes

---
 run_coherent_eyes.sh                         | 30 ++++----
 .../python/coherent-data/associate_images.py | 72 ++++++++++---------
 .../python/coherent-data/requirements.txt    |  2 +-
 3 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/run_coherent_eyes.sh b/run_coherent_eyes.sh
index 144c7fdf9..9510fb84d 100755
--- a/run_coherent_eyes.sh
+++ b/run_coherent_eyes.sh
@@ -3,6 +3,13 @@ set -e
 
 basedir=`pwd`
 
+rm -rf selected1000/ selected100/ selected10/
+
+python3 -m venv ./venv/
+source ./venv/bin/activate
+python3 -m pip install -r src/main/python/coherent-data/requirements.txt
+
+
 base_run_synthea () {
   ./run_synthea -a 55-70 \
     -fm src/test/resources/flexporter/eyes_on_fhir.yaml \
@@ -28,13 +35,6 @@ run_population () {
   base_run_synthea -p $((popcount / 4)) -k keep_pdr.json
 }
 
-
-python3 -m venv ./venv/
-source ./venv/bin/activate
-python3 -m pip install -r src/main/python/coherent-data/requirements.txt
-
-rm -rf selected1000/ selected100/ selected10/
-
 # all populations have:
 # 25% diabetes but no DR
 # 50% NPDR, no PDR
@@ -100,15 +100,13 @@ mkdir selected10
 ./copy.sh selected_files10.txt selected10/
 cp output_population10/fhir/*Information*.json selected10
 
+## IMPORTANT: this last one was manually curated, I put the results in the subfolder keep/
 
-# cd src/main/python/coherent-data/
-# source ./venv/bin/activate
-
-# ./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/output/fhir --clean --output ${basedir}/coherent_eyes
-
-# # ./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/samples --clean --output ${basedir}/coherent_eyes
+cd src/main/python/coherent-data/
 
-# rm ${basedir}/dicom_errors.txt
+./venv/bin/python associate_images.py ${basedir}/images/Model1_250step/index.csv ${basedir}/images/oct_index.csv ${basedir}/selected10/keep --clean --add_dup_images --output ${basedir}/coherent_eyes10
+./venv/bin/python associate_images.py ${basedir}/images/Model1_250step/index.csv ${basedir}/images/oct_index.csv ${basedir}/selected100/ --clean --image_limit 2 --output ${basedir}/coherent_eyes100
+./venv/bin/python associate_images.py ${basedir}/images/Model1_250step/index.csv ${basedir}/images/oct_index.csv ${basedir}/selected1000/ --clean --image_limit 1 --reuse_images --output ${basedir}/coherent_eyes1000
 
-# validate_iods --verbose /Users/dehall/synthea/nei/coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_fundus_1.2.840.99999999.10633938.1562002233954_1.2.840.99999999.1.1.99330560.1562002233954.dcm > ${basedir}/dicom_errors.txt
-# validate_iods --verbose /Users/dehall/synthea/nei/coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_OCT_1.2.840.99999999.11240513.1609790227954_1.2.840.99999999.1.1.66970829.1609790227954.dcm >> ${basedir}/dicom_errors.txt
\ No newline at end of file
+# Note tool to validate dicoms:
+# validate_iods --verbose coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_fundus_1.2.840.99999999.10633938.1562002233954_1.2.840.99999999.1.1.99330560.1562002233954.dcm > ${basedir}/dicom_errors.txt
diff --git a/src/main/python/coherent-data/associate_images.py b/src/main/python/coherent-data/associate_images.py
index 51be59c8a..284465703 100644
--- a/src/main/python/coherent-data/associate_images.py
+++ b/src/main/python/coherent-data/associate_images.py
@@ -57,6 +57,25 @@ def parse_args():
         default="./output",
         help="Output directory",
     )
+    parser.add_argument(
+        "--reuse_images",
+        dest="reuse_images",
+        action="store_true",
+        help="Reuse images between patients",
+    )
+    parser.add_argument(
+        "--add_dup_images",
+        dest="add_dup_images",
+        action="store_true",
+        help="Add DICOM and FHIR Media for duplicate images (e.g., when there's more than one ImagingStudy with no change in disease state)",
+    )
+    parser.add_argument(
+        "--image_limit",
+        dest="image_limit",
+        type=float,
+        default=float('inf'),
+        help="Maximum number of images to associate, default: no limit",
+    )
 
     args = parser.parse_args()
     return args
@@ -81,7 +100,12 @@ def main():
     for file in fhir_jsons:
         if 'hospitalInformation' in file or 'practitionerInformation' in file:
             continue
-        process_file(file, fundus_index, oct_index, args.output)
+        process_file(file, fundus_index, oct_index, args.output, args)
+
+        if args.reuse_images:
+            fundus_index['selected'] = False
+            oct_index['selected'] = False
+
 
 def clean(output):
     outputpath = Path(output)
@@ -94,7 +118,7 @@ def clean(output):
     (outputpath / '.keep').touch()
 
 
-def process_file(file, fundus_index, oct_index, output):
+def process_file(file, fundus_index, oct_index, output, args):
     print(f"Processing {file}")
     with open(file) as f:
         bundle = json.load(f)
@@ -104,6 +128,7 @@ def process_file(file, fundus_index, oct_index, output):
     diag_reports = []
     diagnoses = { 'npdr': None, 'pdr': None, 'dme': None }
     observations = []
+    added_img_count = 0
 
     for entry in bundle['entry']:
         resource = entry['resource']
@@ -136,14 +161,14 @@
     if not imaging_studies:
         return
 
-    # import pdb; pdb.set_trace()
-
-    # print(f"Found {len(imaging_studies)} imaging studies")
 
     previous_context = None
     previous_image = { 'OCT': [None, None], 'fundus': [None, None] }
 
     for i in range(len(imaging_studies)):
+        if added_img_count >= args.image_limit:
+            break
+
         imaging_study = imaging_studies[i]
         diag_report = diag_reports[i] # these should always be 1:1
 
@@ -164,6 +189,9 @@
             previous_image[img_type][index] = None
             continue
 
+        if not args.add_dup_images and image == previous_image[img_type][index]:
+            continue
+
         dicom = create_dicom(image, imaging_study, context)
         dicom_uid = imaging_study['identifier'][0]['value'][8:] # cut off urn:uuid:
         instance_uid = context['instance']['uid']
@@ -176,6 +204,7 @@
             media = create_fhir_media(context, imaging_study, image, dicom)
             bundle['entry'].append(wrap_in_entry(media))
             previous_image[img_type][index] = image
+            added_img_count = added_img_count + 1
 
         previous_context = context
 
@@ -249,6 +278,7 @@ def pick_image(fundus_index, oct_index, context):
     index.at[selected.index[0], 'selected'] = True
 
     path = selected['File Path'].iat[0]
+    print(f"Loading image from {path}")
     image = Image.open(path)
 
     return image
@@ -259,43 +289,17 @@ def filter_oct_index(oct_index, context):
     # CNV = Choroidal neovascularization
     # DME = diabetic macular edema
 
-    if context['dme']:
+    if context['dme'] or context['pdr']:
         oct_index = oct_index[oct_index['Class'] == 'DME']
-    elif context['pdr']:
-        oct_index = oct_index[oct_index['Class'] == 'CNV']
     else:
         oct_index = oct_index[oct_index['Class'] == 'Normal']
 
     return oct_index
 
 
-# def filter_fundus_index(fundus_index, context):
-#     # fundus_index items are 0/1
-#     # DR = diabetic retinopathy
-#     # MH = macular hole
-#     # DN = ??
-#     # BRVO = Branch Retinal Vein Occlusion
-#     # ODC = Optic Disc Coloboma?
-#     # ODE = Optic disc edema?
-#     # (there were more but i deleted all columns with all 0s)
-
-#     if context['npdr']:
-#         fundus_index = fundus_index[fundus_index['DR'] == '1']
-#     else:
-#         fundus_index = fundus_index[fundus_index['DR'] == '0']
-
-#     return fundus_index
-
 def filter_fundus_index(fundus_index, context):
-    # Retinopathy grade = lines up to our stages
-    # Risk of macular edema = unclear. seems like 0 = no DME, 1/2 = DME
-
-    fundus_index = fundus_index[fundus_index['Retinopathy grade'] == context['dr_stage']]
-
-    if context['dme']:
-        fundus_index = fundus_index[fundus_index['Risk of macular edema'] != '0']
-    else:
-        fundus_index = fundus_index[fundus_index['Risk of macular edema'] == '0']
+    # dr_stage = lines up to our stages
+    fundus_index = fundus_index[fundus_index['dr_stage'] == context['dr_stage']]
 
     return fundus_index
 
diff --git a/src/main/python/coherent-data/requirements.txt b/src/main/python/coherent-data/requirements.txt
index 812f5e577..5f8c55155 100644
--- a/src/main/python/coherent-data/requirements.txt
+++ b/src/main/python/coherent-data/requirements.txt
@@ -1,4 +1,4 @@
 pandas==1.5.3
 numpy==1.26.4
 pillow==10.2.0
-pydicom==2.4.4
\ No newline at end of file
+pydicom==3.0.0
\ No newline at end of file