From da0f1a38929e7568af0f385645466489239d28a8 Mon Sep 17 00:00:00 2001
From: Dylan Hall
Date: Fri, 6 Sep 2024 09:43:15 -0400
Subject: [PATCH] latest updates before rebase

---
 run_coherent_eyes.sh                   | 83 ++++++++++++++++++++++++--
 src/main/python/coherent-data/dicom.py | 12 ++--
 2 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/run_coherent_eyes.sh b/run_coherent_eyes.sh
index b4ac2854e..c5794e019 100755
--- a/run_coherent_eyes.sh
+++ b/run_coherent_eyes.sh
@@ -1,11 +1,82 @@
-#!/bin/sh
+#!/bin/bash
 
 basedir=`pwd`
 
-#./run_synthea -p 10 -a 55-70 -k keep_diabetes.json -fm src/test/resources/flexporter/eyes_on_fhir.yaml
-# pre-processed files are now in ./output
+# Run Synthea once with the options shared by every population.
+# Reads the globals outputfolder, seed, years_of_history and location;
+# any arguments (e.g. -p COUNT -k KEEP_FILE) are passed through unchanged.
+base_run_synthea () {
+  ./run_synthea -a 55-70 \
+    -fm src/test/resources/flexporter/eyes_on_fhir.yaml \
+    --exporter.baseDirectory="$outputfolder" \
+    -s "$seed" \
+    --exporter.years_of_history="$years_of_history" \
+    --generate.log_patients.detail=none \
+    --generate.only_alive_patients=true \
+    --generate.max_attempts_to_keep_patient=2000 \
+    "$@" \
+    "$location"
+}
 
-cd src/main/python/coherent-data/
-source ./venv/bin/activate
 
-./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/output/fhir --clean --output ${basedir}/coherent_eyes
\ No newline at end of file
+# Generate one population: $1 is the total record count, split 25/50/25
+# across the three keep files (integer division, so use a multiple of 4).
+run_population () {
+  local popcount="$1"
+
+  base_run_synthea -p $((popcount / 4)) -k keep_diabetes_no_dr.json
+  base_run_synthea -p $((popcount / 2)) -k keep_npdr_no_pdr.json
+  base_run_synthea -p $((popcount / 4)) -k keep_pdr.json
+}
+
+
+#rm -rf output
+
+# all populations have:
+# 25% diabetes but no DR
+# 50% NPDR, no PDR
+# 25% PDR
+# This is not a realistic proportion, but there's not much point in including a lot of records that have no relevant data
+# Also, the total population run is 10x the target so we can downselect.
+# We want records with a recent diagnosis, for 2 reasons.
+# 1) DR treatment is modeled per current standards (2024).
+#    Treatment from, say, the 80s would have been a lot different and we're not trying to model that.
+#    We minimize anachronism by picking records where things happen when they are supposed to.
+# 2) File size. Treatment loops and images add a lot of data, so making those start later means the files don't get as crazy large.
+
+# population 1
+# 1000 records with 5-year history and only relevant conditions enabled
+outputfolder="./output_population1000"
+seed=12345
+location=Massachusetts
+years_of_history=5
+run_population 10000
+
+# population 2
+# 100 records with 5-year history and all conditions enabled
+outputfolder="./output_population100"
+seed=98765
+location=Virginia
+years_of_history=5
+run_population 1000
+
+# population 3
+# 5-10 curated records with full history and all conditions enabled
+outputfolder="./output_population10"
+seed=4444
+location=Washington
+years_of_history=0
+run_population 1000
+
+
+# cd src/main/python/coherent-data/
+# source ./venv/bin/activate
+
+# ./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/output/fhir --clean --output ${basedir}/coherent_eyes
+
+# # ./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/samples --clean --output ${basedir}/coherent_eyes
+
+# rm ${basedir}/dicom_errors.txt
+
+# validate_iods --verbose /Users/dehall/synthea/nei/coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_fundus_1.2.840.99999999.10633938.1562002233954_1.2.840.99999999.1.1.99330560.1562002233954.dcm > ${basedir}/dicom_errors.txt
+# validate_iods --verbose /Users/dehall/synthea/nei/coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_OCT_1.2.840.99999999.11240513.1609790227954_1.2.840.99999999.1.1.66970829.1609790227954.dcm >> ${basedir}/dicom_errors.txt
\ No newline at end of file
diff --git a/src/main/python/coherent-data/dicom.py
b/src/main/python/coherent-data/dicom.py index c3b5fb3fa..17b3a3556 100644 --- a/src/main/python/coherent-data/dicom.py +++ b/src/main/python/coherent-data/dicom.py @@ -42,7 +42,9 @@ def create_dataset_common(image, imaging_study, context): ds.SeriesNumber = '1' ds.AcquisitionNumber = '1' ds.InstanceNumber = str(context['instance']['number']) - ds.ImageLaterality = 'L' if context['laterality'] == 'OS' else 'OD' + laterality = 'L' if context['laterality'] == 'OS' else 'R' + ds.ImageLaterality = laterality + ds.ImageLaterality = laterality ds.AccessionNumber = '' ds.PupilDilated = 'YES' @@ -123,7 +125,7 @@ def create_oct_dicom(image, imaging_study, context): ds.AcquisitionDuration = 0.0 ds.FrameOfReferenceUID = '1.2.392.200106.1651.6.2.1.20231214124222' - ds.ImageLaterality = 'R' + ds.SynchronizationFrameOfReferenceUID = '1.2.392.200106.1651.6.2.1803921148151.3911546542' ds.SOPInstanceUIDOfConcatenationSource = '1.2.392.200106.1651.6.2.1803921148151.45272.2.2' ds.PositionReferenceIndicator = '' @@ -257,9 +259,6 @@ def create_oct_dicom(image, imaging_study, context): # Frame Content Sequence: Frame Content 1 frame_content1 = Dataset() frame_content_sequence.append(frame_content1) - frame_content1.FrameAcquisitionDateTime = '' - frame_content1.FrameReferenceDateTime = '' - frame_content1.FrameAcquisitionDuration = None frame_content1.StackID = '1' frame_content1.InStackPositionNumber = 1 frame_content1.DimensionIndexValues = [1, 1] @@ -340,7 +339,7 @@ def create_fundus_dicom(image, imaging_study, context): ds = create_dataset_common(image, imaging_study, context) ds.file_meta = file_meta ds.SpecificCharacterSet = 'ISO_IR 100' - ds.ImageType = ['ORIGINAL', 'PRIMARY', '3D WIDE'] + ds.ImageType = ['ORIGINAL', 'PRIMARY', 'COLOR'] ds.SOPClassUID = '1.2.840.10008.5.1.4.1.1.77.1.5.1' ds.SOPInstanceUID = '1.2.392.200106.1651.6.2.1803921148151.3911546542.14' @@ -397,7 +396,6 @@ def create_fundus_dicom(image, imaging_study, context): ds.NumberOfFrames = '1' 
ds.FrameIncrementPointer = (0x0018, 0x1063) - ds.PixelSpacing = [0, 0] ds.BitsAllocated = 8 ds.BitsStored = 8 ds.HighBit = 7