Skip to content

Commit

Permalink
latest updates before rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
dehall committed Sep 6, 2024
1 parent 6c21bc7 commit da0f1a3
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 13 deletions.
78 changes: 72 additions & 6 deletions run_coherent_eyes.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,77 @@
#!/bin/sh
#!/bin/bash

# Remember the repo root so later absolute paths survive `cd`s below.
# $(...) instead of legacy backticks (nests cleanly, ShellCheck SC2006).
basedir=$(pwd)

#./run_synthea -p 10 -a 55-70 -k keep_diabetes.json -fm src/test/resources/flexporter/eyes_on_fhir.yaml
# pre-processed files are now in ./output
#######################################
# Run one Synthea generation pass with the settings shared by every
# population in this script.
# Globals (read): outputfolder, seed, years_of_history, location —
#   all are set by the caller before each batch below; this function
#   assumes they are non-empty.
# Arguments: extra run_synthea flags (e.g. -p <count> -k <module>.json),
#   inserted before the location argument.
#######################################
base_run_synthea () {
  # All expansions quoted (SC2086) so values containing spaces or globs
  # cannot word-split; output paths like "./output_population1000" stay intact.
  ./run_synthea -a 55-70 \
    -fm src/test/resources/flexporter/eyes_on_fhir.yaml \
    --exporter.baseDirectory="$outputfolder" \
    -s "$seed" \
    --exporter.years_of_history="$years_of_history" \
    --generate.log_patients.detail=none \
    --generate.only_alive_patients=true \
    --generate.max_attempts_to_keep_patient=2000 \
    "$@" \
    "$location"
}

cd src/main/python/coherent-data/
source ./venv/bin/activate

./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/output/fhir --clean --output ${basedir}/coherent_eyes
#######################################
# Generate one population split across the three diabetic-retinopathy
# cohorts: 25% diabetes without DR, 50% NPDR (no PDR), 25% PDR.
# Globals (read): the base_run_synthea settings (outputfolder, seed, ...).
# Arguments: $1 - total number of patients to generate.
#######################################
run_population () {
  local popcount=$1
  local quarter half remainder

  # The last cohort takes the remainder so the three runs always sum to
  # exactly $popcount. When $popcount is a multiple of 4 (as every caller
  # below uses), remainder == quarter, i.e. identical to the original
  # 25/50/25 split; otherwise the old code silently dropped patients.
  quarter=$((popcount / 4))
  half=$((popcount / 2))
  remainder=$((popcount - quarter - half))

  base_run_synthea -p "$quarter" -k keep_diabetes_no_dr.json
  base_run_synthea -p "$half" -k keep_npdr_no_pdr.json
  base_run_synthea -p "$remainder" -k keep_pdr.json
}


#rm -rf output

# all populations have:
# 25% diabetes but no DR
# 50% NPDR, no PDR
# 25% PDR
# This is not a realistic proportion, but there's not much point in including a lot of records that have no relevant data
# Also, the total population run is 10x the target so we can downselect.
# We want records with a recent diagnosis, for 2 reasons.
# 1) DR treatment is modeled per current standards (2024).
# Treatment from say the 80s would have been a lot different and we're not trying to model that.
# We minimize anachronism by picking records where things happen when they are supposed to.
# 2) File size. Treatment loops and images add a lot of data, so making those start later means the files don't get as crazy large.

# population 1
# 1000 records with 5-year history and only relevant conditions enabled.
# Generated count is 10x the 1000-record target so records can be
# downselected afterwards (see the rationale comment above).
outputfolder="./output_population1000"
seed=12345
location=Massachusetts
years_of_history=5
run_population 10000

# population 2
# 100 records with 5-year history and all conditions enabled.
# Again 10x the target (1000 generated for 100 kept).
outputfolder="./output_population100"
seed=98765
location=Virginia
years_of_history=5
run_population 1000

# population 3
# 5-10 curated records with full history and all conditions enabled.
# years_of_history=0 — presumably means "export the patient's full
# history" rather than a 0-year window; TODO confirm exporter semantics.
# NOTE(review): 1000 generated for a 5-10 record target is a far larger
# oversample than the 10x used above — looks intentional to allow manual
# curation, but confirm it isn't a leftover value.
outputfolder="./output_population10"
seed=4444
location=Washington
years_of_history=0
run_population 1000


# cd src/main/python/coherent-data/
# source ./venv/bin/activate

# ./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/output/fhir --clean --output ${basedir}/coherent_eyes

# # ./venv/bin/python associate_images.py ${basedir}/images/fundus_index.csv ${basedir}/images/oct_index.csv ${basedir}/samples --clean --output ${basedir}/coherent_eyes

# rm ${basedir}/dicom_errors.txt

# validate_iods --verbose /Users/dehall/synthea/nei/coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_fundus_1.2.840.99999999.10633938.1562002233954_1.2.840.99999999.1.1.99330560.1562002233954.dcm > ${basedir}/dicom_errors.txt
# validate_iods --verbose /Users/dehall/synthea/nei/coherent_eyes/dicom/Annabel185_Lettie611_Fisher429_af88404e-aad1-c9cb-3e7f-07daf0e44eac_OCT_1.2.840.99999999.11240513.1609790227954_1.2.840.99999999.1.1.66970829.1609790227954.dcm >> ${basedir}/dicom_errors.txt
12 changes: 5 additions & 7 deletions src/main/python/coherent-data/dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def create_dataset_common(image, imaging_study, context):
ds.SeriesNumber = '1'
ds.AcquisitionNumber = '1'
ds.InstanceNumber = str(context['instance']['number'])
ds.ImageLaterality = 'L' if context['laterality'] == 'OS' else 'OD'
laterality = 'L' if context['laterality'] == 'OS' else 'R'
ds.ImageLaterality = laterality
ds.ImageLaterality = laterality
ds.AccessionNumber = ''

ds.PupilDilated = 'YES'
Expand Down Expand Up @@ -123,7 +125,7 @@ def create_oct_dicom(image, imaging_study, context):
ds.AcquisitionDuration = 0.0

ds.FrameOfReferenceUID = '1.2.392.200106.1651.6.2.1.20231214124222'
ds.ImageLaterality = 'R'

ds.SynchronizationFrameOfReferenceUID = '1.2.392.200106.1651.6.2.1803921148151.3911546542'
ds.SOPInstanceUIDOfConcatenationSource = '1.2.392.200106.1651.6.2.1803921148151.45272.2.2'
ds.PositionReferenceIndicator = ''
Expand Down Expand Up @@ -257,9 +259,6 @@ def create_oct_dicom(image, imaging_study, context):
# Frame Content Sequence: Frame Content 1
frame_content1 = Dataset()
frame_content_sequence.append(frame_content1)
frame_content1.FrameAcquisitionDateTime = ''
frame_content1.FrameReferenceDateTime = ''
frame_content1.FrameAcquisitionDuration = None
frame_content1.StackID = '1'
frame_content1.InStackPositionNumber = 1
frame_content1.DimensionIndexValues = [1, 1]
Expand Down Expand Up @@ -340,7 +339,7 @@ def create_fundus_dicom(image, imaging_study, context):
ds = create_dataset_common(image, imaging_study, context)
ds.file_meta = file_meta
ds.SpecificCharacterSet = 'ISO_IR 100'
ds.ImageType = ['ORIGINAL', 'PRIMARY', '3D WIDE']
ds.ImageType = ['ORIGINAL', 'PRIMARY', 'COLOR']
ds.SOPClassUID = '1.2.840.10008.5.1.4.1.1.77.1.5.1'
ds.SOPInstanceUID = '1.2.392.200106.1651.6.2.1803921148151.3911546542.14'

Expand Down Expand Up @@ -397,7 +396,6 @@ def create_fundus_dicom(image, imaging_study, context):
ds.NumberOfFrames = '1'
ds.FrameIncrementPointer = (0x0018, 0x1063)

ds.PixelSpacing = [0, 0]
ds.BitsAllocated = 8
ds.BitsStored = 8
ds.HighBit = 7
Expand Down

0 comments on commit da0f1a3

Please sign in to comment.