From f142af8fe89041ee2902c65467020a7a644d2799 Mon Sep 17 00:00:00 2001 From: WillForan Date: Thu, 24 Oct 2024 21:58:51 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20dcmmeta2tsv.py:=20slower=20but=20CS?= =?UTF-8?q?A=20capiable=20replacment=20for=20dicom=5Fhinfo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 00_build_db.bash | 48 ++++++++++++++++++++-------------------- 01_txt_to_sqlite.py | 22 ++++++++++++++----- dcmmeta2tsv.py | 53 +++++++++++++++++++++++++++++++++++++++++++++ taglist.txt | 22 +++++++++++++++++++ 4 files changed, 115 insertions(+), 30 deletions(-) create mode 100755 dcmmeta2tsv.py create mode 100644 taglist.txt diff --git a/00_build_db.bash b/00_build_db.bash index 66663fe0..a1f234bc 100755 --- a/00_build_db.bash +++ b/00_build_db.bash @@ -2,31 +2,29 @@ # quick pass at building minimal text database of dicom headers # 20240907 WF - init # -declare -A t -t[AcqTime]="0008,0032" # Acquisition Time like 145446.685000 -t[AcqDate]="0008,0022" # like 20241004 -t[SeriesNumber]="0020,0011" # REL Series Number -t[SubID]="0010,0010" # patient name -t[iPAT]="0051,1011" # PATModeText (private field) -t[Comments]="0020,4000" #REL Image Comments//Unaliased MB3/PE4/LB SENSE1 -t[Operator]="0008,1070" -t[Project]="0008,1030" # ID Study Description//Brain^wpc-8620 -t[SequenceName]="0008,103e" # series descripton -t[SequenceType]="0018,0024" # ACQ Sequence Name -t[PED_major]="0018,1312" # ACQ Phase Encoding Direction, ROW or COL -t[TR]="0018,0080" -t[TE]="0018,0081" -t[Matrix]="0018,1310" # ACQ Acquisition Matrix -t[PixelResol]="0028,0030" # IMG Pixel Spacing//2.2978723049164\2.2978723049164 -# https://neurostars.org/t/how-is-bandwidthperpixelphaseencode-calculated/26526 (0021,1153) -t[BWP]="0018,0095" # ACQ Pixel Bandwidth (?) -t[BWPPE]="0019,1028" # in matlab S.BandwidthPerPixelPhaseEncode; -t[FA]="0018,1314" -t[TA]="0051,100a" -t[FoV]="0051,100c" # eg FoV 1617*1727; but actually cocaluated from matrix and spacing? +export TAG_ARGS=$(cut -f2 taglist.txt | sed '1d;/#/d;s/^/-tag /;'|paste -sd' ') +dcminfo(){ + declare -g TAG_ARGS + #echo "# $1" >&2 + gdcmdump -dC "$1" | + perl -ne 'BEGIN{%a=(Phase=>"NA", ucPAT=>"NA")} + $a{substr($1,0,5)} = $2 if m/(PhaseEncodingDirectionPositive.*Data..|ucPATMode\s+=\s+)(\d+)/; + END {print join("\t", @a{qw/Phase ucPAT/}), "\t"}' + dicom_hinfo -sepstr $'\t' -last -full_entry $TAG_ARGS "$@" +} -for d in /Volumes/Hera/Raw/MRprojects/Habit/20*-*/1*_2*/dMRI_*/; do - find $d -maxdepth 1 -type f -print -quit +export -f dcminfo + +cnt=0 +#for d in /Volumes/Hera/Raw/MRprojects/Habit/20*-*/1*_2*/dMRI_*/; do +for d in /Volumes/Hera/Raw/MRprojects/Habit/2022.08.23-14.24.18/11878_20220823/HabitTask_704x752.19/ /Volumes/Hera/Raw/MRprojects/Habit/2022.08.23-14.24.18/11878_20220823/dMRI_b0_AP_140x140.35/ /Volumes/Hera/Raw/MRprojects/Habit/2022.08.23-14.24.18/11878_20220823/Resting-state_ME_476x504.14/; do + echo "# $d" >&2 + # just one dicom + find $d -maxdepth 1 -type f -print -quit + let ++cnt + [ $cnt -gt 2 ] && break done | - xargs dicom_hinfo -sepstr $'\t' -last -full_entry $(printf " -tag %s" "${t[@]}") | + # TODO: use './dcmmeta2tsv.py' instead of dcminfo? + #xargs ./dcm2nii_check.bash | + parallel -n1 dcminfo | tee db.txt diff --git a/01_txt_to_sqlite.py b/01_txt_to_sqlite.py index 5735bcc1..c4e87ff4 100755 --- a/01_txt_to_sqlite.py +++ b/01_txt_to_sqlite.py @@ -1,7 +1,18 @@ #!/usr/bin/env python3 +""" +convert db.txt into a sqlite database +""" import sqlite3 -# col names from 00_build_db.bash -colnames = ["AcqTime", "AcqDate", "SeriesNumber", "SubID", "iPAT", "Comments", "Operator", "Project", "SequenceName", "SequenceType", "PED_major", "TR", "TE", "Matrix", "PixelResol", "BWP", "BWPPE", "FA", "TA", "FoV"] +import re + +# CSA col names from 00_build_db.bash not in taglist.txt +colnames = ["Phase", "iPAT"] +with open('taglist.txt','r') as f: + tag_colnames = [line.split("\t")[0] + for line in f.readlines() + if not re.search("^name|^#", line)] +colnames += tag_colnames +colnames += ['filename'] # final file name column also not in taglist.txt (not a tag) sql = sqlite3.connect("db.sqlite") # see schema.sql @@ -14,11 +25,12 @@ with open('db.txt','r') as f: while line := f.readline(): vals = line.split("\t") - d = {k:v for (k,v) in zip(colnames, vals)} - val_array = ",".join([d[k] for k in consts]) + d = dict(zip(colnames, vals)) + val_array = [d[k] for k in consts] print(val_array) sql.execute(sql_cmd, val_array) - break + continue # TODO: FIX ME last_row_id = sql.execute("SELECT id FROM acq_param WHERE = ?;", ()) sql.execute("insert into acq() values () ", (last_row_id)) +sql.commit() diff --git a/dcmmeta2tsv.py b/dcmmeta2tsv.py new file mode 100755 index 00000000..70da12a0 --- /dev/null +++ b/dcmmeta2tsv.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Give a tab separated metadata value line per dicom file. +""" +import os +import sys +import re +import pydicom +#import warnings +#warnings.filterwarnings("ignore", module="nibabel.nicom.csareader") +import nibabel.nicom.csareader as csareader + +def tagpair_to_hex(csv_str): + """ + move our text files has tags like "0051,1017" + to pydicom indexe like (0x51,0x1017) + """ + return tuple(hex(int(x,16)) for x in csv_str.split(",")) + +def read_known_tags(tagfile="taglist.txt"): + """ + read in tsv file like with header name,tag,desc. + skip comments and header + """ + with open(tagfile,'r') as f: + tags = [dict(zip(["name","tag","desc"],line.split("\t"))) + for line in f.readlines() + if not re.search("^name|^#", line)] + return tags + +if __name__ == "__main__": + tags = read_known_tags() + for i in range(len(tags)): + tags[i]['tag'] = tagpair_to_hex(tags[i]['tag']) + + for dcm_path in sys.argv[1:]: + if not os.path.isfile(dcm_path): + raise Exception("Bad command line argument: '{dcm_path}' DNE") + dcm = pydicom.dcmread(dcm_path) + meta = [dcm[tag_d['tag']].value for tag_d in tags] + + csa_str = dcm[(0x0029,0x1010)].value + csa_tr = csareader.read(csa_str) + pedp = csa_tr['tags']['PhaseEncodingDirectionPositive']['items'] + pedp = pedp[0] if pedp else "null" + ipat = csa_tr['tags']['ImaPATModeText']['items'] + ipat = ipat[0] if ipat else "null" + # order here matches 00_build_db.bash + csa_tags = [pedp, ipat] + # NB. arrays are '[x, y, z]' instead of ' x y z ' or 'x/y' + # like in dicom_hdr (00_build_db.bash) + all_tags =[str(x) for x in csa_tags + meta] + [dcm_path] + print("\t".join(all_tags)) diff --git a/taglist.txt b/taglist.txt new file mode 100644 index 00000000..08d0f377 --- /dev/null +++ b/taglist.txt @@ -0,0 +1,22 @@ +name tag desc +AcqTime 0008,0032 Acquisition Time like 145446.685000 +AcqDate 0008,0022 like 20241004 +SeriesNumber 0020,0011 REL Series Number +SubID 0010,0010 patient name +#iPAT 0051,1011 PATModeText (private field); not implemented, use CSA value ucPAT +Comments 0020,4000 REL Image Comments//Unaliased MB3/PE4/LB SENSE1 +Operator 0008,1070 +Project 0008,1030 ID Study Description//Brain^wpc-8620 +SequenceName 0008,103e series descripton +SequenceType 0018,0024 ACQ Sequence Name +PED_major 0018,1312 ACQ Phase Encoding Direction, ROW or COL +TR 0018,0080 +TE 0018,0081 +Matrix 0018,1310 ACQ Acquisition Matrix +PixelResol 0028,0030 IMG Pixel Spacing//2.2978723049164\2.2978723049164 +#https://neurostars.org/t/how-is-bandwidthperpixelphaseencode-calculated/26526 (0021,1153) +BWP 0018,0095 ACQ Pixel Bandwidth (?) also unimplemented? need CSA value? +BWPPE 0019,1028 in matlab S.BandwidthPerPixelPhaseEncode; +FA 0018,1314 +TA 0051,100a +FoV 0051,100c eg FoV 1617*1727; but actually cocaluated from matrix and spacing?