Skip to content

Commit

Permalink
✨ dcmmeta2tsv.py: slower but CSA-capable replacement for dicom_hinfo
Browse files Browse the repository at this point in the history
  • Loading branch information
WillForan committed Oct 25, 2024
1 parent 49ba977 commit f142af8
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 30 deletions.
48 changes: 23 additions & 25 deletions 00_build_db.bash
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,29 @@
# quick pass at building minimal text database of dicom headers
# 20240907 WF - init
#
# Associative array: output column name -> DICOM "group,element" tag.
# The values are expanded into "-tag <value>" arguments for dicom_hinfo.
# NOTE(review): this array looks like the pre-commit side of the diff; the
# TAG_ARGS export that follows reads the tags from taglist.txt instead -- confirm.
declare -A t
t[AcqTime]="0008,0032" # Acquisition Time like 145446.685000
t[AcqDate]="0008,0022" # like 20241004
t[SeriesNumber]="0020,0011" # REL Series Number
t[SubID]="0010,0010" # patient name
t[iPAT]="0051,1011" # PATModeText (private field)
t[Comments]="0020,4000" #REL Image Comments//Unaliased MB3/PE4/LB SENSE1
t[Operator]="0008,1070"
t[Project]="0008,1030" # ID Study Description//Brain^wpc-8620
t[SequenceName]="0008,103e" # series description
t[SequenceType]="0018,0024" # ACQ Sequence Name
t[PED_major]="0018,1312" # ACQ Phase Encoding Direction, ROW or COL
t[TR]="0018,0080"
t[TE]="0018,0081"
t[Matrix]="0018,1310" # ACQ Acquisition Matrix
t[PixelResol]="0028,0030" # IMG Pixel Spacing//2.2978723049164\2.2978723049164
# https://neurostars.org/t/how-is-bandwidthperpixelphaseencode-calculated/26526 (0021,1153)
t[BWP]="0018,0095" # ACQ Pixel Bandwidth (?)
t[BWPPE]="0019,1028" # in matlab S.BandwidthPerPixelPhaseEncode;
t[FA]="0018,1314"
t[TA]="0051,100a"
t[FoV]="0051,100c" # eg FoV 1617*1727; but actually calculated from matrix and spacing?
# Build the "-tag GGGG,EEEE ..." argument string for dicom_hinfo from taglist.txt.
# The header row, '#'-commented rows and blank rows are dropped BEFORE the tag
# column is cut out: filtering on '#' after `cut -f2` misses rows such as
# "#iPAT<TAB>0051,1011" because the '#' lives in the first column, which would
# add a column that the python column list (it skips '#' rows) does not expect.
TAG_ARGS=$(sed '1d;/^#/d;/^[[:space:]]*$/d' taglist.txt | cut -f2 | sed 's/^/-tag /' | paste -sd' ' -)
export TAG_ARGS
# dcminfo FILE [FILE...]
# Print one tab-separated metadata line on stdout.
#  * First two columns come from the `gdcmdump -dC` text of "$1" only: the
#    digits matched after "PhaseEncodingDirectionPositive...Data.." and after
#    "ucPATMode = ". The perl hash key is the first 5 characters of the match
#    ("Phase" / "ucPAT"); both columns default to "NA" when nothing matches.
#    perl prints them followed by a tab and NO newline, so the dicom_hinfo
#    output continues on the same line.
#  * Remaining columns are what dicom_hinfo prints for the tags in TAG_ARGS.
#    dicom_hinfo is given all arguments ("$@"), not just "$1".
# TAG_ARGS is expanded unquoted so that it splits into separate
# "-tag" / "GGGG,EEEE" words.
dcminfo(){
declare -g TAG_ARGS
#echo "# $1" >&2
gdcmdump -dC "$1" |
perl -ne 'BEGIN{%a=(Phase=>"NA", ucPAT=>"NA")}
$a{substr($1,0,5)} = $2 if m/(PhaseEncodingDirectionPositive.*Data..|ucPATMode\s+=\s+)(\d+)/;
END {print join("\t", @a{qw/Phase ucPAT/}), "\t"}'
dicom_hinfo -sepstr $'\t' -last -full_entry $TAG_ARGS "$@"
}

# Walk acquisition directories, print the first regular file found directly in
# each one, and pipe those paths into the header-extraction step; the final
# result is shown on stdout and written to db.txt by tee.
# NOTE(review): old and new diff lines appear interleaved in this view. The
# first `for`/`find` pair and the `xargs dicom_hinfo ... "${t[@]}"` stage look
# like the pre-commit version, superseded by the second `for` loop and the
# `parallel -n1 dcminfo` stage -- confirm against the real file.
for d in /Volumes/Hera/Raw/MRprojects/Habit/20*-*/1*_2*/dMRI_*/; do
find $d -maxdepth 1 -type f -print -quit
# make the dcminfo function visible to the child shells started by parallel
export -f dcminfo

# counter used to stop the loop early (see the break below)
cnt=0
#for d in /Volumes/Hera/Raw/MRprojects/Habit/20*-*/1*_2*/dMRI_*/; do
for d in /Volumes/Hera/Raw/MRprojects/Habit/2022.08.23-14.24.18/11878_20220823/HabitTask_704x752.19/ /Volumes/Hera/Raw/MRprojects/Habit/2022.08.23-14.24.18/11878_20220823/dMRI_b0_AP_140x140.35/ /Volumes/Hera/Raw/MRprojects/Habit/2022.08.23-14.24.18/11878_20220823/Resting-state_ME_476x504.14/; do
# progress marker goes to stderr so it does not enter the pipeline
echo "# $d" >&2
# just one dicom
find $d -maxdepth 1 -type f -print -quit
let ++cnt
# stop once three directories have been handled
[ $cnt -gt 2 ] && break
done |
xargs dicom_hinfo -sepstr $'\t' -last -full_entry $(printf " -tag %s" "${t[@]}") |
# TODO: use './dcmmeta2tsv.py' instead of dcminfo?
#xargs ./dcm2nii_check.bash |
# one input path per dcminfo invocation
parallel -n1 dcminfo |
tee db.txt
22 changes: 17 additions & 5 deletions 01_txt_to_sqlite.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
#!/usr/bin/env python3
"""
convert db.txt into a sqlite database
"""
import sqlite3
# col names from 00_build_db.bash
colnames = ["AcqTime", "AcqDate", "SeriesNumber", "SubID", "iPAT", "Comments", "Operator", "Project", "SequenceName", "SequenceType", "PED_major", "TR", "TE", "Matrix", "PixelResol", "BWP", "BWPPE", "FA", "TA", "FoV"]
import re

# CSA col names from 00_build_db.bash not in taglist.txt
colnames = ["Phase", "iPAT"]
with open('taglist.txt', 'r') as f:
    # The first tab-separated field of each data row is the column name.
    # Skip the header row ("name..."), '#' comment rows and blank rows: a blank
    # row would otherwise add a bogus "\n" column and shift every later name.
    tag_colnames = [line.rstrip("\n").split("\t")[0]
                    for line in f
                    if line.strip() and not line.startswith(("name", "#"))]
colnames += tag_colnames
colnames += ['filename']  # final file name column also not in taglist.txt (not a tag)

sql = sqlite3.connect("db.sqlite") # see schema.sql

Expand All @@ -14,11 +25,12 @@
# Read db.txt one tab-separated row at a time, map the values onto `colnames`
# and insert the columns listed in `consts` with `sql_cmd` (both names are
# defined in a part of the file that is not visible here).
# NOTE(review): indentation was lost in this view and old/new diff lines appear
# interleaved -- the dict-comprehension / ",".join pair looks superseded by the
# dict(zip(...)) / list pair right after it, and `break` by `continue`;
# confirm against the real file.
with open('db.txt','r') as f:
while line := f.readline():
# the last value keeps the row's trailing newline (split does not strip it)
vals = line.split("\t")
d = {k:v for (k,v) in zip(colnames, vals)}
val_array = ",".join([d[k] for k in consts])
d = dict(zip(colnames, vals))
val_array = [d[k] for k in consts]
print(val_array)
sql.execute(sql_cmd, val_array)
break
continue
# TODO: FIX ME
# NOTE(review): the two statements below are placeholders -- "WHERE = ?" has no
# column name, sql.execute() returns a cursor rather than an id, the insert
# names no columns or values, and (last_row_id) is a parenthesised value, not
# a 1-tuple.
last_row_id = sql.execute("SELECT id FROM acq_param WHERE = ?;", ())
sql.execute("insert into acq() values () ", (last_row_id))
sql.commit()
53 changes: 53 additions & 0 deletions dcmmeta2tsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""
Give a tab separated metadata value line per dicom file.
"""
import os
import sys
import re
import pydicom
#import warnings
#warnings.filterwarnings("ignore", module="nibabel.nicom.csareader")
import nibabel.nicom.csareader as csareader

def tagpair_to_hex(csv_str):
    """
    Convert a tag written as in our text files, e.g. "0051,1017",
    into a tuple of hex strings, e.g. ('0x51', '0x1017'),
    which is what this script uses to index a pydicom dataset.
    """
    hex_parts = []
    for part in csv_str.split(","):
        # each piece is base-16 text; hex() re-renders it with a 0x prefix
        hex_parts.append(hex(int(part, 16)))
    return tuple(hex_parts)

def read_known_tags(tagfile="taglist.txt"):
    """
    Read a tab-separated tag list with the columns name, tag, desc.

    The header row (starts with "name"), comment rows (start with "#") and
    blank rows are skipped.  The trailing newline is removed before splitting
    so the last field of a row is clean.

    Returns a list of dicts keyed by "name", "tag" and "desc", in file order.
    A row with fewer than three fields simply lacks the later keys
    (e.g. no "desc" for a "name<TAB>tag" row).
    """
    fields = ("name", "tag", "desc")
    tags = []
    with open(tagfile, 'r') as f:
        for line in f:
            line = line.rstrip("\r\n")
            # blank rows would otherwise become entries without a "tag" key
            if not line.strip() or line.startswith(("name", "#")):
                continue
            tags.append(dict(zip(fields, line.split("\t"))))
    return tags

def dcm_value(dcm, tag, default="null"):
    """
    Value of element `tag` in `dcm`, or `default` when the lookup raises
    KeyError (element not present).  `dcm` only needs to support
    dcm[tag].value, as a pydicom dataset does.
    """
    try:
        return dcm[tag].value
    except KeyError:
        return default


def csa_first_item(csa, name, default="null"):
    """
    First item of CSA tag `name`, or `default` when the tag is absent or has
    no items.  `csa` is expected to look like the csareader.read() result,
    i.e. csa['tags'][name]['items'] is a list.
    """
    items = csa.get('tags', {}).get(name, {}).get('items')
    return items[0] if items else default


if __name__ == "__main__":
    # index-ready version of every tag, in taglist.txt order
    tag_ids = [tagpair_to_hex(tag_d['tag']) for tag_d in read_known_tags()]

    for dcm_path in sys.argv[1:]:
        if not os.path.isfile(dcm_path):
            # f-string so the offending path is actually shown in the message
            raise Exception(f"Bad command line argument: '{dcm_path}' DNE")
        dcm = pydicom.dcmread(dcm_path)
        # "null" placeholder keeps the columns aligned when a file lacks a tag
        meta = [dcm_value(dcm, tag) for tag in tag_ids]

        # CSA header element; when it is missing both CSA columns become "null"
        csa_str = dcm_value(dcm, (0x0029, 0x1010), None)
        csa_tr = csareader.read(csa_str) if csa_str is not None else {}
        pedp = csa_first_item(csa_tr, 'PhaseEncodingDirectionPositive')
        ipat = csa_first_item(csa_tr, 'ImaPATModeText')
        # order here matches 00_build_db.bash
        csa_tags = [pedp, ipat]
        # NB. arrays are '[x, y, z]' instead of ' x y z ' or 'x/y'
        # like in dicom_hdr (00_build_db.bash)
        all_tags = [str(x) for x in csa_tags + meta] + [dcm_path]
        print("\t".join(all_tags))
22 changes: 22 additions & 0 deletions taglist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name tag desc
AcqTime 0008,0032 Acquisition Time like 145446.685000
AcqDate 0008,0022 like 20241004
SeriesNumber 0020,0011 REL Series Number
SubID 0010,0010 patient name
#iPAT 0051,1011 PATModeText (private field); not implemented, use CSA value ucPAT
Comments 0020,4000 REL Image Comments//Unaliased MB3/PE4/LB SENSE1
Operator 0008,1070
Project 0008,1030 ID Study Description//Brain^wpc-8620
SequenceName 0008,103e series descripton
SequenceType 0018,0024 ACQ Sequence Name
PED_major 0018,1312 ACQ Phase Encoding Direction, ROW or COL
TR 0018,0080
TE 0018,0081
Matrix 0018,1310 ACQ Acquisition Matrix
PixelResol 0028,0030 IMG Pixel Spacing//2.2978723049164\2.2978723049164
#https://neurostars.org/t/how-is-bandwidthperpixelphaseencode-calculated/26526 (0021,1153)
BWP 0018,0095 ACQ Pixel Bandwidth (?) also unimplemented? need CSA value?
BWPPE 0019,1028 in matlab S.BandwidthPerPixelPhaseEncode;
FA 0018,1314
TA 0051,100a
FoV 0051,100c eg FoV 1617*1727; but actually cocaluated from matrix and spacing?

0 comments on commit f142af8

Please sign in to comment.