Skip to content

Commit

Permalink
Added code to replace text after removing PHI
Browse files Browse the repository at this point in the history
  • Loading branch information
Sandip117 committed Feb 7, 2024
1 parent 0aa6872 commit a93c3b5
Showing 1 changed file with 54 additions and 7 deletions.
61 changes: 54 additions & 7 deletions image_textRemove.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
import os
import sys
from difflib import SequenceMatcher
import hashlib

__version__ = '1.1.6'
__version__ = '1.1.7'

DISPLAY_TITLE = r"""
_ _ _ _ ______
Expand All @@ -38,6 +39,8 @@
help='output file type(only the extension)')
parser.add_argument('-j', '--filterTextFromJSON', default='anonymizedTags.json', type=str,
help='A dictionary of dicom tags and their values')
parser.add_argument('-r', '--replaceTextFromJSON', default='replaceTags.json', type=str,
help='A dictionary of dicom tags and their replacement values')
parser.add_argument('-t', '--threshold', default=0.8, type=float,
help='threshold of similarity ration between two words')
parser.add_argument( '--pftelDB',
Expand Down Expand Up @@ -93,13 +96,24 @@ def main(options: Namespace, inputdir: Path, outputdir: Path):
data = json.load(f)
except Exception as ex:
print("Error: ",ex)
# To be removed later
replace_json_data_path = ''
l_replace_json_path = list(inputdir.glob('**/*.json'))
for json_path in l_replace_json_path:
if json_path.name == options.replaceTextFromJSON:
replace_json_data_path = json_path
try:
f = open(replace_json_data_path, 'r')
replace_data = json.load(f)
except Exception as ex:
print("Error: ", ex)
box_list = []
mapper = PathMapper.file_mapper(inputdir, outputdir, glob=f"**/*.{options.fileFilter}", fail_if_empty=False)
for input_file, output_file in mapper:
# The code block below is a small and easy example of how to use a ``PathMapper``.
# It is recommended that you put your functionality in a helper function, so that
# it is more legible and can be unit tested.
box_list, final_image = inpaint_text(str(input_file), data, box_list, options.threshold)
box_list, final_image = inpaint_text(str(input_file), data, replace_data, box_list, options.threshold)
img_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
output_file = str(output_file).replace(options.fileFilter, options.outputType)
print(f"Saving output file as ----->{output_file}<-----\n\n")
Expand All @@ -112,18 +126,29 @@ def midpoint(x1, y1, x2, y2):
return x_mid, y_mid


def inpaint_text(img_path, data, box_list, similarity_threshold):
def inpaint_text(img_path, data, replace_data, box_list, similarity_threshold):
word_list = []
d_replace_text = {}
for item in data.keys():
if item == 'PatientName':
word_list.extend(data.get(item).split('^'))
anon_name = replace_data.get(item).split('^')
real_name = data.get(item).split('^')
for i in range(len(real_name)):
word_list.append(real_name[i])
d_replace_text[real_name[i]] = anon_name[i]
elif item == 'PatientBirthDate':
yyyy = data.get(item)[0:4]
mm = data.get(item)[4:6]
dd = data.get(item)[6:8]
yyyy1 = replace_data.get(item)[0:4]
mm1 = replace_data.get(item)[4:6]
dd1 = replace_data.get(item)[6:8]
word_list.append(f'{mm}1{dd}1{yyyy}')
d_replace_text[f'{mm}1{dd}1{yyyy}'] = f'{mm1}/{dd1}/{yyyy1}'
else:
word_list.append(data.get(item))
d_replace_text[data.get(item)] = replace_data.get(item)
print(d_replace_text)
# read image
print(f"Reading input file from ---->{img_path}<----")
img = cv2.imread(img_path)
Expand All @@ -134,9 +159,10 @@ def inpaint_text(img_path, data, box_list, similarity_threshold):


mask = np.zeros(img.shape[:2], dtype="uint8")
l_coordinates = []
l_text = []
for box in box_list:
if (box[0].upper() in word_list) or close_to_similar(box[0].upper(), word_list, similarity_threshold):
# Remove PatientName only
print(f"Removing {box[0].upper()} from image")
x0, y0 = box[1][0]
x1, y1 = box[1][1]
Expand All @@ -151,9 +177,30 @@ def inpaint_text(img_path, data, box_list, similarity_threshold):
cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mi1), 255,
thickness)
img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)
org = (round(x3),round(y3))
l_coordinates.append(org)
if box[0].upper() in d_replace_text:
l_text.append(d_replace_text[box[0].upper()])
else:
word = close_to_similar(box[0].upper(), word_list, similarity_threshold)
l_text.append(d_replace_text[word])

color = (235, 235, 235)
print(l_text)
for org,text in zip(l_coordinates,l_text):
#text = generate_hash_text(text)
img = cv2.putText(img, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

return box_list, img

def generate_hash_text(input_text: str) -> str:
"""
Given an input text, generate a md5 hash and
return a hashed text of the original text's length
"""
result = hashlib.md5(input_text.encode())

return result.hexdigest()[0:len(input_text)].upper()

def read_input_dicom(input_file_path):
"""
Expand Down Expand Up @@ -195,9 +242,9 @@ def similar(a: str, b: str):
def close_to_similar(target: str, wordlist: str, similarity_threshold: float):
for word in wordlist:
if similar(target, word) >= similarity_threshold:
return True
return word

return False
return None


if __name__ == '__main__':
Expand Down

0 comments on commit a93c3b5

Please sign in to comment.