Skip to content

Commit

Permalink
remove phi infrastructure implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
mdevans committed Oct 7, 2024
1 parent d9521a4 commit b0468c7
Show file tree
Hide file tree
Showing 12 changed files with 11,288 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"autoReload": {
"enable": true
},
"justMyCode": false
"justMyCode": false,
},
{
"name": "Anonymizer GUI Optimized 1",
Expand Down
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"uids",
"VNA's"
],
"python.analysis.typeCheckingMode": "basic",
"python.analysis.typeCheckingMode": "off",
"editor.defaultFoldingRangeProvider": "ms-python.black-formatter",
"editor.defaultFormatter": "ms-python.black-formatter",
"python.analysis.inlayHints.functionReturnTypes": false,
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ pywin32-ctypes = "*"
pefile = "*"
openpyxl = "*"
psutil = "*"
pylibjpeg = {extras = ["all"], version = "*"}
opencv-python-headless = "*"
easyocr = "*"
pylibjpeg = {extras = ["all"], version = "*"}

[dev-packages]
radon = "*"
Expand Down
48 changes: 38 additions & 10 deletions src/anonymizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os, sys, json, shutil, time, platform

from pathlib import Path
from pprint import pformat
from copy import copy
Expand All @@ -12,10 +13,13 @@
# The following unused imports are for pyinstaller
# TODO: pyinstaller cmd line special import doesn't work
# from pydicom.encoders import pylibjpeg
# pylibjpeg = {extras = ["all"], version = "*"}

import tkinter as tk

from tkinter import ttk, filedialog, messagebox
import customtkinter as ctk

from customtkinter import ThemeManager

from utils.logging import init_logging
Expand Down Expand Up @@ -162,9 +166,10 @@ def metrics_loop(self):
return

# Update dashboard if anonymizer model has changed:
if self.dashboard and self.controller.anonymizer.model_changed():
self.dashboard.update_anonymizer_queue(self.controller.anonymizer._anon_Q.qsize())
self.dashboard.update_totals(self.controller.anonymizer.model.get_totals())
if self.dashboard:
self.dashboard.update_anonymizer_queues(*self.controller.anonymizer.queued())
if self.controller.anonymizer.model_changed():
self.dashboard.update_totals(self.controller.anonymizer.model.get_totals())

self.after(self.metrics_loop_interval, self.metrics_loop)

Expand Down Expand Up @@ -474,10 +479,8 @@ def shutdown_controller(self):
self.export_view.destroy()
self.export_view = None

self.save_config()

def close_project(self, event=None):
logging.info("Close Project")
logger.info("Close Project")
if self.query_view and self.query_view.busy():
logger.info(f"QueryView busy, cannot close project")
messagebox.showerror(
Expand All @@ -495,6 +498,19 @@ def close_project(self, event=None):
)
return

if not self.controller.anonymizer.idle():
logger.info(f"Anonymizer busy, cannot close project")
messagebox.showerror(
title=_("Anonymizer Workers Busy"),
message=_(
"Anonymizer queues are not empty, please wait for workers to process files before closing project."
),
parent=self,
)
return

# TODO: Do not allow project close if Import Files/Import Directory is busy

self.shutdown_controller()

self.welcome_view = WelcomeView(self, self.change_language)
Expand All @@ -508,7 +524,7 @@ def close_project(self, event=None):
self.save_config()

def clone_project(self, event=None):
logging.info("Clone Project")
logger.info("Clone Project")

if not self.controller:
logger.info(f"Clone Project Cancelled, no project open")
Expand Down Expand Up @@ -567,7 +583,9 @@ def clone_project(self, event=None):

try:
# Create New Controller with cloned project model
self.controller = ProjectController(cloned_model)
self.controller = ProjectController(
cloned_model
) # this will recreate AnonymizerController and restart associated worker threads
self.controller.save_model()
logger.info(f"Project cloned successfully: {self.controller}")
except Exception as e:
Expand Down Expand Up @@ -800,7 +818,7 @@ def export(self):
self.export_view.focus()

def settings(self):
logging.info("Settings")
logger.info("Settings")

if not self.controller:
logger.error("Internal Error: no ProjectController")
Expand Down Expand Up @@ -831,7 +849,18 @@ def settings(self):
return

logger.info(f"User Edited ProjectModel")

# Some settings changes require the project to be closed and re-opened:
# TODO: elegantly open and close project, see clone project above
if self.controller.model.remove_pixel_phi != edited_model.remove_pixel_phi:
messagebox.showwarning(
title=_("Project restart"),
message=_("The settings change will take effect when the project is next opened."),
parent=self,
)

self.controller.update_model(edited_model)

logger.info(f"{self.controller}")

def help_filename_to_title(self, filename):
Expand Down Expand Up @@ -944,7 +973,6 @@ def main():
logs_dir = init_logging(install_dir, run_as_exe)
os.chdir(install_dir)

logger = logging.getLogger() # get root logger
logger.info(f"cmd line args={args}")

# TODO: command line interface for server side deployments
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# For Burnt-IN PHI Removal:
# For Burnt-IN Pixel PHI Removal:
import os
import time
import logging
import threading

import numpy as np
from numpy import ndarray
Expand Down Expand Up @@ -208,12 +207,12 @@ def process_grayscale_image(self, ds: Dataset, nlp, reader: Reader):

# To improve OCR processing speed:
# TODO: Work out more precisely using readable text size, pixel spacing (not always present), mask blur kernel size & inpainting radius
# Downscale the image if its width exceeds the widht_threshold
widht_threshold = 1200
# Downscale the image if its width exceeds the width_threshold
width_threshold = 1200
scale_factor = 1
downscale = cols > widht_threshold
downscale = cols > width_threshold
if downscale:
scale_factor = widht_threshold / cols
scale_factor = width_threshold / cols

border_size = 40 # pixels

Expand Down Expand Up @@ -242,11 +241,11 @@ def process_grayscale_image(self, ds: Dataset, nlp, reader: Reader):
logger.info(f"After normalization: pixels.value.range:[{pixels.min(), pixels.max()}]")

if downscale:
new_size = (widht_threshold, int(rows * scale_factor))
new_size = (width_threshold, int(rows * scale_factor))
pixels = resize(pixels, new_size, interpolation=INTER_LINEAR)
logger.info(f"Downscaled image, new pixels.shape: {pixels.shape}")
else:
logger.info(f"Image width < {widht_threshold}, no downscaling required.")
logger.info(f"Image width < {width_threshold}, no downscaling required.")

# Add a border to the resized image
pixels = copyMakeBorder(
Expand Down
87 changes: 68 additions & 19 deletions src/controller/anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
from model.project import DICOMNode, ProjectModel
from model.anonymizer import AnonymizerModel

# from .remove_pixel_phi import process_grayscale_image

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -68,7 +70,7 @@ class AnonymizerController:
PRIVATE_BLOCK_NAME = "RSNA"
DEFAULT_ANON_DATE = "20000101" # if source date is invalid or before 19000101

NUMBER_OF_WORKER_THREADS = 2
NUMBER_OF_DATASET_WORKER_THREADS = 2
WORKER_THREAD_SLEEP_SECS = 0.075 # for UX responsiveness
MODEL_AUTOSAVE_INTERVAL_SECS = 30

Expand Down Expand Up @@ -133,21 +135,31 @@ def __init__(self, project_model: ProjectModel):
)
logger.info(f"New Default Anonymizer Model initialised from script: {project_model.anonymizer_script_path}")

self._anon_Q: Queue = Queue()
self._anon_ds_Q: Queue = Queue() # queue for dataset workers
self._anon_px_Q: Queue = Queue() # queue for pixel phi workers
self._worker_threads = []

# Spawn Anonymizer worker threads:
for i in range(self.NUMBER_OF_WORKER_THREADS):
worker = self._worker = threading.Thread(
target=self._anonymize_worker,
name=f"AnonWorker_{i+1}",
args=(self._anon_Q,),
# daemon=True,
# Spawn Anonymizer DATASET worker threads:
for i in range(self.NUMBER_OF_DATASET_WORKER_THREADS):
ds_worker = self._worker = threading.Thread(
target=self._anonymize_dataset_worker,
name=f"AnonDatasetWorker_{i+1}",
args=(self._anon_ds_Q,),
)
ds_worker.start()
self._worker_threads.append(ds_worker)

# Spawn Remove Pixel PHI Thread:
if self.project_model.remove_pixel_phi:
px_worker = self._worker = threading.Thread(
target=self._anonymizer_pixel_phi_worker,
name=f"AnonPixelWorker_1",
args=(self._anon_px_Q,),
)
worker.start()
self._worker_threads.append(worker)
px_worker.start()
self._worker_threads.append(px_worker)

# Setup Model Autosave Thread:
# Spawn Model Autosave Thread:
self._model_change_flag = False
self._autosave_event = threading.Event()
self._autosave_worker_thread = threading.Thread(
Expand All @@ -161,19 +173,29 @@ def __init__(self, project_model: ProjectModel):
# Return the current value of the controller's internal model-change flag.
def model_changed(self) -> bool:
return self._model_change_flag

# Report whether the anonymizer has no queued work: True only when both the
# dataset worker queue and the pixel PHI worker queue are empty.
def idle(self) -> bool:
return self._anon_ds_Q.empty() and self._anon_px_Q.empty()

# Return the current queue sizes as a pair:
# (dataset worker queue size, pixel PHI worker queue size).
# NOTE(review): Queue.qsize() is documented as approximate when other threads
# are producing/consuming; treat these values as display metrics only.
def queued(self) -> tuple[int, int]:
return (self._anon_ds_Q.qsize(), self._anon_px_Q.qsize())

def _stop_worker_threads(self):
logger.info("Stopping Anonymizer Worker Threads")

if not self._active:
logger.error("_stop_worker_threads but controller not active")
logger.error("_stop_worker_threads but AnonymizerController not active")
return

# Send sentinel value to worker threads to terminate:
for _ in range(self.NUMBER_OF_WORKER_THREADS):
self._anon_Q.put((None, None))
for _ in range(self.NUMBER_OF_DATASET_WORKER_THREADS):
self._anon_ds_Q.put((None, None))

# Wait for all sentinel values to be processed
self._anon_ds_Q.join()

# Wait for all tasks to be processed
self._anon_Q.join()
if self.project_model.remove_pixel_phi:
self._anon_px_Q.put(None)
self._anon_px_Q.join()

# Wait for all worker threads to finish
for worker in self._worker_threads:
Expand Down Expand Up @@ -496,6 +518,10 @@ def anonymize(self, source: DICOMNode | str, ds: Dataset) -> str | None:
# TODO: Optimize / Transcoding / DICOM Compliance File Verification - as per extra project options
# see options for write_like_original=True
ds.save_as(filename, write_like_original=False)

# If enabled for project, queue this file for pixel PHI scanning and removal:
if self.project_model.remove_pixel_phi:
self._anon_px_Q.put(filename)
return None

except Exception as e:
Expand Down Expand Up @@ -569,9 +595,9 @@ def anonymize_dataset_ex(self, source: DICOMNode | str, ds: Dataset | None) -> N
Returns:
None
"""
self._anon_Q.put((source, ds))
self._anon_ds_Q.put((source, ds))

def _anonymize_worker(self, ds_Q: Queue) -> None:
def _anonymize_dataset_worker(self, ds_Q: Queue) -> None:
"""
An internal worker method that performs the anonymization process.
Expand All @@ -593,3 +619,26 @@ def _anonymize_worker(self, ds_Q: Queue) -> None:
ds_Q.task_done()

logger.info(f"thread={threading.current_thread().name} end")

# Worker loop for the pixel PHI queue.
# Takes file paths from px_Q one at a time until it receives the None sentinel,
# logging the thread name on start and on exit.
# px_Q: queue of file paths (None is the terminate sentinel).
def _anonymizer_pixel_phi_worker(self, px_Q: Queue) -> None:

logger.info(f"thread={threading.current_thread().name} start")

while True:
# Pause between items for WORKER_THREAD_SLEEP_SECS before taking the next one
time.sleep(self.WORKER_THREAD_SLEEP_SECS)

path = px_Q.get()  # Blocks by default
if path is None:  # sentinel value set by _stop_worker_threads
# Mark the sentinel as done too, so a join() on the queue can return
px_Q.task_done()
break

logger.info(f"Remove Pixel PHI from: {path}")
# NOTE(review): the arithmetic loop below never uses `path` and its result is
# discarded — presumably a placeholder workload standing in for the real
# pixel PHI removal; confirm before relying on this worker.
result = 0
for i in range(1000000):
result += i * (i % 3)

logger.info("*PHI GONE*")

# One task_done() per item retrieved from the queue
px_Q.task_done()

logger.info(f"thread={threading.current_thread().name} end")
3 changes: 2 additions & 1 deletion src/model/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class ProjectModel:
"""

# Project Model Version Control
MODEL_VERSION = 2
MODEL_VERSION = 3

# As per instructions here: https://www.medicalconnections.co.uk/kb/ImplementationUID-And-ImplementationName
RSNA_ROOT_ORG_UID = "1.2.826.0.1.3680043.10.474" # sub UID from medicalconnections.co.uk as used by JavaAnonymizer
Expand Down Expand Up @@ -163,6 +163,7 @@ def default_logging_levels() -> LoggingLevels:
site_id: str = field(default_factory=default_site_id)
project_name: str = field(default_factory=default_project_name)
uid_root: str = field(default_factory=default_uid_root)
remove_pixel_phi: bool = False
storage_dir: Path = field(default_factory=default_storage_dir)
modalities: List[str] = field(default_factory=default_modalities)
storage_classes: List[str] = field(default_factory=default_storage_classes) # re-initialised in post_init
Expand Down
Loading

0 comments on commit b0468c7

Please sign in to comment.