Skip to content

Commit

Permalink
Added De-ID redact flag.
Browse files: browse the repository at this point in the history
  • Loading branch information
vladd-bit committed Feb 29, 2024
1 parent 07d84eb commit 4ef752a
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion medcat_service/nlp_processor/medcat_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(self):
self.bulk_nproc = int(os.getenv("APP_BULK_NPROC", 8))
self.torch_threads = int(os.getenv("APP_TORCH_THREADS", -1))
self.DEID_MODE = os.getenv("DEID_MODE", "False")
self.DEID_REDACT = os.getenv("DEID_REDACT", True)
self.model_card_info = {}

# this is available to constrain torch threads when there
Expand Down Expand Up @@ -175,6 +176,7 @@ def process_content_bulk(self, content):
nproc += 1

self.log.debug("NPROC:" + str(nproc))
self.log.debug("Batch size:" + str(batch_size))

# Use generators both to supply the input documents and to yield the resulting annotations,
# so that documents and results are not copied in memory more often than necessary.
Expand All @@ -185,7 +187,9 @@ def process_content_bulk(self, content):

try:
if eval(self.DEID_MODE):
ann_res = self.cat.deid_text()
for text_record in content:
ann_res.append(text_record[0], self.cat.deid_text(text_record[1], redact=eval(self.DEID_REDACT)))
#ann_res = self.cat.deid_multi_texts(content, MedCatProcessor._generate_input_doc(content, invalid_doc_ids), batch_size=batch_size, nproc=nproc)
else:
ann_res = self.cat.multiprocessing_batch_docs_size(
MedCatProcessor._generate_input_doc(content, invalid_doc_ids), batch_size=batch_size, nproc=nproc)
Expand Down

0 comments on commit 4ef752a

Please sign in to comment.