diff --git a/deploy/docker/cp-tools/research/wsi-parser/process_files.py b/deploy/docker/cp-tools/research/wsi-parser/process_files.py
index a3e712bada..db1a2ea176 100644
--- a/deploy/docker/cp-tools/research/wsi-parser/process_files.py
+++ b/deploy/docker/cp-tools/research/wsi-parser/process_files.py
@@ -66,7 +66,38 @@
 FORCE_CATALOG_UPDATE = os.getenv('WSI_PARSING_FORCE_CATALOG_UPDATE', 'false') == 'true'
 CLOUD_STORAGE_ID = os.getenv('WSI_PARSING_STORAGE_ID', None)
 CATALOG_UPDATE_URL = os.getenv('WSI_PARSING_CATALOG_UPDATE_URL', None)
-OME_TIFF_TARGET_FORMAT_MASK = os.getenv('WSI_PARSING_OME_TIFF_TARGET_FORMAT_MASK', '*.qptiff')
+OME_TIFF_TARGET_FORMAT_MASK = os.getenv('WSI_PARSING_OME_TIFF_TARGET_FORMAT_MASK', '*.qptiff,*.tif')
+
+
+def parse_extension_mapping_property(format_to_extension_mapping_str):
+    """Build an extension -> format dict from a mapping property string.
+
+    The string is a ';'-separated list of '<format>=<ext>,<ext>' entries,
+    e.g. 'qptiff=.qptiff,.tif'. Entries that do not contain exactly one '='
+    are skipped; surrounding whitespace and empty names are ignored.
+    """
+    result = {}
+    if not format_to_extension_mapping_str:
+        return result
+    for mapping_entry in format_to_extension_mapping_str.split(';'):
+        try:
+            format_name, extensions = mapping_entry.split('=')
+        except ValueError:
+            # Malformed entry (no '=' or more than one): parsing is best-effort, skip it.
+            continue
+        format_name = format_name.strip()
+        if not format_name:
+            continue
+        for extension in extensions.split(','):
+            extension = extension.strip()
+            if extension:
+                result[extension] = format_name
+    return result
+
+
+EXTENSION_TO_FORMAT_MAPPING = parse_extension_mapping_property(
+    os.getenv('WSI_EXTENSION_TO_FORMAT_MAPPING', 'qptiff=.qptiff,.tif')
+)
 
 STAIN_METHOD_MAPPINGS = {
     'GENERAL': 'General',
@@ -99,6 +130,23 @@ def prepare_exception_tags_mapping():
 EXCEPTION_TAGS_MAPPING = prepare_exception_tags_mapping()
 
 
+def get_file_extension(file):
+    """Return the extension of `file` with its leading dot ('' when there is none).
+
+    Returns None when `file` is None or empty.
+    """
+    if not file:
+        # Check before calling os.path.splitext, which raises on None.
+        return None
+    return os.path.splitext(file)[1]
+
+
+def is_file_has_format(file, format):
+    """Check whether the extension of `file` maps to `format` in EXTENSION_TO_FORMAT_MAPPING."""
+    file_extension = get_file_extension(file)
+    return EXTENSION_TO_FORMAT_MAPPING.get(file_extension, None) == format
+
+
 class ImageDetails(object):
 
     def __init__(self, series_id, name, width, height, refractive_index, objective_id):
@@ -730,6 +778,11 @@ def map_to_metadata_dict(self, metadata):
 
     def add_qptiff_tags_from_filename(self, existing_tags):
         name_tags = WsiParsingUtils.get_basename_without_extension(self.file_path).split('_')
+        if len(name_tags) != 4:
+            self.log_processing_info(
+                'File name {} is not in format: ___. '
+                'Skipping retrieving tags from file name.'.format(self.file_path))
+            return
         if not existing_tags.get(STUDY_NAME_CAT_ATTR_NAME):
             existing_tags[STUDY_NAME_CAT_ATTR_NAME] = {name_tags[0]}
         if not existing_tags.get(ANIMAL_ID_CAT_ATTR_NAME):
@@ -1310,10 +1363,11 @@ def process_file(self):
         target_series = target_image_details.id
         self.create_tmp_stat_file(target_image_details)
         target_tags_file = self.file_path
-        if self.file_path.endswith('.qptiff'):
+        is_qptiff = is_file_has_format(self.file_path, "qptiff")
+        if is_qptiff:
+            self.log_processing_info('File {} is recognized as QPTIFF'.format(self.file_path))
             target_tags_file = self.create_empty_vsi()
-        tags_processing_result = self.try_process_tags(target_tags_file, target_image_details,
-                                                       self.file_path.endswith('.qptiff'))
+        tags_processing_result = self.try_process_tags(target_tags_file, target_image_details, is_qptiff)
         if TAGS_PROCESSING_ONLY:
             return tags_processing_result
         elif self._is_same_series_selected(target_series):
@@ -1362,8 +1416,8 @@ def process_file(self):
     def try_process_tags(self, target_file_path, target_image_details, is_qptiff=False):
         tags_processing_result = 0
         try:
-            if WsiFileTagProcessor(target_file_path, self.xml_info_tree,
-                                   is_qptiff).process_tags(target_image_details) != 0:
+            wsi_file_tag_processor = WsiFileTagProcessor(target_file_path, self.xml_info_tree, is_qptiff)
+            if wsi_file_tag_processor.process_tags(target_image_details) != 0:
                 self.log_processing_info('Some errors occurred during file tagging')
                 tags_processing_result = 1
         except Exception as e: