From 21fd7fa1f8d9b814bad638ff482619cc505f00f2 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 19 Aug 2024 20:00:37 -0400
Subject: [PATCH] ENH: organize - show files with conflicts requiring adding _obj-

Logging check is currently not really functioning since IIRC we do enable
logging into a file at higher level than INFO. So we might need to tune
decision making here
---
 dandi/organize.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/dandi/organize.py b/dandi/organize.py
index c7217437c..0c951bfd1 100644
--- a/dandi/organize.py
+++ b/dandi/organize.py
@@ -9,6 +9,7 @@
 from collections.abc import Sequence
 from copy import deepcopy
 from enum import Enum
+import logging
 import os
 import os.path as op
 from pathlib import Path, PurePosixPath
@@ -307,9 +308,28 @@ def _assign_obj_id(metadata, non_unique):
     # Avoid heavy import by importing within function:
     from .pynwb_utils import get_object_id
 
-    msg = "%d out of %d paths are not unique" % (len(non_unique), len(metadata))
+    msg = "%d out of %d paths are not unique." % (len(non_unique), len(metadata))
+    non_unique_paths = sorted(non_unique)
 
-    lgr.info(msg + ". We will try adding _obj- based on crc32 of object_id")
+    # For a non-unique path, list the paths recorded for it in non_unique
+    def get_msg(path, indent=" "):
+        in_paths = non_unique[path]
+        return (
+            f"{len(in_paths)} paths 'compete' for the path {path!r}:"
+            + f"\n{indent}".join([""] + in_paths)
+        )
+
+    msg += "\n " + get_msg(non_unique_paths[0])
+    if len(non_unique) > 1:
+        if not lgr.isEnabledFor(logging.DEBUG):
+            msg += (
+                " Rerun with logging at DEBUG level '-l debug' "
+                f"to see {len(non_unique) - 1} more cases."
+            )
+        else:
+            for ex_path in non_unique_paths[1:]:
+                msg += "\n " + get_msg(ex_path)
+    lgr.info(msg + " We will try adding _obj- based on crc32 of object_id")
     seen_obj_ids = {}  # obj_id: object_id
     seen_object_ids = {}  # object_id: path
     recent_nwb_msg = "NWB>=2.1.0 standard (supported by pynwb>=1.1.0)."