diff --git a/scripts/Utils/CheckTapeRecall.py b/scripts/Utils/CheckTapeRecall.py index e7029e020f..c922ec1f24 100644 --- a/scripts/Utils/CheckTapeRecall.py +++ b/scripts/Utils/CheckTapeRecall.py @@ -64,7 +64,7 @@ def sendAndCheck(document): def main(): """ - get all rules for this account, find pending ones, + get all rules, find pending ones, order, pretty format, print them and write an HTML file """ @@ -80,12 +80,13 @@ def main(): logger.setLevel(logging.INFO) rucio, crab = ensureEnvironment(logger) - account = 'crab_tape_recall' + activity = "Analysis TapeRecall" - # get rules for this account - ruleGen = rucio.list_replication_rules({'account': account}) + # get rules for this activity + ruleGen = rucio.list_replication_rules({'activity': activity}) rules = list(ruleGen) - logger.info(f"{len(rules)} rules exist for account: {account}") + msg = f"{len(rules)} rules exist for activity: {activity}" + logger.info(msg) # make a DataFrame df = pd.DataFrame(rules) @@ -103,7 +104,11 @@ def main(): # combine all pending rules in a single dataframe pending = pd.concat([stuck, replicating, suspended]).reset_index(drop=True) - pendingCompact = createRulesDataframe(pending, rucio, crab, logger) + + if not pending.empty: + pendingCompact = createRulesDataframe(pending, rucio, crab, logger) + else: + pendingCompact = pd.DataFrame() # prepare an HTML table if pendingCompact.empty: @@ -377,7 +382,8 @@ def ensureEnvironment(logger=None): "export RUCIO_ACCOUNT='crab_server'") sys.exit() # make sure Rucio client is initialized, this also ensures X509 proxy - rucio = Client( + # our robot certificate can access multiple Rucio accounts, use the non-privileged one here + rucio = Client(account="crab_server", creds={"client_cert": "/data/certs/robotcert.pem", "client_key": "/data/certs/robotkey.pem"}, auth_type='x509', ) diff --git a/src/python/TaskWorker/Actions/Recurring/TapeRecallManager.py b/src/python/TaskWorker/Actions/Recurring/TapeRecallManager.py index 50d6b2b9b4..917a8c56e2 
100644 --- a/src/python/TaskWorker/Actions/Recurring/TapeRecallManager.py +++ b/src/python/TaskWorker/Actions/Recurring/TapeRecallManager.py @@ -128,8 +128,8 @@ def handleRecall(self): # Make sure data will stay on disk for NOW + 4 days. A new rule will kick in when task is submitted self.logger.info("Extending rule lifetime to last 4 days") self.privilegedRucioClient.update_replication_rule(reqId, {'lifetime': (4 * 24 * 60 * 60)}) # lifetime is in seconds - else: - # still in progress, report status and keep waiting + elif rule['state'] in ['REPLICATING', 'STUCK', 'SUSPENDED']: + # in progress, report status and keep waiting ok = rule['locks_ok_cnt'] rep = rule['locks_replicating_cnt'] stuck = rule['locks_stuck_cnt'] diff --git a/src/python/TaskWorker/Actions/RucioActions.py b/src/python/TaskWorker/Actions/RucioActions.py index 0363b5a4a2..98e088a68d 100644 --- a/src/python/TaskWorker/Actions/RucioActions.py +++ b/src/python/TaskWorker/Actions/RucioActions.py @@ -90,10 +90,17 @@ def createOrReuseRucioRule(self, did=None, grouping=None, activity=None, self.logger.debug("A duplicate rule for this account, did, rse_expression, copies already exists. Use that") # find the existing rule id ruleIdGen = self.rucioClient.list_did_rules(scope=did['scope'], name=did['name']) + self.logger.debug("List of existing rules for this DID") + ruleId = None for rule in ruleIdGen: - if rule['account'] == self.rucioAccount: + self.logger.debug("id: %s account: %s activity %s") + if rule['account'] == account: ruleId = rule['id'] break + if not ruleId: + msg = "Failed to creaed Rucio rule to recall data. Rucio DuplicateException raised " + msg += "but a rule for this account was not found in the list" + raise TaskWorkerException(msg) from DuplicateRule # extend rule lifetime self.rucioClient.update_replication_rule(ruleId, {'lifetime': lifetime}) except (InsufficientTargetRSEs, InsufficientAccountLimit, FullStorage) as e: