Skip to content

Commit

Permalink
Fix small bugs (#8632)
Browse files: browse the repository at this point in the history
* check rule status. Fix #8626

* protect CheckTapeRecall against empty dataframe

* adapt CheckTapeRecall to current use of Rucio. Fix #8630

* add diagnostic and tentative fix for #8624

* pylint
  • Loading branch information
belforte authored Aug 19, 2024
1 parent 4842023 commit 3087e06
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
20 changes: 13 additions & 7 deletions scripts/Utils/CheckTapeRecall.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def sendAndCheck(document):

def main():
"""
get all rules for this account, find pending ones,
get all rules, find pending ones,
order, pretty format, print them and write an HTML file
"""

Expand All @@ -80,12 +80,13 @@ def main():
logger.setLevel(logging.INFO)

rucio, crab = ensureEnvironment(logger)
account = 'crab_tape_recall'
activity = "Analysis TapeRecall"

# get rules for this account
ruleGen = rucio.list_replication_rules({'account': account})
# get rules for this activity
ruleGen = rucio.list_replication_rules({'activity': activity})
rules = list(ruleGen)
logger.info(f"{len(rules)} rules exist for account: {account}")
msg = f"{len(rules)} rules exist for activity: {activity}"
logger.info(msg)

# make a DataFrame
df = pd.DataFrame(rules)
Expand All @@ -103,7 +104,11 @@ def main():

# combine all pending rules in a single dataframe
pending = pd.concat([stuck, replicating, suspended]).reset_index(drop=True)
pendingCompact = createRulesDataframe(pending, rucio, crab, logger)

if not pending.empty:
pendingCompact = createRulesDataframe(pending, rucio, crab, logger)
else:
pendingCompact = pd.DataFrame()

# prepare an HTML table
if pendingCompact.empty:
Expand Down Expand Up @@ -377,7 +382,8 @@ def ensureEnvironment(logger=None):
"export RUCIO_ACCOUNT='crab_server'")
sys.exit()
# make sure Rucio client is initialized, this also ensures X509 proxy
rucio = Client(
# our robot certificate can access multiple Rucio accounts; use the non-privileged one here
rucio = Client(account="crab_server",
creds={"client_cert": "/data/certs/robotcert.pem", "client_key": "/data/certs/robotkey.pem"},
auth_type='x509',
)
Expand Down
4 changes: 2 additions & 2 deletions src/python/TaskWorker/Actions/Recurring/TapeRecallManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ def handleRecall(self):
# Make sure data will stay on disk for NOW + 4 days. A new rule will kick in when task is submitted
self.logger.info("Extending rule lifetime to last 4 days")
self.privilegedRucioClient.update_replication_rule(reqId, {'lifetime': (4 * 24 * 60 * 60)}) # lifetime is in seconds
else:
# still in progress, report status and keep waiting
elif rule['state'] in ['REPLICATING', 'STUCK', 'SUSPENDED']:
# in progress, report status and keep waiting
ok = rule['locks_ok_cnt']
rep = rule['locks_replicating_cnt']
stuck = rule['locks_stuck_cnt']
Expand Down
9 changes: 8 additions & 1 deletion src/python/TaskWorker/Actions/RucioActions.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,17 @@ def createOrReuseRucioRule(self, did=None, grouping=None, activity=None,
self.logger.debug("A duplicate rule for this account, did, rse_expression, copies already exists. Use that")
# find the existing rule id
ruleIdGen = self.rucioClient.list_did_rules(scope=did['scope'], name=did['name'])
self.logger.debug("List of existing rules for this DID")
ruleId = None
for rule in ruleIdGen:
if rule['account'] == self.rucioAccount:
self.logger.debug("id: %s account: %s activity %s")
if rule['account'] == account:
ruleId = rule['id']
break
if not ruleId:
msg = "Failed to creaed Rucio rule to recall data. Rucio DuplicateException raised "
msg += "but a rule for this account was not found in the list"
raise TaskWorkerException(msg) from DuplicateRule
# extend rule lifetime
self.rucioClient.update_replication_rule(ruleId, {'lifetime': lifetime})
except (InsufficientTargetRSEs, InsufficientAccountLimit, FullStorage) as e:
Expand Down

0 comments on commit 3087e06

Please sign in to comment.