Skip to content

Require path match for allocation pipelines #327

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 16 additions & 20 deletions coldfront/core/utils/fasrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,34 +50,30 @@ def sort_by(list1, sorter, how='attr'):
raise Exception('unclear sorting method')
return is_true, is_false

def select_one_project_allocation(project_obj, resource_obj, dirpath=None):
def select_one_project_allocation(project_obj, resource_obj, dirpath):
"""
Get one allocation for a given project/resource pairing; handle return of
Get one allocation for a given project/resource/path pairing; handle return of
zero or multiple allocations.

If multiple allocations are in allocation_query, choose the one with the
similar dirpath.

Parameters
----------
project_obj
resource_obj
dirpath
"""
filter_vals = {'resources__id': resource_obj.id}
# if dirpath:
# filter_vals['allocationattribute__value'] = dirpath
allocation_query = project_obj.allocation_set.filter(**filter_vals)
if allocation_query.count() == 1:
allocation_obj = allocation_query.first()
if allocation_obj.path and dirpath and allocation_obj.path not in dirpath and dirpath not in allocation_obj.path:
return None
elif allocation_query.count() < 1:
allocation_obj = None
elif allocation_query.count() > 1:
allocation_obj = next((a for a in allocation_query if a.path.lower() in dirpath.lower()),
None)
return allocation_obj

if allocation_query.count() < 1:
return None
else:
allocations = [
a for a in allocation_query
if a.path and dirpath and (a.path in dirpath or dirpath in a.path)
]
if len(allocations) == 1:
return allocations[0]
elif len(allocations) > 1:
raise Exception('multiple allocations found for project/resource/path pairing')

def determine_size_fmt(byte_num):
"""Return the correct human-readable byte measurement.
Expand All @@ -86,9 +82,9 @@ def determine_size_fmt(byte_num):
for u in units:
unit = u
if abs(byte_num) < 1024.0:
return round(byte_num, 3), unit
return (round(byte_num, 3), unit)
byte_num /= 1024.0
return(round(byte_num, 3), unit)
return (round(byte_num, 3), unit)

def convert_size_fmt(num, target_unit, source_unit='B'):
units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def handle(self, *args, **options):
lab_path = entry['fs_path'].replace(f'/n/{entry["server"]}/', '')

resource = Resource.objects.get(name__contains=entry['server'])
alloc_obj = select_one_project_allocation(project, resource, dirpath=lab_path)
alloc_obj = select_one_project_allocation(project, resource, lab_path)
if alloc_obj is not None:
continue
lab_usage_entries = [
Expand Down
85 changes: 36 additions & 49 deletions coldfront/plugins/fasrc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ def produce_query_statement(self, vol_type, volumes=None):

query_dict = {
'quota': {
'volumes': '|'.join(r.name.split('/')[0] for r in Resource.objects.filter(name__contains='tier0')),
'relation': 'HasQuota',
'match': "(e:Quota) MATCH (d:ConfigValue {Name: 'Quota.Invocation'})",
'server': 'filesystem',
'validation_query':
"NOT ((e.SizeGB IS null) OR (e.usedBytes = 0 AND e.SizeGB = 1024)) \
AND (datetime() - duration('P31D') <= datetime(r.DotsLFSUpdateDate)) \
"NOT ((e.SizeGB IS null) OR (e.usedBytes = 0 AND e.SizeGB = 1024))\
AND (datetime() - duration('P31D') <= datetime(r.DotsLFSUpdateDate))\
AND NOT (e.Path IS null)",
'r_updated': 'DotsLFSUpdateDate',
'storage_type': 'Quota',
Expand All @@ -42,17 +43,18 @@ def produce_query_statement(self, vol_type, volumes=None):
'usedbytes': 'usedBytes',
'fs_path': 'Path',
'server_replace': '/n/',
'path_replace': '/n//',
'path_def': "substring(e.Path, size('/n/') + size(split(e.Path, '/')[2]) + 1)",
'unique':'datetime(e.DotsLFSUpdateDate) as begin_date'
},
'isilon': {
'volumes': '|'.join(r.name.split('/')[0] for r in Resource.objects.filter(name__contains='tier1')),
'relation': 'Owns',
'match': "(e:IsilonPath) MATCH (d:ConfigValue {Name: 'IsilonPath.Invocation'})",
'server': 'Isilon',
'validation_query': "r.DotsUpdateDate = d.DotsUpdateDate \
'validation_query': "r.DotsUpdateDate = d.DotsUpdateDate\
AND NOT (e.Path =~ '.*/rc_admin/.*')\
AND (e.Path =~ '.*labs.*')\
AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate)) \
AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate))\
AND NOT (e.SizeGB = 0)",
'fs_path':'Path',
'r_updated': 'DotsUpdateDate',
Expand All @@ -61,48 +63,43 @@ def produce_query_statement(self, vol_type, volumes=None):
'sizebytes': 'SizeBytes',
'usedbytes': 'UsedBytes',
'server_replace': '01.rc.fas.harvard.edu',
'path_replace': '/ifs/',
'path_def': "replace(e.Path, '/ifs/', '')",
'unique': 'datetime(e.DotsUpdateDate) as begin_date'
},
'volume': {
'volumes': '|'.join(r.name.split('/')[0] for r in Resource.objects.filter(name__contains='tier2')),
'relation': 'Owns',
'match': '(e:Volume)',
'server': 'Hostname',
'validation_query': 'NOT (e.SizeGB = 0)',
'r_updated': 'DotsLVSUpdateDate',
'storage_type': 'Volume',
'fs_path': 'LogicalVolume',
'path_replace': '/dev/data/',
'path_def': "replace(e.LogicalVolume, '/dev/data/', '')",
'usedgb': 'UsedGB',
'sizebytes': 'SizeGB * 1000000000',
'usedbytes': 'UsedGB * 1000000000',
'server_replace': '.rc.fas.harvard.edu',
'unique': 'datetime(e.DotsLVSUpdateDate) as update_date, \
'unique': 'datetime(e.DotsLVSUpdateDate) as update_date,\
datetime(e.DotsLVDisplayUpdateDate) as display_date'
},
}
d = query_dict[vol_type]

if not volumes:
volumes = [
r.name.split('/')[0] for r in Resource.objects.filter(resource_type__name='Storage')
]
volumes = '|'.join(volumes)
where = f"(e.{d['server']} =~ '.*({volumes}).*')"
statement = {
'statement': f"MATCH p=(g:Group)-[r:{d['relation']}]-{d['match']} \
WHERE {where} AND {d['validation_query']}\
'statement': f"MATCH p=(g:Group)-[r:{d['relation']}]-{d['match']}\
WHERE (e.{d['server']} =~ '.*({d['volumes']}).*') AND {d['validation_query']}\
AND NOT (g.ADSamAccountName =~ '.*(disabled|rc_admin).*')\
RETURN \
{d['unique']}, \
RETURN\
{d['unique']},\
g.ADSamAccountName as lab,\
(e.SizeGB / 1024.0) as tb_allocation, \
(e.SizeGB / 1024.0) as tb_allocation,\
e.{d['sizebytes']} as byte_allocation,\
e.{d['usedbytes']} as byte_usage,\
(e.{d['usedgb']} / 1024.0) as tb_usage,\
replace(e.{d['fs_path']}, '{d['path_replace']}', '') as fs_path, \
'{d['storage_type']}' as storage_type, \
datetime(r.{d['r_updated']}) as rel_updated, \
{d['path_def']} as fs_path,\
'{d['storage_type']}' as storage_type,\
datetime(r.{d['r_updated']}) as rel_updated,\
replace(e.{d['server']}, '{d['server_replace']}', '') as server"
}
self.queries['statements'].append(statement)
Expand Down Expand Up @@ -141,14 +138,14 @@ def _standardize_nesefile(self):
headers = headers_df.columns.values.tolist()
data = pd.read_csv(datafile, names=headers, sep='\s+')
data = data.loc[data['pool'].str.contains('1')]
data['fs_path'] = data['pool']
data['lab'] = data['pool'].str.replace('1', '').str.replace('hugl', '').str.replace('hus3', '')
data['server'] = 'nesetape'
data['storage_type'] = 'tape'
data['byte_allocation'] = data['mib_capacity'] * 1048576
data['byte_usage'] = data['mib_used'] * 1048576
data['tb_allocation'] = round(((data['mib_capacity']+ data['mib_capacity']*0.025) / 953674.3164), -1)
data['tb_usage'] = data['mib_used'] / 953674.3164
data['fs_path'] = None
data = data[[
'lab', 'server', 'storage_type', 'byte_allocation',
'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path',
Expand Down Expand Up @@ -187,17 +184,17 @@ def stage_user_member_query(self, groupsearch, pi=False):
return_vars = 'u.ADParentCanonicalName AS path, u.ADDepartment AS department'
query = {'statements': [{
'statement': f"MATCH {match_vars} (g.ADSamAccountName =~ '({groupsearch})')\
RETURN \
u.ADgivenName AS first_name, \
u.ADsurname AS last_name, \
u.ADSamAccountName AS user_name, \
u.ADenabled AS user_enabled, \
RETURN\
u.ADgivenName AS first_name,\
u.ADsurname AS last_name,\
u.ADSamAccountName AS user_name,\
u.ADenabled AS user_enabled,\
g.ADSamAccountName AS group_name,\
{return_vars},\
g.ADManaged_By AS group_manager, \
u.ADgidNumber AS user_gid_number, \
u.ADTitle AS title, \
u.ADCompany AS company, \
g.ADManaged_By AS group_manager,\
u.ADgidNumber AS user_gid_number,\
u.ADTitle AS title,\
u.ADCompany AS company,\
g.ADgidNumber AS group_gid_number"
}]}
resp_json = self.post_query(query)
Expand Down Expand Up @@ -247,37 +244,27 @@ def matched_dict_processing(allocation, data_dicts, paired_allocs, log_message):
def pair_allocations_data(project, quota_dicts):
"""pair allocations with usage dicts"""
logger = logging.getLogger('coldfront.import_quotas')
unpaired_allocs = project.allocation_set.filter(
allocs = project.allocation_set.filter(
status__name__in=['Active','Pending Deactivation'],
resources__resource_type__name='Storage'
)
paired_allocs = {}
# first, pair allocations with those that have same
for allocation in unpaired_allocs:
for allocation in allocs:
dicts = [
d for d in quota_dicts
if d['fs_path'] and allocation.path.lower() == d['fs_path'].lower()
if d['fs_path'] and d['fs_path'].lower() == allocation.path.replace('HDD/', '').replace('SSD-HGST/', '').replace('SSD/', '').lower()
and d['server'] in allocation.resources.first().name
]
if dicts:
log_message = f'Path-based match: {allocation}, {allocation.path}, {dicts[0]}'
paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message)
unpaired_allocs = [
a for a in unpaired_allocs if a not in paired_allocs
]
unpaired_allocs = [a for a in allocs if a not in paired_allocs]
unpaired_dicts = [d for d in quota_dicts if d not in paired_allocs.values()]
for allocation in unpaired_allocs:
dicts = [
d for d in unpaired_dicts if d['server'] in allocation.resources.first().name
]
if dicts:
log_message = f'Resource-based match: {allocation}, {dicts[0]}'
paired_allocs = matched_dict_processing(allocation, dicts, paired_allocs, log_message)
unpaired_allocs = [
a for a in unpaired_allocs if a not in paired_allocs and a.status.name == 'Active'
]
unpaired_dicts = [d for d in unpaired_dicts if d not in paired_allocs.values()]
if unpaired_dicts or unpaired_allocs:
print(
f"unpaired allocation data. Allocation: {unpaired_allocs} | Dict: {unpaired_dicts}"
)
logger.warning(
"unpaired allocation data. Allocation: %s | Dict: %s", unpaired_allocs, unpaired_dicts
)
Expand Down
3 changes: 1 addition & 2 deletions coldfront/plugins/sftocf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ def get_zone_by_name(self, zone_name):
zones = self.get_zones()
return next((z for z in zones if z['name'] == zone_name), None)


def create_zone(self, zone_name, paths, managers, managing_groups):
"""Create a zone via the API"""
url = self.api_url + 'zone/'
Expand Down Expand Up @@ -616,7 +615,7 @@ def allocationquerymatches(self):
total_sort_key = itemgetter('path','volume')
allocation_usage_grouped = return_dict_of_groupings(self.sf_usage_data, total_sort_key)
missing_allocations = [
(k,a) for k, a in allocation_usage_grouped if k not in allocation_list
(k,a) for k, a in allocation_usage_grouped if (a, k) not in allocation_list
]
print("missing_allocations:", missing_allocations)
logger.warning('starfish allocations missing in coldfront: %s', missing_allocations)
Expand Down
Loading