Skip to content

Commit

Permalink
Merge pull request #558 from 4dn-dcic/ajs_doppleg_chk
Browse files Browse the repository at this point in the history
Refactor of doppelganger check so it won't fail with a lot of ignored users
  • Loading branch information
aschroed authored Jan 23, 2024
2 parents 6264ded + edf01d4 commit 5faaa56
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 13 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ foursight
----------
Change Log
----------

4.2.0
=====

* refactor of doppelganger check so it won't fail if the ignore list becomes too long
* increased stringency of the name warning: names must now be equal (case-insensitive) to warn; merely similar names are listed separately as close matches

`PR 558: refactor doppelganger check <https://github.com/4dn-dcic/foursight/pull/558>`_


4.1.4
=====

Expand Down
33 changes: 21 additions & 12 deletions chalicelib_fourfront/checks/wrangler_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,7 @@ def users_with_doppelganger(connection, **kwargs):
check.full_output = {'result': [], 'ignore': []}
check.brief_output = []
check.status = 'PASS'
query = ('/search/?type=User&sort=display_title'
query = ('/search/?type=User' # &sort=display_title'
'&field=display_title&field=contact_email&field=preferred_email&field=email')
# if check was limited to certain emails
if kwargs.get('emails'):
Expand All @@ -1246,6 +1246,7 @@ def users_with_doppelganger(connection, **kwargs):
# go through each combination
combs = itertools.combinations(all_users, 2)
cases = []
iffy_cases = []
for comb in combs:
us1 = comb[0]
us2 = comb[1]
Expand All @@ -1262,7 +1263,17 @@ def users_with_doppelganger(connection, **kwargs):
'brief': msg}
cases.append(log)
# if not, compare names
else:
elif us1['display_title'].lower() == us2['display_title'].lower():
msg = '{} and {} are the same'.format(
us1['display_title'],
us2['display_title']
)
log = {'user1': [us1['display_title'], us1['@id'], us1['email']],
'user2': [us2['display_title'], us2['@id'], us2['email']],
'log': 'have the same name',
'brief': msg}
cases.append(log)
else: # this should just provide a warning list that can be periodically reviewed
score = round(string_label_similarity(us1['display_title'], us2['display_title']) * 100)
if score > 85:
msg = '{} and {} are similar-{}'.format(
Expand All @@ -1273,15 +1284,8 @@ def users_with_doppelganger(connection, **kwargs):
'user2': [us2['display_title'], us2['@id'], us2['email']],
'log': 'has similar names ({}/100)'.format(str(score)),
'brief': msg}
cases.append(log)
iffy_cases.append(log)

# are the ignored ones getting out of control
if len(ignored_cases) > 100:
fail_msg = 'Number of ignored cases is very high, time for maintainace'
check.brief_output = fail_msg
check.full_output = {'result': [fail_msg, ], 'ignore': ignored_cases}
check.status = 'FAIL'
return check
# remove ignored cases from all cases
if ignored_cases:
for an_ignored_case in ignored_cases:
Expand All @@ -1292,7 +1296,6 @@ def users_with_doppelganger(connection, **kwargs):
ignored_cases.append([a_case['user1'], a_case['user2']])
cases = []

# add if they have any items referencing them
if cases:
for a_case in cases:
us1_info = ff_utils.get_metadata('indexing-info?uuid=' + a_case['user1'][1][7:-1], key=connection.ff_keys)
Expand All @@ -1303,14 +1306,20 @@ def users_with_doppelganger(connection, **kwargs):
a_case['log'] = a_case['log'] + add_on
a_case['brief'] = a_case['brief'] + add_on

check.full_output = {'result': cases, 'ignore': ignored_cases}
check.full_output = {'to_check': cases, 'ignore': ignored_cases, 'close_matches': iffy_cases}
if cases:
check.summary = 'Some user accounts need attention.'
check.brief_output = [i['brief'] for i in cases]
check.status = 'WARN'
else:
check.summary = 'No user account conflicts'
check.brief_output = []

# are the ignored ones getting out of control N.B. Don't think this needs to fail
if len(ignored_cases) > 100:
fail_msg = '\nNOTE: Number of ignored cases is very high, time to resolve'
check.brief_output.append(fail_msg)
check.status = 'WARN'
return check


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "foursight"
version = "4.1.4"
version = "4.2.0"
description = "Serverless Chalice Application for Monitoring"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 5faaa56

Please sign in to comment.