Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: removing dependency from core analyzer lib! #2

Merged
merged 4 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ tc-messageBroker==1.6.7
sentry-sdk
rq
redis
tc-core-analyzer-lib==1.3.1
tc-neo4j-lib==2.0.1
pybars3
backoff==2.2.1
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="tc-analyzer-lib",
version="1.0.0",
version="1.1.0",
author="Mohammad Amin Dadgar, TogetherCrew",
maintainer="Mohammad Amin Dadgar",
maintainer_email="[email protected]",
Expand Down
10 changes: 10 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# flake8: noqa
from .assess_active import assess_active
from .assess_connected import assess_connected
from .assess_consistent import assess_consistent
from .assess_dropped import assess_dropped
from .assess_lurker import assess_lurker
from .assess_overlap import assess_overlap
from .assess_remainder import assess_remainder
from .assess_still_active import assess_still_active
from .assess_vital import assess_vital
45 changes: 45 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/assess_active.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from numpy import intersect1d, ndarray


def assess_active(
acc_names: ndarray,
thr_ind: list[str],
thr_uw_deg: list[str],
w_i: int,
all_active: dict[str, set[str]],
) -> dict[str, set[str]]:
"""
Assess all active accounts

Parameters:
-------------
acc_names : np.ndarray[str]
all active accounts in window
thr_ind : list[int]
index numbers of account names with at least
`INT_THR` interactions in which the `INT_THR` is an integer positive value
thr_uw_deg : list[int]
index numbers of account names with at least
`UW_DEG_THR` connections in which the `UW_DEG_THR` is
an integer positive value
w_i : int
index of the sliding time window
which is an integer value
all_active : dict[str, set[str]]
dictionary with string keys of `w_i` and values
containing a list of all account names that are active

Returns:
---------
all_active - dict[str, set[str]] : dictionary with keys w_i and values
containing a list of all account names that are active updated
for window `w_i`
"""

# obtain accounts that meet both weighted and unweighted degree thresholds
thr_overlap = intersect1d(thr_ind, thr_uw_deg)
Comment on lines +39 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optimize the intersection operation.

Using np.intersect1d is appropriate, but ensure that the input lists are unique to avoid redundant computations.

thr_overlap = intersect1d(np.unique(thr_ind), np.unique(thr_uw_deg))


# obtain active account names in this period and store in dictionary
all_active[str(w_i)] = set(acc_names[thr_overlap])

return all_active
39 changes: 39 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/assess_connected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import numpy as np


def assess_connected(
    acc_names: np.ndarray,
    thr_uw_thr_deg: list[int],
    w_i: int,
    all_connected: dict[str, set[str]],
) -> dict[str, set[str]]:
    """
    Record the connected accounts of a single time window

    Parameters:
    ------------
    acc_names : np.ndarray
        account names of the window, indexed positionally
        by `thr_uw_thr_deg`
    thr_uw_thr_deg : list[int]
        positions in `acc_names` of the accounts to mark as connected
        (the thresholding on `UW_THR_DEG_THR` / `EDGE_STR_THR` is done
        by the caller, not here)
    w_i : int
        index of the sliding time window; its string form is the
        dictionary key that gets written
    all_connected : dict[str, set[str]]
        per-window sets of connected account names; modified in place

    Returns:
    -----------
    all_connected : dict[str, set[str]]
        the same dictionary object, with the entry for `str(w_i)`
        set to the selected account names
    """
    # pick the selected names out of the array first, then de-duplicate
    connected_names = acc_names[thr_uw_thr_deg]
    window_key = str(w_i)

    all_connected[window_key] = set(connected_names)
    return all_connected
52 changes: 52 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/assess_consistent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from .check_past import check_past


def assess_consistent(
    all_active: dict[str, set[str]],
    w_i: int,
    CON_T_THR: int,
    CON_O_THR: int,
    WINDOW_D: int,
    all_consistent: dict[str, set[str]],
) -> dict[str, set[str]]:
    """
    Record the consistently active accounts of a single time window

    Parameters:
    -------------
    all_active : dict[str, set[str]]
        per-window sets of active account names; handed to `check_past`
    w_i : int
        index of the sliding time window; its string form is the
        dictionary key that gets written
    CON_T_THR : int
        time period over which consistency is assessed
        (forwarded unchanged to `check_past`)
    CON_O_THR : int
        number of times an account has to be active within `CON_T_THR`
        (forwarded to `check_past` and used for the history check below)
    WINDOW_D : int
        duration of the sliding window (days)
    all_consistent : dict[str, set[str]]
        per-window sets of consistently active account names;
        modified in place

    Returns:
    ---------
    all_consistent : dict[str, set[str]]
        the same dictionary object, with the entry for `str(w_i)` set
        either to the result of `check_past` or to an empty set
    """
    window_key = str(w_i)

    # not enough history yet: w_i must reach (CON_O_THR - 1) * WINDOW_D
    has_enough_history = w_i - (CON_O_THR - 1) * WINDOW_D >= 0
    if not has_enough_history:
        all_consistent[window_key] = set()
        return all_consistent

    # delegate the look-back to check_past and store its result as a set
    consistent_accounts = check_past(all_active, CON_T_THR, CON_O_THR, WINDOW_D)
    all_consistent[window_key] = set(consistent_accounts)
    return all_consistent
60 changes: 60 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/assess_dropped.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from .check_past import check_past


def assess_dropped(
    all_new_active: dict[str, set[str]],
    all_active: dict[str, set[str]],
    w_i: int,
    DROP_H_THR: int,
    DROP_I_THR: int,
    WINDOW_D: int,
    all_dropped: dict[str, set[str]],
) -> dict[str, set[str]]:
    """
    Record the dropped accounts of a single time window

    An account counts as dropped when `check_past` reports it for
    `all_new_active` (looking back `DROP_H_THR`) but not for
    `all_active` (looking back `DROP_I_THR`).

    Parameters:
    -------------
    all_new_active : dict[str, set[str]]
        per-window sets of account names that were active for the
        first time in that window
    all_active : dict[str, set[str]]
        per-window sets of active account names
    w_i : int
        index of the sliding time window; its string form is the
        dictionary key that gets written
    DROP_H_THR : int
        time periods in the past to have been newly active
    DROP_I_THR : int
        time periods to have been inactive
    WINDOW_D : int
        duration of the sliding window (days)
    all_dropped : dict[str, set[str]]
        per-window sets of dropped account names; modified in place

    Returns:
    ----------
    all_dropped : dict[str, set[str]]
        the same dictionary object, with the entry for `str(w_i)` set
        either to the computed difference or to an empty set
    """
    window_key = str(w_i)

    # not enough history yet: w_i must reach DROP_H_THR * WINDOW_D
    if w_i - (DROP_H_THR * WINDOW_D) < 0:
        all_dropped[window_key] = set()
        return all_dropped

    # accounts reported as newly active within the DROP_H_THR look-back
    newly_active = set(check_past(all_new_active, DROP_H_THR, 1, WINDOW_D))

    # accounts reported as active within the DROP_I_THR look-back
    recently_active = set(check_past(all_active, DROP_I_THR, 1, WINDOW_D))

    # newly active earlier on, but no longer active recently
    all_dropped[window_key] = newly_active.difference(recently_active)
    return all_dropped
45 changes: 45 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/assess_lurker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
def assess_lurker(
    all_lurker: dict[str, set[str]],
    all_new_active: dict[str, set[str]],
    all_joined_day: dict[str, set[str]],
    w_i: int,
):
    """
    Record the lurker accounts of a single time window

    Lurkers of window `w_i` are the lurkers of window `w_i - 1`
    (when `w_i >= 1`) plus the accounts that joined in `w_i`,
    minus the accounts that became active for the first time in `w_i`.

    Parameters:
    ------------
    all_lurker : dict[str, set[str]]
        per-window sets of lurker account names; modified in place.
        For `w_i >= 1` the key `str(w_i - 1)` must already be present
    all_new_active : dict[str, set[str]]
        per-window sets of account names that were active for the
        first time in that window; must contain the key `str(w_i)`
    all_joined_day : dict[str, set[str]]
        per-window sets of account names that joined in that window;
        must contain the key `str(w_i)`
    w_i : int
        index of the sliding time window

    Returns:
    ---------
    all_lurker : dict[str, set[str]]
        the same dictionary object, with the entry for `str(w_i)` updated
    """
    window_key = str(w_i)

    # lurkers carried over from the previous window; none for the first one
    carried_over = set(all_lurker[str(w_i - 1)]) if w_i >= 1 else set()

    # everybody who joined in this window starts out as a lurker candidate
    candidates = carried_over | set(all_joined_day[window_key])

    # whoever became active in this window is not a lurker any more
    all_lurker[window_key] = candidates - all_new_active[window_key]

    return all_lurker
53 changes: 53 additions & 0 deletions tc_analyzer_lib/algorithms/assessment/assess_overlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
def assess_overlap(
    ref_dict: dict[str, set[str]],
    comp_dict: dict[str, set[str]],
    w_i: int,
    num_past: int,
) -> tuple[set[str], set[str]]:
    """
    Split the reference set of window `w_i` into the part that also
    occurs in a comparison set and the part that does not

    Notes: the comparison set is taken from window `w_i - num_past`,
    i.e. the same window (num_past = 0) or an earlier one (num_past > 0).
    If a later window is needed, switch ref_dict and comp_dict

    Parameters:
    ------------
    ref_dict : dict[str, set[str]]
        reference dictionary; must contain the key `str(w_i)`
    comp_dict : dict[str, set[str]]
        comparison dictionary; the key `str(w_i - num_past)` may be absent
    w_i : int
        time period of the set taken from ref_dict
    num_past : int
        number of time periods before w_i for the set taken from comp_dict

    Returns:
    ---------
    rem_acc : set[str]
        accounts of ref_dict[w_i] that are not in the selected
        comp_dict set (all of them when that set is absent)
    overlap_acc : set[str]
        accounts present in both ref_dict[w_i] and the selected
        comp_dict set (empty when that set is absent)
    """
    ref_key = str(w_i)

    # key of the comparison window, num_past periods before w_i
    comp_key = str(int(ref_key) - num_past)

    reference = set(ref_dict[ref_key])

    # without a comparison window nothing can overlap
    if comp_key in comp_dict:
        overlap_acc = reference & set(comp_dict[comp_key])
    else:
        overlap_acc = set()

    # whatever did not overlap remains
    rem_acc = reference - overlap_acc

    return (rem_acc, overlap_acc)
Loading
Loading