TogetherCrew · cyri113 · Jul 8, 2024 · Jul 8, 2024 · Jul 8, 2024 · Jul 8, 2024
diff --git a/requirements.txt b/requirements.txt
@@ -18,7 +18,6 @@ tc-messageBroker==1.6.7
 sentry-sdk
 rq
 redis
-tc-core-analyzer-lib==1.3.1
 tc-neo4j-lib==2.0.1
 pybars3
 backoff==2.2.1
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name="tc-analyzer-lib",
-    version="1.0.0",
+    version="1.1.0",
     author="Mohammad Amin Dadgar, TogetherCrew",
     maintainer="Mohammad Amin Dadgar",
     maintainer_email="[email protected]",

diff --git a/tc_analyzer_lib/algorithms/assessment/__init__.py b/tc_analyzer_lib/algorithms/assessment/__init__.py
@@ -0,0 +1,10 @@
+# flake8: noqa
+from .assess_active import assess_active
+from .assess_connected import assess_connected
+from .assess_consistent import assess_consistent
+from .assess_dropped import assess_dropped
+from .assess_lurker import assess_lurker
+from .assess_overlap import assess_overlap
+from .assess_remainder import assess_remainder
+from .assess_still_active import assess_still_active
+from .assess_vital import assess_vital
diff --git a/tc_analyzer_lib/algorithms/assessment/assess_active.py b/tc_analyzer_lib/algorithms/assessment/assess_active.py
@@ -0,0 +1,45 @@
+from numpy import intersect1d, ndarray
+
+
+def assess_active(
+    acc_names: ndarray,
+    thr_ind: list[int],
+    thr_uw_deg: list[int],
+    w_i: int,
+    all_active: dict[str, set[str]],
+) -> dict[str, set[str]]:
+    """
+    Assess all active accounts
+
+    Parameters:
+    -------------
+    acc_names : np.ndarray[str]
+        all active accounts in window
+    thr_ind : list[int]
+        index numbers (into `acc_names`) of accounts with at least
+        `INT_THR` interactions, `INT_THR` being a positive integer
+    thr_uw_deg : list[int]
+        index numbers (into `acc_names`) of accounts with at least
+        `UW_DEG_THR` connections, `UW_DEG_THR` being a positive integer
+    w_i : int
+        index of the sliding time window
+    all_active : dict[str, set[str]]
+        dictionary with string keys of `w_i` and values
+        containing the set of all account names that are active;
+        it is updated in place
+
+    Returns:
+    ---------
+    all_active : dict[str, set[str]]
+        the same dictionary, with the entry for `str(w_i)` set to the
+        account names that meet both thresholds in window `w_i`
+    """
+
+    # indices meeting both the weighted and unweighted degree thresholds;
+    # intersect1d yields a float array when an input is an empty list, and
+    # float arrays are rejected as indices, so cast to integers first
+    thr_overlap = intersect1d(thr_ind, thr_uw_deg).astype(int)
+
+    # store the active account names of this window under its string key
+    all_active[str(w_i)] = set(acc_names[thr_overlap])
+    return all_active
diff --git a/tc_analyzer_lib/algorithms/assessment/assess_connected.py b/tc_analyzer_lib/algorithms/assessment/assess_connected.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+
+def assess_connected(
+    acc_names: np.ndarray,
+    thr_uw_thr_deg: list[int],
+    w_i: int,
+    all_connected: dict[str, set[str]],
+) -> dict[str, set[str]]:
+    """
+    Record the connected accounts of one sliding window
+
+    Parameters:
+    ------------
+    acc_names : np.ndarray[str]
+        all active accounts in window (account names are strings)
+    thr_uw_thr_deg : list[int]
+        index numbers (into `acc_names`) of accounts with at
+        least `UW_THR_DEG_THR` connections of at least `EDGE_STR_THR`
+        interactions each
+    w_i : int
+        index of sliding time window
+    all_connected : dict[str, set[str]]
+        dictionary with keys `str(w_i)` and values containing the set
+        of all account names that are connected; updated in place
+
+    Returns:
+    -----------
+    all_connected : dict[str, set[str]]
+        the same dictionary, with the entry for window `w_i`
+        replaced by the connected account names of this window
+    """
+
+    # pick the names at the qualifying indices and drop duplicates
+    connected_names = set(acc_names[thr_uw_thr_deg])
+
+    # file them under the string form of the window index
+    all_connected[str(w_i)] = connected_names
+    return all_connected
diff --git a/tc_analyzer_lib/algorithms/assessment/assess_consistent.py b/tc_analyzer_lib/algorithms/assessment/assess_consistent.py
@@ -0,0 +1,52 @@
+from .check_past import check_past
+
+
+def assess_consistent(
+    all_active: dict[str, set[str]],
+    w_i: int,
+    CON_T_THR: int,
+    CON_O_THR: int,
+    WINDOW_D: int,
+    all_consistent: dict[str, set[str]],
+) -> dict[str, set[str]]:
+    """
+    Record the consistently active accounts of one sliding window
+
+    Parameters:
+    -------------
+    all_active : dict[str, set[str]]
+        dictionary with keys w_i and values
+        containing the set of all account names that are active
+    w_i : int
+        index of sliding time window
+    CON_T_THR : int
+        time period to assess consistently active
+    CON_O_THR : int
+        times to be active within `CON_T_THR` to be
+        consistently active
+    WINDOW_D : int
+        duration of sliding window (days)
+    all_consistent : dict[str, set[str]]
+        dictionary with keys w_i and values containing the set of
+        all account names that are consistently active; updated in place
+
+    Returns:
+    ---------
+    all_consistent : dict[str, set[str]]
+        the same dictionary, whose entry for window w_i is the result
+        of `check_past` when `w_i - (CON_O_THR - 1) * WINDOW_D >= 0`
+        and an empty set otherwise
+    """
+
+    # the window must lie at least (CON_O_THR - 1) * WINDOW_D past the start
+    has_history = w_i - (CON_O_THR - 1) * WINDOW_D >= 0
+
+    if not has_history:
+        # too early to assess: nobody counts as consistent yet
+        consistent = set()
+    else:
+        # accounts that check_past reports for the active history
+        consistent = set(check_past(all_active, CON_T_THR, CON_O_THR, WINDOW_D))
+
+    all_consistent[str(w_i)] = consistent
+    return all_consistent
diff --git a/tc_analyzer_lib/algorithms/assessment/assess_dropped.py b/tc_analyzer_lib/algorithms/assessment/assess_dropped.py
@@ -0,0 +1,60 @@
+from .check_past import check_past
+
+
+def assess_dropped(
+    all_new_active: dict[str, set[str]],
+    all_active: dict[str, set[str]],
+    w_i: int,
+    DROP_H_THR: int,
+    DROP_I_THR: int,
+    WINDOW_D: int,
+    all_dropped: dict[str, set[str]],
+) -> dict[str, set[str]]:
+    """
+    Record the dropped accounts of one sliding window
+
+    Parameters:
+    -------------
+    all_new_active : dict[str, set[str]]
+        dictionary with keys w_i and values
+        containing the set of all account names that are active for
+        the first time in period
+    all_active : dict[str, set[str]]
+        dictionary with keys w_i and values
+        containing the set of all account names that are active
+    w_i : int
+        index of sliding time window
+    DROP_H_THR : int
+        time periods in the past to have been newly active
+    DROP_I_THR : int
+        time periods to have been inactive
+    WINDOW_D : int
+        duration of sliding window (days)
+    all_dropped : dict[str, set[str]]
+        dictionary with keys w_i and values containing the set of
+        all account names that are dropped; updated in place
+
+    Returns:
+    ----------
+    all_dropped : dict[str, set[str]]
+        the same dictionary, whose entry for window w_i holds the
+        accounts `check_past` reports for `all_new_active` minus those
+        it reports for `all_active`, or an empty set when
+        `w_i - DROP_H_THR * WINDOW_D` is negative
+    """
+
+    window_key = str(w_i)
+
+    # not enough past periods to look back DROP_H_THR windows
+    if w_i - (DROP_H_THR * WINDOW_D) < 0:
+        all_dropped[window_key] = set()
+        return all_dropped
+
+    # accounts check_past reports as newly active in the look-back periods
+    newly_active = set(check_past(all_new_active, DROP_H_THR, 1, WINDOW_D))
+    # accounts check_past reports as active in the inactivity periods
+    recently_active = set(check_past(all_active, DROP_I_THR, 1, WINDOW_D))
+
+    # newly active accounts that did not stay active are the dropped ones
+    all_dropped[window_key] = newly_active.difference(recently_active)
+    return all_dropped
diff --git a/tc_analyzer_lib/algorithms/assessment/assess_lurker.py b/tc_analyzer_lib/algorithms/assessment/assess_lurker.py
@@ -0,0 +1,45 @@
+def assess_lurker(
+    all_lurker: dict[str, set[str]],
+    all_new_active: dict[str, set[str]],
+    all_joined_day: dict[str, set[str]],
+    w_i: int,
+) -> dict[str, set[str]]:
+    """
+    Assess all lurker accounts
+
+    Parameters:
+    ------------
+    all_lurker : dict[str, set[str]]
+        dictionary with keys w_i and values containing the
+        account names that are lurkers; updated in place
+    all_new_active : dict[str, set[str]]
+        dictionary with keys w_i and values containing the
+        account names that are active for the first time in period
+    all_joined_day : dict[str, set[str]]
+        dictionary with keys w_i and values containing the
+        account names that joined on w_i
+    w_i : int
+        index of sliding time window
+
+    Returns:
+    ---------
+    all_lurker : dict[str, set[str]]
+        the same dictionary, with the entry for w_i updated
+
+    Raises:
+    --------
+    KeyError
+        if `all_joined_day` or `all_new_active` has no entry for w_i,
+        or `all_lurker` has none for w_i - 1 when w_i >= 1
+    """
+    # everyone who joined in this period starts out as a lurker candidate
+    temp_lurker = set(all_joined_day[str(w_i)])
+
+    # carry over the lurkers of the previous period, if there is one
+    if w_i >= 1:
+        temp_lurker |= set(all_lurker[str(w_i - 1)])
+
+    # drop newly active accounts; set() keeps this working when the
+    # value is any iterable of names rather than strictly a set
+    all_lurker[str(w_i)] = temp_lurker - set(all_new_active[str(w_i)])
+    return all_lurker
diff --git a/tc_analyzer_lib/algorithms/assessment/assess_overlap.py b/tc_analyzer_lib/algorithms/assessment/assess_overlap.py
@@ -0,0 +1,53 @@
+def assess_overlap(
+    ref_dict: dict[str, set[str]],
+    comp_dict: dict[str, set[str]],
+    w_i: int,
+    num_past: int,
+) -> tuple[set[str], set[str]]:
+    """
+    Split the reference accounts of one time period into those that also
+    appear in a comparison set and those that do not
+
+    Notes: the comparison set is taken from the same time period
+    (num_past = 0) or from an earlier one (num_past > 0) relative to the
+    reference set. To compare against a later time period, swap ref_dict
+    and comp_dict
+
+    Parameters:
+    ------------
+    ref_dict : dict[str, set[str]]
+        reference dictionary; must contain the key `str(w_i)`
+    comp_dict : dict[str, set[str]]
+        comparison dictionary; the key `str(w_i - num_past)` may be
+        absent, in which case nothing overlaps
+    w_i : int
+        time period for set from ref_dict
+    num_past : int
+        number of time periods previous to w_i for set from comp_dict
+
+    Returns:
+    ---------
+    rem_acc : set[str]
+        remaining accounts from ref_dict[w_i]
+        that do not overlap with the selected comp_dict set
+    overlap_acc : set[str]
+        accounts that overlap between ref_dict[w_i]
+        and the selected comp_dict set
+    """
+    # dictionary keys are the string form of the period index
+    ref_key = str(w_i)
+    comp_key = str(int(ref_key) - num_past)
+
+    # a comparison period without an entry contributes no accounts
+    comp_accounts = comp_dict.get(comp_key, ())
+
+    # work on a copy so the stored reference set is never modified
+    ref_accounts = set(ref_dict[ref_key])
+
+    # accounts present in both periods
+    overlap_acc = ref_accounts & set(comp_accounts)
+
+    # accounts only present in the reference period
+    rem_acc = ref_accounts - overlap_acc
+
+    return rem_acc, overlap_acc