From 40ed7815eefb6b10cdfbf0df8ffda9598bbeadd5 Mon Sep 17 00:00:00 2001 From: Debin Li Date: Wed, 8 Nov 2023 11:22:21 -0500 Subject: [PATCH 01/10] Updating User model --- api/yelp_beans/models.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/api/yelp_beans/models.py b/api/yelp_beans/models.py index 4d83ffb..717aa52 100644 --- a/api/yelp_beans/models.py +++ b/api/yelp_beans/models.py @@ -23,6 +23,15 @@ class User(db.Model): terminated = db.Column(db.Boolean, nullable=False, default=False) subscription_preferences = db.relationship("UserSubscriptionPreferences") + # Additional fields for match algo + languages = db.Column(db.Text) + cost_center_name = db.Column(db.String()) + days_since_start = db.Column(db.Integer) + employee_id = db.Column(db.String()) + location = db.Column(db.String()) + manager_id = db.Column(db.String()) + pronoun = db.Column(db.String()) + def get_username(self): return self.email.split("@")[0] From 114cea907a141b36441bec8bd97b915fc75b9951 Mon Sep 17 00:00:00 2001 From: I Chen Kao Date: Wed, 8 Nov 2023 11:47:33 -0500 Subject: [PATCH 02/10] migrated match_utils and pair_match --- api/requirements-dev.txt | 1 + api/requirements-minimal.txt | 1 + api/requirements.txt | 1 + api/yelp_beans/matching/match_utils.py | 118 +++++++++++++++++++++++++ api/yelp_beans/matching/pair_match.py | 11 ++- 5 files changed, 130 insertions(+), 2 deletions(-) diff --git a/api/requirements-dev.txt b/api/requirements-dev.txt index 7d8cdf3..574897a 100644 --- a/api/requirements-dev.txt +++ b/api/requirements-dev.txt @@ -45,6 +45,7 @@ packaging==23.2 # pyproject-api # pytest # tox +pandas==1.5.3 platformdirs==3.11.0 # via # tox diff --git a/api/requirements-minimal.txt b/api/requirements-minimal.txt index d1a02eb..3a90b92 100644 --- a/api/requirements-minimal.txt +++ b/api/requirements-minimal.txt @@ -5,6 +5,7 @@ flask-api-utils Flask-SQLAlchemy httplib2 networkx +pandas psycopg2-binary pydantic pytz diff --git a/api/requirements.txt b/api/requirements.txt 
index 481259c..449af77 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -53,6 +53,7 @@ markupsafe==2.1.3 # werkzeug networkx==3.1 # via -r requirements-minimal.txt +pandas==1.5.3 psycopg2-binary==2.9.9 # via -r requirements-minimal.txt pydantic==2.4.2 diff --git a/api/yelp_beans/matching/match_utils.py b/api/yelp_beans/matching/match_utils.py index a3b7104..b55b8dc 100644 --- a/api/yelp_beans/matching/match_utils.py +++ b/api/yelp_beans/matching/match_utils.py @@ -1,8 +1,11 @@ +import json import logging from collections import defaultdict from datetime import datetime from datetime import timedelta +import networkx as nx +import pandas as pd from database import db from yelp_beans.logic.config import get_config @@ -86,3 +89,118 @@ def get_previous_meetings(subscription, cooldown=None): disallowed_meetings = {tuple([meeting.id for meeting in meeting]) for meeting in disallowed_meetings} return disallowed_meetings + + +def jaccard(list1, list2): + intersection = len(list(set(list1).intersection(list2))) + if intersection == 0: + return 1 + else: + union = (len(list1) + len(list2)) - intersection + return float(intersection) / union + + +def get_pairwise_distance( + user_pair, + org_graph, + employee_df, + max_tenure=1000, +): + """ + get the distance between two users. + The returned distance score is a linear combination of the multiple user attributes' distnace (normalized). + The importance of each attribute is considered equal. + User attribute considered: + 1. team/function: distance in the org chart + 2. location - country, city + 3. tenure at Yelp + 4. 
language + + note: we considered using education and work experience, but think it likely correlates with the first attribute + """ + user_a, user_b = user_pair + print("(user_a,user_b)", user_a, user_b) + # print("get_pairwise_distance: employee_df:") + print("employee_df:", employee_df.to_string()) + # print(f"get_pairwise_distance, employee_df.columns: {employee_df.columns}") + # employee_df.set_index("email", inplace=True) -- keeping the index as id + user_a_attributes = dict(employee_df.loc[user_a]) + user_b_attributes = dict(employee_df.loc[user_b]) + + distance = 0 + # print("get_pairwise_distance: org_graph nodes") + # print(org_graph.nodes) + # org chart distance + dist_1 = nx.shortest_path_length(org_graph, user_a, user_b) + dist_1 = dist_1 / 10 # approx. min-max scaled + distance += dist_1 + + # location + try: + user_a_city, user_a_country = user_a_attributes["location"].split(", ") + except ValueError: + user_a_city, user_a_country = "unkown", user_a_attributes["location"] + try: + user_b_city, user_b_country = user_b_attributes["location"].split(", ") + except ValueError: + user_b_city, user_b_country = "unkown", user_b_attributes["location"] + country_dist = 0 if user_a_country == user_b_country else 1 + city_dist = 0 if user_a_city == user_b_city else 1 + dist_2 = country_dist + city_dist + dist_2 = dist_2 / 2 # min-max scaled + distance += dist_2 + + # tenure + dist_3 = abs(int(user_a_attributes["days_since_start"]) - int(user_b_attributes["days_since_start"])) + dist_3 = dist_3 / max_tenure + distance += dist_3 + + # language + lang_similarity = jaccard(user_a_attributes["languages"], user_b_attributes["languages"]) + dist_4 = 1 - lang_similarity + distance += dist_4 + + return distance + + +def get_meeting_weights(allowed_meetings): + """ + generate distance score for each user pairs. 
+ """ + meeting_to_weight = {} + + # fetching employee information and create a pandas dataframe with it + # employees = pd.DataFrame(requests.get( + # f'{CORP_API}/employees', + # headers={'X-API-Key': CORP_API_TOKEN}, + # ).json()) + + # need to convert this to JSON to match the previous logic + db_query_result = db.session.query(User).all() + print(f"get_meeting_weights: db_query_result: {db_query_result}") + json_dump = json.dumps([obj.serialize() for obj in db_query_result]) + print(f"get_meeting_weights: json_dump is: {json_dump}") + employees = pd.DataFrame(eval(json_dump)) + print(f"get_meeting_weights: employees is: {employees}") + + employees = employees.set_index("id", drop=False) + # print(f"get_meeting_weights: employees.columns: {employees.columns}") + employees = employees[ + ["manager_id", "cost_center_name", "days_since_start", "location", "languages", "pronoun", "email", "employee_id"] + ] + employees = employees.merge( + employees["employee_id", "id"], how="left", left_on="manager_id", right_on="employee_id", suffixes=("", "_manager") + ) + # print(f"get_meeting_weights: employees.columns after merge: {employees.columns}") + max_tenure = max(employees["days_since_start"].astype(int)) + + # yelp employee network graph created through reporting line + G = nx.Graph() + # G.add_edges_from(list(zip(employees.index, employees['Work_Email_manager']))) + G.add_edges_from(list(zip(employees["id"], employees["id_manager"]))) + # print(f"get_meeting_weights: employees.columns after add edges: {employees.columns}") + for user_pair in allowed_meetings: + users_distance_score = get_pairwise_distance(user_pair, org_graph=G, employee_df=employees.copy(), max_tenure=max_tenure) + meeting_to_weight[user_pair] = users_distance_score + + return meeting_to_weight diff --git a/api/yelp_beans/matching/pair_match.py b/api/yelp_beans/matching/pair_match.py index d18d96c..6084c6a 100644 --- a/api/yelp_beans/matching/pair_match.py +++ 
b/api/yelp_beans/matching/pair_match.py @@ -85,8 +85,15 @@ def construct_graph(user_ids, disallowed_meetings): # This creates the graph and the maximal matching set is returned. # It does not return anyone who didn't get matched. meetings = [] - possible_meetings = {meeting for meeting in itertools.combinations(user_ids, 2)} - allowed_meetings = possible_meetings - disallowed_meetings + # possible_meetings = {meeting for meeting in itertools.combinations(user_ids, 2)} + # allowed_meetings = possible_meetings - disallowed_meetings + possible_meetings = {tuple(sorted(meeting)) for meeting in itertools.combinations(user_ids, 2)} + print(f"construct_graph, user_ids: {user_ids}") + print(f"construct_graph, disallowed_meetings: {disallowed_meetings}") + print(f"construct_graph, possible_meetings: {possible_meetings}") + allowed_meetings = possible_meetings - {tuple(sorted(a)) for a in disallowed_meetings} + + print(f"construct_graph, allowed_meetings: {allowed_meetings}") for meeting in allowed_meetings: weight = meeting_to_weight.get(meeting, 1.0) From 5dc9cd6c118fba9ad8f75db12cb8f4187b1b8d29 Mon Sep 17 00:00:00 2001 From: I Chen Kao Date: Wed, 8 Nov 2023 13:22:54 -0500 Subject: [PATCH 03/10] fix bug. 
--- api/tests/matching/match_test.py | 26 ++++++++++++++++++++++++-- api/tests/matching/match_utils_test.py | 24 ++++++++++++++++++++++-- api/yelp_beans/logic/employee.py | 5 +++++ api/yelp_beans/matching/match_utils.py | 25 +++++++++++++++---------- api/yelp_beans/matching/pair_match.py | 3 ++- api/yelp_beans/models.py | 23 +++++++++++++++++++++-- 6 files changed, 89 insertions(+), 17 deletions(-) create mode 100644 api/yelp_beans/logic/employee.py diff --git a/api/tests/matching/match_test.py b/api/tests/matching/match_test.py index 7b4448a..2c19cea 100644 --- a/api/tests/matching/match_test.py +++ b/api/tests/matching/match_test.py @@ -25,9 +25,31 @@ def test_generate_meetings_same_department(session, subscription): preference = subscription.datetime[0] user_pref = UserSubscriptionPreferences(preference=preference, subscription=subscription) session.add(user_pref) - user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + # user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + user1 = User( + id=1, + email="a@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="0", + languages="en, fr", + days_since_start=100, + employee_id="101", + location="UK, London", + ) session.add(user1) - user2 = User(email="b@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + # user2 = User(email="b@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + user2 = User( + id=2, + email="b@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="en, fr", + days_since_start=100, + employee_id="102", + location="CA, London", + ) session.add(user2) user_list = [user1, user2] session.commit() diff --git a/api/tests/matching/match_utils_test.py b/api/tests/matching/match_utils_test.py index 10c8427..b947b95 100644 --- 
a/api/tests/matching/match_utils_test.py +++ b/api/tests/matching/match_utils_test.py @@ -28,8 +28,28 @@ def test_generate_save_meetings(session, subscription): pref_1 = SubscriptionDateTime(datetime=datetime.now() - timedelta(weeks=MEETING_COOLDOWN_WEEKS - 1)) subscription = MeetingSubscription(title="all engineering weekly", datetime=[pref_1]) user_pref = UserSubscriptionPreferences(preference=pref_1, subscription=subscription) - user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) - user2 = User(email="b@yelp.com", meta_data={"department": "dept2"}, subscription_preferences=[user_pref]) + # user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + # user2 = User(email="b@yelp.com", meta_data={"department": "dept2"}, subscription_preferences=[user_pref]) + user1 = User( + email="a@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="0", + languages="en, fr", + days_since_start=100, + employee_id="101", + location="UK, London", + ) + user2 = User( + email="b@yelp.com", + meta_data={"department": "dept2"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="en, fr", + days_since_start=100, + employee_id="102", + location="CA, London", + ) meeting_spec = MeetingSpec(meeting_subscription=subscription, datetime=pref_1.datetime) mr1 = MeetingRequest(user=user1, meeting_spec=meeting_spec) mr2 = MeetingRequest(user=user2, meeting_spec=meeting_spec) diff --git a/api/yelp_beans/logic/employee.py b/api/yelp_beans/logic/employee.py new file mode 100644 index 0000000..149656a --- /dev/null +++ b/api/yelp_beans/logic/employee.py @@ -0,0 +1,5 @@ +from yelp_beans.models import Employee + + +def get_employee(work_email): + return Employee.query.filter(Employee.work_email == work_email).first() diff --git a/api/yelp_beans/matching/match_utils.py b/api/yelp_beans/matching/match_utils.py index b55b8dc..692a4ae 
100644 --- a/api/yelp_beans/matching/match_utils.py +++ b/api/yelp_beans/matching/match_utils.py @@ -1,4 +1,3 @@ -import json import logging from collections import defaultdict from datetime import datetime @@ -178,27 +177,33 @@ def get_meeting_weights(allowed_meetings): # need to convert this to JSON to match the previous logic db_query_result = db.session.query(User).all() print(f"get_meeting_weights: db_query_result: {db_query_result}") - json_dump = json.dumps([obj.serialize() for obj in db_query_result]) + # json_dump = json.dumps([obj.serialize() for obj in db_query_result]) + json_dump = [obj.serialize() for obj in db_query_result] print(f"get_meeting_weights: json_dump is: {json_dump}") - employees = pd.DataFrame(eval(json_dump)) + # print(f"get_meeting_weights: json_dump type is: {type(json_dump)}") + # print(f"get_meeting_weights: json_dump content type is: {type(json_dump[0])}") + employees = pd.DataFrame(json_dump) print(f"get_meeting_weights: employees is: {employees}") - employees = employees.set_index("id", drop=False) - # print(f"get_meeting_weights: employees.columns: {employees.columns}") + employees["languages"] = employees["languages"].apply(lambda x: x.split(", ")) + print(f"get_meeting_weights: employees.columns: {employees.columns}") employees = employees[ - ["manager_id", "cost_center_name", "days_since_start", "location", "languages", "pronoun", "email", "employee_id"] + # ["manager_id", "cost_center_name", "days_since_start", "location", "languages", "pronoun", "email", "employee_id"] + ["id", "manager_id", "days_since_start", "location", "languages", "email", "employee_id"] ] employees = employees.merge( - employees["employee_id", "id"], how="left", left_on="manager_id", right_on="employee_id", suffixes=("", "_manager") + employees[["employee_id", "id"]], how="left", left_on="manager_id", right_on="employee_id", suffixes=("", "_manager") ) - # print(f"get_meeting_weights: employees.columns after merge: {employees.columns}") + employees = 
employees.set_index("id", drop=False) + print(f"get_meeting_weights: employees.columns after merge: {employees.columns}") max_tenure = max(employees["days_since_start"].astype(int)) # yelp employee network graph created through reporting line G = nx.Graph() - # G.add_edges_from(list(zip(employees.index, employees['Work_Email_manager']))) + print("get_meeting_weights: link from nodes:", list(zip(employees["id"], employees["id_manager"]))) G.add_edges_from(list(zip(employees["id"], employees["id_manager"]))) - # print(f"get_meeting_weights: employees.columns after add edges: {employees.columns}") + print(f"get_meeting_weights: G nodes: {G.nodes}") + print(f"get_meeting_weights: G edges: {G.edges}") for user_pair in allowed_meetings: users_distance_score = get_pairwise_distance(user_pair, org_graph=G, employee_df=employees.copy(), max_tenure=max_tenure) meeting_to_weight[user_pair] = users_distance_score diff --git a/api/yelp_beans/matching/pair_match.py b/api/yelp_beans/matching/pair_match.py index 6084c6a..4aadd33 100644 --- a/api/yelp_beans/matching/pair_match.py +++ b/api/yelp_beans/matching/pair_match.py @@ -4,6 +4,7 @@ import networkx as nx from yelp_beans.logic.user import user_preference +from yelp_beans.matching.match_utils import get_meeting_weights from yelp_beans.matching.match_utils import get_previous_meetings @@ -94,7 +95,7 @@ def construct_graph(user_ids, disallowed_meetings): allowed_meetings = possible_meetings - {tuple(sorted(a)) for a in disallowed_meetings} print(f"construct_graph, allowed_meetings: {allowed_meetings}") - + meeting_to_weight = get_meeting_weights(allowed_meetings) for meeting in allowed_meetings: weight = meeting_to_weight.get(meeting, 1.0) meetings.append((*meeting, {"weight": weight})) diff --git a/api/yelp_beans/models.py b/api/yelp_beans/models.py index 717aa52..af1d125 100644 --- a/api/yelp_beans/models.py +++ b/api/yelp_beans/models.py @@ -25,16 +25,35 @@ class User(db.Model): # Additional fields for match algo languages = 
db.Column(db.Text) - cost_center_name = db.Column(db.String()) + # cost_center_name = db.Column(db.String()) days_since_start = db.Column(db.Integer) employee_id = db.Column(db.String()) location = db.Column(db.String()) manager_id = db.Column(db.String()) - pronoun = db.Column(db.String()) + # pronoun = db.Column(db.String()) def get_username(self): return self.email.split("@")[0] + def serialize(self): + return { + "id": self.id, + "email": self.email, + "first_name": self.first_name, + "last_name": self.last_name, + "photo_url": self.photo_url, + "meta_data": self.meta_data, + "terminated": self.terminated, + # "subscription_preferences": self.subscription_preferences, + "languages": self.languages, + # "cost_center_name": self.cost_center_name, + "days_since_start": self.days_since_start, + "employee_id": self.employee_id, + "location": self.location, + "manager_id": self.manager_id, + # "pronoun": self.pronoun, + } + class MeetingSubscription(db.Model): """The base template for a meeting type, it is comprised of From 02b691333b37909b910fb49c0867c90773c5de4e Mon Sep 17 00:00:00 2001 From: I Chen Kao Date: Wed, 8 Nov 2023 13:31:34 -0500 Subject: [PATCH 04/10] fix bug. 
--- api/tests/matching/match_test.py | 61 +++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/api/tests/matching/match_test.py b/api/tests/matching/match_test.py index 2c19cea..f41c7f8 100644 --- a/api/tests/matching/match_test.py +++ b/api/tests/matching/match_test.py @@ -69,13 +69,53 @@ def test_generate_meetings_with_history(session, subscription): user_pref = UserSubscriptionPreferences(preference=preference, subscription=subscription) session.add(user_pref) - user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + user1 = User( + id=1, + email="a@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="0", + languages="en, fr", + days_since_start=100, + employee_id="101", + location="UK, London", + ) session.add(user1) - user2 = User(email="b@yelp.com", meta_data={"department": "dept2"}, subscription_preferences=[user_pref]) + user2 = User( + id=2, + email="b@yelp.com", + meta_data={"department": "dept2"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="en, fr", + days_since_start=100, + employee_id="102", + location="CA, London", + ) session.add(user2) - user3 = User(email="c@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) + user3 = User( + id=3, + email="c@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="", + days_since_start=100, + employee_id="103", + location="UK, London", + ) session.add(user3) - user4 = User(email="d@yelp.com", meta_data={"department": "dept2"}, subscription_preferences=[user_pref]) + user4 = User( + id=4, + email="d@yelp.com", + meta_data={"department": "dept2"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="en", + days_since_start=100, + employee_id="104", + location="US, SF", + ) session.add(user4) user_list = [user1, user2, user3, user4] @@ 
-124,7 +164,18 @@ def test_no_re_matches(session): users = [] num_users = 20 for i in range(0, num_users): - user = User(email=f"{i}@yelp.com", meta_data={"department": f"dept{i}"}, subscription_preferences=[user_pref]) + # user = User(email=f"{i}@yelp.com", meta_data={"department": f"dept{i}"}, subscription_preferences=[user_pref]) + user = User( + id={i}, + email=f"{i}@yelp.com", + meta_data={"department": f"dept{i//2}"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="en", + days_since_start=100, + employee_id=f"{100+i}", + location="", + ) session.add(user) mr = MeetingRequest(user=user, meeting_spec=meeting_spec) session.add(mr) From 28aa84ef183710553c5112b3580f765f788ec482 Mon Sep 17 00:00:00 2001 From: I Chen Kao Date: Wed, 8 Nov 2023 13:43:26 -0500 Subject: [PATCH 05/10] fix bug. --- api/tests/matching/match_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/tests/matching/match_test.py b/api/tests/matching/match_test.py index f41c7f8..ff25df1 100644 --- a/api/tests/matching/match_test.py +++ b/api/tests/matching/match_test.py @@ -166,7 +166,7 @@ def test_no_re_matches(session): for i in range(0, num_users): # user = User(email=f"{i}@yelp.com", meta_data={"department": f"dept{i}"}, subscription_preferences=[user_pref]) user = User( - id={i}, + id=i, email=f"{i}@yelp.com", meta_data={"department": f"dept{i//2}"}, subscription_preferences=[user_pref], From 6d1c8c1aa6368f050ab1883c049cba514e5d2c0b Mon Sep 17 00:00:00 2001 From: Debin Li Date: Wed, 8 Nov 2023 13:57:53 -0500 Subject: [PATCH 06/10] Clean up unnecessary commented out code --- api/tests/matching/match_test.py | 5 +---- api/tests/matching/match_utils_test.py | 2 -- api/yelp_beans/matching/match_utils.py | 19 +------------------ 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/api/tests/matching/match_test.py b/api/tests/matching/match_test.py index f41c7f8..e8ce296 100644 --- a/api/tests/matching/match_test.py +++ 
b/api/tests/matching/match_test.py @@ -25,7 +25,6 @@ def test_generate_meetings_same_department(session, subscription): preference = subscription.datetime[0] user_pref = UserSubscriptionPreferences(preference=preference, subscription=subscription) session.add(user_pref) - # user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) user1 = User( id=1, email="a@yelp.com", @@ -38,7 +37,6 @@ def test_generate_meetings_same_department(session, subscription): location="UK, London", ) session.add(user1) - # user2 = User(email="b@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) user2 = User( id=2, email="b@yelp.com", @@ -164,9 +162,8 @@ def test_no_re_matches(session): users = [] num_users = 20 for i in range(0, num_users): - # user = User(email=f"{i}@yelp.com", meta_data={"department": f"dept{i}"}, subscription_preferences=[user_pref]) user = User( - id={i}, + id=i, email=f"{i}@yelp.com", meta_data={"department": f"dept{i//2}"}, subscription_preferences=[user_pref], diff --git a/api/tests/matching/match_utils_test.py b/api/tests/matching/match_utils_test.py index b947b95..a79f669 100644 --- a/api/tests/matching/match_utils_test.py +++ b/api/tests/matching/match_utils_test.py @@ -28,8 +28,6 @@ def test_generate_save_meetings(session, subscription): pref_1 = SubscriptionDateTime(datetime=datetime.now() - timedelta(weeks=MEETING_COOLDOWN_WEEKS - 1)) subscription = MeetingSubscription(title="all engineering weekly", datetime=[pref_1]) user_pref = UserSubscriptionPreferences(preference=pref_1, subscription=subscription) - # user1 = User(email="a@yelp.com", meta_data={"department": "dept"}, subscription_preferences=[user_pref]) - # user2 = User(email="b@yelp.com", meta_data={"department": "dept2"}, subscription_preferences=[user_pref]) user1 = User( email="a@yelp.com", meta_data={"department": "dept"}, diff --git a/api/yelp_beans/matching/match_utils.py b/api/yelp_beans/matching/match_utils.py 
index 692a4ae..7d689bd 100644 --- a/api/yelp_beans/matching/match_utils.py +++ b/api/yelp_beans/matching/match_utils.py @@ -118,18 +118,10 @@ def get_pairwise_distance( note: we considered using education and work experience, but think it likely correlates with the first attribute """ user_a, user_b = user_pair - print("(user_a,user_b)", user_a, user_b) - # print("get_pairwise_distance: employee_df:") - print("employee_df:", employee_df.to_string()) - # print(f"get_pairwise_distance, employee_df.columns: {employee_df.columns}") - # employee_df.set_index("email", inplace=True) -- keeping the index as id user_a_attributes = dict(employee_df.loc[user_a]) user_b_attributes = dict(employee_df.loc[user_b]) distance = 0 - # print("get_pairwise_distance: org_graph nodes") - # print(org_graph.nodes) - # org chart distance dist_1 = nx.shortest_path_length(org_graph, user_a, user_b) dist_1 = dist_1 / 10 # approx. min-max scaled distance += dist_1 @@ -168,12 +160,6 @@ def get_meeting_weights(allowed_meetings): """ meeting_to_weight = {} - # fetching employee information and create a pandas dataframe with it - # employees = pd.DataFrame(requests.get( - # f'{CORP_API}/employees', - # headers={'X-API-Key': CORP_API_TOKEN}, - # ).json()) - # need to convert this to JSON to match the previous logic db_query_result = db.session.query(User).all() print(f"get_meeting_weights: db_query_result: {db_query_result}") @@ -187,10 +173,7 @@ def get_meeting_weights(allowed_meetings): employees["languages"] = employees["languages"].apply(lambda x: x.split(", ")) print(f"get_meeting_weights: employees.columns: {employees.columns}") - employees = employees[ - # ["manager_id", "cost_center_name", "days_since_start", "location", "languages", "pronoun", "email", "employee_id"] - ["id", "manager_id", "days_since_start", "location", "languages", "email", "employee_id"] - ] + employees = employees[["id", "manager_id", "days_since_start", "location", "languages", "email", "employee_id"]] employees = 
employees.merge( employees[["employee_id", "id"]], how="left", left_on="manager_id", right_on="employee_id", suffixes=("", "_manager") ) From f91ddfd18793f50f07d9342c433c26bfd836d5f4 Mon Sep 17 00:00:00 2001 From: Debin Li Date: Wed, 8 Nov 2023 14:04:10 -0500 Subject: [PATCH 07/10] Clean up code --- api/yelp_beans/matching/match_utils.py | 11 ----------- api/yelp_beans/matching/pair_match.py | 12 ++---------- api/yelp_beans/models.py | 5 ----- 3 files changed, 2 insertions(+), 26 deletions(-) diff --git a/api/yelp_beans/matching/match_utils.py b/api/yelp_beans/matching/match_utils.py index 7d689bd..7afff72 100644 --- a/api/yelp_beans/matching/match_utils.py +++ b/api/yelp_beans/matching/match_utils.py @@ -162,31 +162,20 @@ def get_meeting_weights(allowed_meetings): # need to convert this to JSON to match the previous logic db_query_result = db.session.query(User).all() - print(f"get_meeting_weights: db_query_result: {db_query_result}") - # json_dump = json.dumps([obj.serialize() for obj in db_query_result]) json_dump = [obj.serialize() for obj in db_query_result] - print(f"get_meeting_weights: json_dump is: {json_dump}") - # print(f"get_meeting_weights: json_dump type is: {type(json_dump)}") - # print(f"get_meeting_weights: json_dump content type is: {type(json_dump[0])}") employees = pd.DataFrame(json_dump) - print(f"get_meeting_weights: employees is: {employees}") employees["languages"] = employees["languages"].apply(lambda x: x.split(", ")) - print(f"get_meeting_weights: employees.columns: {employees.columns}") employees = employees[["id", "manager_id", "days_since_start", "location", "languages", "email", "employee_id"]] employees = employees.merge( employees[["employee_id", "id"]], how="left", left_on="manager_id", right_on="employee_id", suffixes=("", "_manager") ) employees = employees.set_index("id", drop=False) - print(f"get_meeting_weights: employees.columns after merge: {employees.columns}") max_tenure = max(employees["days_since_start"].astype(int)) # 
yelp employee network graph created through reporting line G = nx.Graph() - print("get_meeting_weights: link from nodes:", list(zip(employees["id"], employees["id_manager"]))) G.add_edges_from(list(zip(employees["id"], employees["id_manager"]))) - print(f"get_meeting_weights: G nodes: {G.nodes}") - print(f"get_meeting_weights: G edges: {G.edges}") for user_pair in allowed_meetings: users_distance_score = get_pairwise_distance(user_pair, org_graph=G, employee_df=employees.copy(), max_tenure=max_tenure) meeting_to_weight[user_pair] = users_distance_score diff --git a/api/yelp_beans/matching/pair_match.py b/api/yelp_beans/matching/pair_match.py index 4aadd33..36bc69c 100644 --- a/api/yelp_beans/matching/pair_match.py +++ b/api/yelp_beans/matching/pair_match.py @@ -79,22 +79,14 @@ def construct_graph(user_ids, disallowed_meetings): Yay graphs! Networkx will do all the work for us. """ - # special weights that be put on the matching potential of each meeting, - # depending on heuristics for what makes a good/bad potential meeting. - meeting_to_weight = {} - # This creates the graph and the maximal matching set is returned. # It does not return anyone who didn't get matched. meetings = [] - # possible_meetings = {meeting for meeting in itertools.combinations(user_ids, 2)} - # allowed_meetings = possible_meetings - disallowed_meetings possible_meetings = {tuple(sorted(meeting)) for meeting in itertools.combinations(user_ids, 2)} - print(f"construct_graph, user_ids: {user_ids}") - print(f"construct_graph, disallowed_meetings: {disallowed_meetings}") - print(f"construct_graph, possible_meetings: {possible_meetings}") allowed_meetings = possible_meetings - {tuple(sorted(a)) for a in disallowed_meetings} - print(f"construct_graph, allowed_meetings: {allowed_meetings}") + # special weights that be put on the matching potential of each meeting, + # depending on heuristics for what makes a good/bad potential meeting. 
meeting_to_weight = get_meeting_weights(allowed_meetings) for meeting in allowed_meetings: weight = meeting_to_weight.get(meeting, 1.0) diff --git a/api/yelp_beans/models.py b/api/yelp_beans/models.py index af1d125..f9515d9 100644 --- a/api/yelp_beans/models.py +++ b/api/yelp_beans/models.py @@ -25,12 +25,10 @@ class User(db.Model): # Additional fields for match algo languages = db.Column(db.Text) - # cost_center_name = db.Column(db.String()) days_since_start = db.Column(db.Integer) employee_id = db.Column(db.String()) location = db.Column(db.String()) manager_id = db.Column(db.String()) - # pronoun = db.Column(db.String()) def get_username(self): return self.email.split("@")[0] @@ -44,14 +42,11 @@ def serialize(self): "photo_url": self.photo_url, "meta_data": self.meta_data, "terminated": self.terminated, - # "subscription_preferences": self.subscription_preferences, "languages": self.languages, - # "cost_center_name": self.cost_center_name, "days_since_start": self.days_since_start, "employee_id": self.employee_id, "location": self.location, "manager_id": self.manager_id, - # "pronoun": self.pronoun, } From 8638b405df6970bd3cd261d270b72b494477ed9d Mon Sep 17 00:00:00 2001 From: Debin Li Date: Fri, 24 Nov 2023 09:55:32 -0500 Subject: [PATCH 08/10] Apply suggestions from code review --- api/yelp_beans/matching/match_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/yelp_beans/matching/match_utils.py b/api/yelp_beans/matching/match_utils.py index 7afff72..3f6c5f7 100644 --- a/api/yelp_beans/matching/match_utils.py +++ b/api/yelp_beans/matching/match_utils.py @@ -130,11 +130,11 @@ def get_pairwise_distance( try: user_a_city, user_a_country = user_a_attributes["location"].split(", ") except ValueError: - user_a_city, user_a_country = "unkown", user_a_attributes["location"] + user_a_city, user_a_country = "unknown", user_a_attributes["location"] try: user_b_city, user_b_country = user_b_attributes["location"].split(", ") except 
ValueError: - user_b_city, user_b_country = "unkown", user_b_attributes["location"] + user_b_city, user_b_country = "unknown", user_b_attributes["location"] country_dist = 0 if user_a_country == user_b_country else 1 city_dist = 0 if user_a_city == user_b_city else 1 dist_2 = country_dist + city_dist From 466668521b47e1df865ad476a8ac752346f72e3b Mon Sep 17 00:00:00 2001 From: I Chen Kao Date: Fri, 1 Mar 2024 09:50:29 -0500 Subject: [PATCH 09/10] adding test case structure --- api/tests/matching/match_test.py | 89 ++++++++++++++++++++++++++ api/yelp_beans/matching/match_utils.py | 1 + 2 files changed, 90 insertions(+) diff --git a/api/tests/matching/match_test.py b/api/tests/matching/match_test.py index e8ce296..55d9186 100644 --- a/api/tests/matching/match_test.py +++ b/api/tests/matching/match_test.py @@ -5,6 +5,8 @@ from yelp_beans.logic.subscription import get_specs_from_subscription from yelp_beans.logic.subscription import store_specs_from_subscription from yelp_beans.matching.match import generate_meetings +from yelp_beans.matching.match_utils import get_meeting_weights +from yelp_beans.matching.pair_match import get_disallowed_meetings from yelp_beans.models import Meeting from yelp_beans.models import MeetingParticipant from yelp_beans.models import MeetingRequest @@ -306,3 +308,90 @@ def test_previous_meeting_penalty(session): assert len(unmatched) == 2 for matched_group in matches: assert not (users[0] in matched_group and users[1] in matched_group) + + +def test_pairwise_distance(session, subscription): + preference = subscription.datetime[0] + user_pref = UserSubscriptionPreferences(preference=preference, subscription=subscription) + session.add(user_pref) + + user1 = User( + id=1, + email="a@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="0", + languages="en, fr", + days_since_start=100, + employee_id="101", + location="UK, London", + ) + session.add(user1) + user2 = User( + id=2, + 
email="b@yelp.com", + meta_data={"department": "dept2"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="en, fr", + days_since_start=100, + employee_id="102", + location="CA, London", + ) + session.add(user2) + user3 = User( + id=3, + email="c@yelp.com", + meta_data={"department": "dept"}, + subscription_preferences=[user_pref], + manager_id="101", + languages="", + days_since_start=100, + employee_id="103", + location="UK, London", + ) + session.add(user3) + user4 = User( + id=4, + email="d@yelp.com", + meta_data={"department": "dept2"}, + subscription_preferences=[user_pref], + manager_id="102", + languages="en", + days_since_start=100, + employee_id="104", + location="US, SF", + ) + session.add(user4) + + user_list = [user1, user2, user3, user4] + user_ids = [user.id for user in user_list] + session.commit() + + # WITHOUT considering disallowed meetings and rules + possible_meetings = {tuple(sorted(meeting)) for meeting in itertools.combinations(user_ids, 2)} + allowed_meetings = possible_meetings + get_meeting_weights(allowed_meetings) + + # assert meeting_to_weight[(1,2)] == ? + # assert meeting_to_weight[(2,3)] == ? + # assert meeting_to_weight[(3,4)] == ? + + # considering disallowed meetings and rules + meeting_history = set( + [ + (user1.id, user2.id), + (user3.id, user4.id), + (user2.id, user3.id), + ] + ) + rule = Rule(name="department", value="") + session.add(rule) + subscription.dept_rules = [rule] + _, specs = get_specs_from_subscription(subscription) + possible_meetings = {tuple(sorted(meeting)) for meeting in itertools.combinations(user_ids, 2)} + disallowed_meetings = get_disallowed_meetings(user_list, meeting_history, specs[0]) + allowed_meetings = possible_meetings - {tuple(sorted(a)) for a in disallowed_meetings} + get_meeting_weights(allowed_meetings) + + # assert meeting_to_weight[(1,4)] == ? 
# only (user1, user4) could be paired diff --git a/api/yelp_beans/matching/match_utils.py b/api/yelp_beans/matching/match_utils.py index 3f6c5f7..58e82fc 100644 --- a/api/yelp_beans/matching/match_utils.py +++ b/api/yelp_beans/matching/match_utils.py @@ -106,6 +106,7 @@ def get_pairwise_distance( max_tenure=1000, ): """ + TODO@ichenkao: define input and output get the distance between two users. The returned distance score is a linear combination of the multiple user attributes' distnace (normalized). The importance of each attribute is considered equal. From b62ab330c03e8c50044f2b2a41fdad063bb20738 Mon Sep 17 00:00:00 2001 From: I Chen Kao Date: Thu, 4 Apr 2024 15:23:28 -0400 Subject: [PATCH 10/10] Added unit test cases for paired distance --- api/tests/matching/match_test.py | 487 ++++++++++++++++++++++++++++--- 1 file changed, 441 insertions(+), 46 deletions(-) diff --git a/api/tests/matching/match_test.py b/api/tests/matching/match_test.py index 55d9186..1a6d114 100644 --- a/api/tests/matching/match_test.py +++ b/api/tests/matching/match_test.py @@ -311,71 +311,465 @@ def test_previous_meeting_penalty(session): def test_pairwise_distance(session, subscription): + rule = Rule(name="department", value="") + session.add(rule) + subscription.dept_rules = [rule] preference = subscription.datetime[0] user_pref = UserSubscriptionPreferences(preference=preference, subscription=subscription) session.add(user_pref) + user0 = User( + id=126, + email="126@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="1073", + languages="", + days_since_start=317, + employee_id="126", + location="California, USA", + ) + session.add(user0) + user1 = User( - id=1, - email="a@yelp.com", - meta_data={"department": "dept"}, + id=223, + email="223@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, subscription_preferences=[user_pref], - manager_id="0", - languages="en, fr", - days_since_start=100, 
- employee_id="101", - location="UK, London", + manager_id="782", + languages="", + days_since_start=115, + employee_id="223", + location="Berkshire, United Kingdom", ) session.add(user1) + user2 = User( - id=2, - email="b@yelp.com", - meta_data={"department": "dept2"}, + id=707, + email="707@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, subscription_preferences=[user_pref], - manager_id="101", - languages="en, fr", - days_since_start=100, - employee_id="102", - location="CA, London", + manager_id="782", + languages="English, Farsi", + days_since_start=509, + employee_id="707", + location="California, USA", ) session.add(user2) + user3 = User( - id=3, - email="c@yelp.com", - meta_data={"department": "dept"}, + id=782, + email="782@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, subscription_preferences=[user_pref], - manager_id="101", + manager_id="2989", languages="", - days_since_start=100, - employee_id="103", - location="UK, London", + days_since_start=356, + employee_id="782", + location="California, USA", ) session.add(user3) + user4 = User( - id=4, - email="d@yelp.com", - meta_data={"department": "dept2"}, + id=890, + email="890@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, subscription_preferences=[user_pref], - manager_id="102", - languages="en", - days_since_start=100, - employee_id="104", - location="US, SF", + manager_id="1073", + languages="", + days_since_start=54, + employee_id="890", + location="California, USA", ) session.add(user4) - user_list = [user1, user2, user3, user4] - user_ids = [user.id for user in user_list] - session.commit() + user5 = User( + id=1073, + email="1073@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="2989", + languages="Turkish", + days_since_start=595, + employee_id="1073", + location="California, USA", + ) + session.add(user5) - # WITHOUT considering disallowed meetings 
and rules - possible_meetings = {tuple(sorted(meeting)) for meeting in itertools.combinations(user_ids, 2)} - allowed_meetings = possible_meetings - get_meeting_weights(allowed_meetings) + user6 = User( + id=1117, + email="1117@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="782", + languages="", + days_since_start=338, + employee_id="1117", + location="Texas, USA", + ) + session.add(user6) + + user7 = User( + id=1460, + email="1460@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5384", + languages="", + days_since_start=1265, + employee_id="1460", + location="California, USA", + ) + session.add(user7) + + user8 = User( + id=1463, + email="1463@yelp.com", + meta_data={"department": "Engineering - Growth"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=410, + employee_id="1463", + location="California, USA", + ) + session.add(user8) - # assert meeting_to_weight[(1,2)] == ? - # assert meeting_to_weight[(2,3)] == ? - # assert meeting_to_weight[(3,4)] == ? 
+ user9 = User( + id=1715, + email="1715@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5384", + languages="", + days_since_start=269, + employee_id="1715", + location="New York, USA", + ) + session.add(user9) + + user10 = User( + id=2131, + email="2131@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="782", + languages="", + days_since_start=880, + employee_id="2131", + location="Georgia, USA", + ) + session.add(user10) + + user11 = User( + id=2169, + email="2169@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="782", + languages="", + days_since_start=309, + employee_id="2169", + location="California, USA", + ) + session.add(user11) + + user12 = User( + id=2241, + email="2241@yelp.com", + meta_data={"department": "Engineering - Engineering Effectiveness"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=98, + employee_id="2241", + location="British Columbia, Canada", + ) + session.add(user12) + + user13 = User( + id=2525, + email="2525@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5384", + languages="", + days_since_start=492, + employee_id="2525", + location="New York, USA", + ) + session.add(user13) + + user14 = User( + id=2589, + email="2589@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=511, + employee_id="2589", + location="Florida, USA", + ) + session.add(user14) + + user15 = User( + id=2989, + email="2989@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=1202, + 
employee_id="2989", + location="California, USA", + ) + session.add(user15) + + user16 = User( + id=3002, + email="3002@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5384", + languages="", + days_since_start=537, + employee_id="3002", + location="California, USA", + ) + session.add(user16) + + user17 = User( + id=3447, + email="3447@yelp.com", + meta_data={"department": "Engineering - Content Platform"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=692, + employee_id="3447", + location="Pennsylvania, USA", + ) + session.add(user17) + + user18 = User( + id=3457, + email="3457@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="1073", + languages="", + days_since_start=542, + employee_id="3457", + location="Berlin, Germany", + ) + session.add(user18) + + user19 = User( + id=3601, + email="3601@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5384", + languages="", + days_since_start=141, + employee_id="3601", + location="Ontario, Canada", + ) + session.add(user19) + + user20 = User( + id=3683, + email="3683@yelp.com", + meta_data={"department": "Engineering - Content Platform"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=428, + employee_id="3683", + location="California, USA", + ) + session.add(user20) + + user21 = User( + id=3815, + email="3815@yelp.com", + meta_data={"department": "Engineering - Engineering Effectiveness"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=1816, + employee_id="3815", + location="California, USA", + ) + session.add(user21) + + user22 = User( + id=3957, + email="3957@yelp.com", + meta_data={"department": "Engineering - Services Experience"}, + 
subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=86, + employee_id="3957", + location="British Columbia, Canada", + ) + session.add(user22) + + user23 = User( + id=4078, + email="4078@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="782", + languages="", + days_since_start=266, + employee_id="4078", + location="New York, USA", + ) + session.add(user23) + + user24 = User( + id=4102, + email="4102@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="5384", + languages="", + days_since_start=541, + employee_id="4102", + location="British Columbia, Canada", + ) + session.add(user24) + + user25 = User( + id=4292, + email="4292@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="782", + languages="English, Gujarati, Hindi", + days_since_start=373, + employee_id="4292", + location="Washington, USA", + ) + session.add(user25) + + user26 = User( + id=4650, + email="4650@yelp.com", + meta_data={"department": "Engineering"}, + subscription_preferences=[user_pref], + manager_id="2432", + languages="", + days_since_start=446, + employee_id="4650", + location="East Sussex, United Kingdom", + ) + session.add(user26) + + user27 = User( + id=5240, + email="5240@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="2989", + languages="", + days_since_start=519, + employee_id="5240", + location="California, USA", + ) + session.add(user27) + + user28 = User( + id=5384, + email="5384@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="2989", + languages="", + days_since_start=721, + employee_id="5384", + location="Quebec, Canada", + ) + session.add(user28) + + user29 = User( + 
id=5529, + email="5529@yelp.com", + meta_data={"department": "Engineering - Services Leads"}, + subscription_preferences=[user_pref], + manager_id="5543", + languages="", + days_since_start=240, + employee_id="5529", + location="California, USA", + ) + session.add(user29) + + user30 = User( + id=5543, + email="5543@yelp.com", + meta_data={"department": "Engineering"}, + subscription_preferences=[user_pref], + manager_id="4650", + languages="", + days_since_start=610, + employee_id="5543", + location="California, USA", + ) + session.add(user30) + + user31 = User( + id=5637, + email="5637@yelp.com", + meta_data={"department": "Engineering - Core Experience"}, + subscription_preferences=[user_pref], + manager_id="1073", + languages="", + days_since_start=226, + employee_id="5637", + location="California, USA", + ) + session.add(user31) + + user_list = [ + user0, + user1, + user2, + user3, + user4, + user5, + user6, + user7, + user8, + user9, + user10, + user11, + user12, + user13, + user14, + user15, + user16, + user17, + user18, + user19, + user20, + user21, + user22, + user23, + user24, + user25, + user26, + user27, + user28, + user29, + user30, + user31, + ] + user_ids = [user.id for user in user_list] + session.commit() # considering disallowed meetings and rules meeting_history = set( @@ -385,13 +779,14 @@ def test_pairwise_distance(session, subscription): (user2.id, user3.id), ] ) - rule = Rule(name="department", value="") - session.add(rule) - subscription.dept_rules = [rule] + _, specs = get_specs_from_subscription(subscription) possible_meetings = {tuple(sorted(meeting)) for meeting in itertools.combinations(user_ids, 2)} disallowed_meetings = get_disallowed_meetings(user_list, meeting_history, specs[0]) allowed_meetings = possible_meetings - {tuple(sorted(a)) for a in disallowed_meetings} - get_meeting_weights(allowed_meetings) + paired_distance = get_meeting_weights(allowed_meetings) - # assert meeting_to_weight[(1,4)] == ? 
# only (user1, user4) could be paired + assert (126, 223) not in paired_distance.keys() # this pair must be excluded. NOTE(review): (126, 223) is not listed in meeting_history, so the same-department rule is presumably what excludes it; confirm + assert (2169, 5384) not in paired_distance.keys() # same department members should not be paired + assert round(paired_distance[(3457, 3815)], 3) == 2.102 + assert round(paired_distance[(4102, 4650)], 3) == 1.452