From 821a9d1c3580fa37a4fc616359bc61420e1f4f44 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 12:26:51 +0330 Subject: [PATCH 1/7] fix: more efficient querying the database! --- .../metrics/heatmaps/analytics_hourly.py | 23 +++++++++++-------- .../metrics/heatmaps/analytics_raw.py | 8 ++----- tc_analyzer_lib/metrics/heatmaps/heatmaps.py | 7 ++++-- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py b/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py index cb848f2..a8f07ad 100644 --- a/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py +++ b/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py @@ -39,11 +39,10 @@ def analyze( activity_direction : str should be always either `emitter` or `receiver` **kwargs : - additional_filters : dict[str, str] - the additional filtering for `rawmemberactivities` data of each platform - the keys could be `metadata.channel_id` with a specific value + resource_filtering : dict[str, str] + a filtering applied for resources on data """ - additional_filters: dict[str, str] = kwargs.get("additional_filters", {}) + resource_filtering: dict[str, str] = kwargs.get("resource_filtering", {}) if activity_direction not in ["emitter", "receiver"]: raise AttributeError( @@ -64,8 +63,8 @@ def analyze( filters={ f"{activity}.name": activity_name, f"{activity}.type": activity_direction, - **additional_filters, }, + resource_filters=resource_filtering, ) return activity_vector @@ -76,6 +75,7 @@ def get_hourly_analytics( activity: str, author_id: str | int, filters: dict[str, dict[str, Any] | str] | None = None, + resource_filters: dict[str, str] | None = None, ) -> list[int]: """ Gets the list of documents for the stated day @@ -87,12 +87,12 @@ def get_hourly_analytics( activity : str to be `interactions` or `actions` filter : dict[str, dict[str] | str] | None - the filtering that we need to apply + the filtering that we need to apply on actions or interactions for default it is an None meaning no filtering would be applied - msg : str - additional information to be logged - for default is empty string meaning no additional string to log + resource_filtering : dict[str, str] | None + the filtering on resources of data + could make the query more efficient if provided Returns --------- @@ -103,12 +103,17 @@ def get_hourly_analytics( start_day = datetime.combine(day, time(0, 0, 0)) end_day = start_day + timedelta(days=1) + # if no filter for resources then + if resource_filters is None: + resource_filters = {} + pipeline = [ # the day for analytics { "$match": { "date": {"$gte": start_day, "$lt": end_day}, "author_id": author_id, + **resource_filters, } }, # Unwind the activity array diff --git a/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py b/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py index 9f7f1da..76ebab6 100644 --- a/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py +++ b/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py @@ -114,19 +114,15 @@ def get_analytics_count( raw analytics item which holds the user and the count of interaction in that day """ - filters: dict[str, dict[str, Any] | str] | None = kwargs.get("filters") + filters: dict[str, dict[str, Any] | str] = kwargs.get("filters", {}) start_day = datetime.combine(day, time(0, 0, 0)) end_day = start_day + timedelta(days=1) match_filters = { "date": {"$gte": start_day, "$lt": end_day}, "author_id": author_id, + **filters, } - if filters is not None: - match_filters = { - **match_filters, - **filters, - } pipeline = [ { diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py index a0e3f82..6d3cef7 100644 --- a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py @@ -188,8 +188,9 @@ def _process_hourly_analytics( activity_name=activity_name, activity_direction=config.direction.value, author_id=author_id, - additional_filters={ + resource_filtering={ f"metadata.{self.analyzer_config.resource_identifier}": resource, + "metadata.bot_activity": False, }, ) analytics[config.name] = analytics_vector @@ -213,8 +214,9 @@ def _process_hourly_analytics( activity_name=activity_name, activity_direction=config.direction.value, author_id=author_id, - additional_filters={ + resource_filtering={ f"metadata.{self.analyzer_config.resource_identifier}": resource, + "metadata.bot_activity": False, **conditions, }, ) @@ -250,6 +252,7 @@ def _process_raw_analytics( additional_filters: dict[str, str] = { f"metadata.{self.analyzer_config.resource_identifier}": resource, + "metadata.bot_activity": False, } # preparing for custom analytics (if available in config) if config.rawmemberactivities_condition is not None: From cfaefc3bf79d21fecb406df55475b3d669f6d48f Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 12:39:30 +0330 Subject: [PATCH 2/7] fix: remove unused codes! --- .../metrics/heatmaps/heatmaps_utils.py | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py index 8c28a93..e2576f9 100644 --- a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py @@ -53,27 +53,6 @@ def get_active_users( users : list[str] a list of user ids doing activity in that day """ - # cursor = self.database["rawmemberactivities"].aggregate( - # [ - # {"$match": {"date": {"$gte": start_day, "$lt": end_day}}}, - # {"$unwind": "$interactions"}, - # {"$unwind": "$interactions.users_engaged_id"}, - # { - # "$group": { - # "_id": None, - # "all_ids": {"$addToSet": "$interactions.users_engaged_id"}, - # "author_ids": {"$addToSet": "$author_id"}, - # } - # }, - # { - # "$project": { - # "_id": 0, - # "combined_ids": {"$setUnion": ["$all_ids", "$author_ids"]}, - # } - # }, - # ] - # ) - cursor = self.database["rawmemberactivities"].aggregate( [ { From c54a0fd63691275f0514d3fcbf00f373deec26b5 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 13:08:34 +0330 Subject: [PATCH 3/7] fix: more improvements on heatmaps computation! now we're also processing the actual channels that are active for each day rather than processing for all selected channels. --- tc_analyzer_lib/metrics/heatmaps/heatmaps.py | 52 +++-- .../metrics/heatmaps/heatmaps_utils.py | 57 ++++++ .../test_heatmaps_utils_active_resources.py | 192 ++++++++++++++++++ ...lyzer_heatmaps_compute_iteration_counts.py | 3 +- 4 files changed, 281 insertions(+), 23 deletions(-) create mode 100644 tests/integration/test_heatmaps_utils_active_resources.py diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py index 6d3cef7..0a5915a 100644 --- a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py @@ -70,24 +70,34 @@ def start(self, from_start: bool = False) -> list[dict]: # initialize the data array heatmaps_results = [] - iteration_count = self._compute_iteration_counts( - analytics_date=analytics_date, - resources_count=len(self.resources), - ) - cursor = self.utils.get_users(is_bot=True) bot_ids = list(map(lambda user: user["id"], cursor)) - index = 0 + # index = 0 while analytics_date.date() < datetime.now().date(): - for resource_id in self.resources: - # for more efficient retrieval - # we're always using the cursor and re-querying the db - - start_day = analytics_date.replace( - hour=0, minute=0, second=0, microsecond=0 + start_day = analytics_date.replace( + hour=0, minute=0, second=0, microsecond=0 + ) + end_day = start_day + timedelta(days=1) + + # getting the active resource_ids (activities being done there by users) + period_resources = self.utils.get_active_resources_period( + start_day=start_day, + end_day=end_day, + resource_identifier=self.analyzer_config.resource_identifier, + metadata_filter={ + f"metadata.{self.analyzer_config.resource_identifier}": { + "$in": self.resources, + } + }, + ) + if len(period_resources) == 0: + logging.warning( + "No users interacting on platform for date: " + f"{start_day.date()} - {end_day.date()}" ) - end_day = start_day + timedelta(days=1) + + for resource_idx, resource_id in enumerate(period_resources): user_ids = self.utils.get_active_users( start_day, end_day, @@ -99,15 +109,16 @@ def start(self, from_start: bool = False) -> list[dict]: if len(user_ids) == 0: logging.warning( f"{log_prefix} No users interacting for the time window: " - f"{start_day.date()} - {end_day.date()}" + f"{start_day.date()} - {end_day.date()} for resource: {resource_id}" " Skipping the day." ) - for idx, author_id in enumerate(user_ids): + for user_idx, author_id in enumerate(user_ids): logging.info( - f"{log_prefix} ANALYZING HEATMAPS {index}/{iteration_count} " - f"author index: {idx}/{len(user_ids)} | " - f"DAY: {start_day.date()} - {end_day.date()}" + f"{log_prefix} ANALYZING HEATMAPS {start_day.date()} - {end_day.date()} | " + # f"DAY {index}/{iteration_count} " + f"Author: {user_idx + 1}/{len(user_ids)} " + f"of resource: {resource_idx + 1}/{len(period_resources)}" ) if author_id in bot_ids: @@ -137,7 +148,7 @@ def start(self, from_start: bool = False) -> list[dict]: heatmaps_results.append(document) - index += 1 + # index += 1 # analyze next day analytics_date += timedelta(days=1) @@ -279,8 +290,7 @@ def _process_raw_analytics( def _compute_iteration_counts( self, analytics_date: datetime, - resources_count: int, ) -> int: - iteration_count = (datetime.now() - analytics_date).days * resources_count + iteration_count = (datetime.now() - analytics_date).days return iteration_count diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py index e2576f9..873f3d4 100644 --- a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py @@ -88,6 +88,63 @@ def get_active_users( # making the values to be unique return list(set(combined_ids)) + def get_active_resources_period( + self, + start_day: datetime, + end_day: datetime, + resource_identifier: str, + metadata_filter: dict = {}, + ) -> list[str]: + """ + get the active resource ids for a specific period + + Parameters + ------------ + start_day : datetime + the time to filter the data from + end_day : datetime + the end day for filtering data from + resource_identifier : str + the resource identifier on database for a platform + i.e.: could be `channel_id` for discord + metadata_filter : dict + the additional filtering to be applied on data + default is no filtering which an empty dictionary will be passed + + Returns + --------- + resource_ids : list[str] + a list of user ids doing activity in that day + """ + pipeline = [ + { + "$match": { + "date": { + "$gte": start_day, + "$lt": end_day, + }, + **metadata_filter, + } + }, + { + "$group": { + "_id": None, + "unique_resource_ids": { + "$addToSet": f"$metadata.{resource_identifier}" + }, + } + }, + {"$project": {"_id": 0, "unique_resource_ids": 1}}, + ] + + results = self.database["rawmemberactivities"].aggregate(pipeline) + + unique_resource_ids = [] + for doc in results: + unique_resource_ids = doc.get("unique_resource_ids", []) + + return unique_resource_ids + def get_users_count(self, is_bot: bool = False) -> int: """ get the count of users diff --git a/tests/integration/test_heatmaps_utils_active_resources.py b/tests/integration/test_heatmaps_utils_active_resources.py new file mode 100644 index 0000000..c8a8a55 --- /dev/null +++ b/tests/integration/test_heatmaps_utils_active_resources.py @@ -0,0 +1,192 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.heatmaps_utils import HeatmapsUtils +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsUtilsActiveResources(TestCase): + def setUp(self) -> None: + client = MongoSingleton.get_instance().get_client() + self.platform_id = "1234567890" + self.database = client[self.platform_id] + self.database.drop_collection("rawmemberactivities") + + self.utils = HeatmapsUtils(self.platform_id) + + def test_get_users_empty_collection(self): + start_day = datetime(2024, 1, 1) + end_day = datetime(2024, 1, 2) + users = self.utils.get_active_resources_period( + start_day, + end_day, + resource_identifier="channel_id", + ) + self.assertEqual(list(users), []) + + def test_get_multiple_users(self): + start_day = datetime(2024, 1, 1) + end_day = datetime(2024, 1, 2) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": "user1", + "date": datetime(2024, 1, 1, 1), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": ["user2"], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "11111", + "thread_id": None, + }, + "source_id": "11188143219343360", + }, + { + "actions": [], + "author_id": "user2", + "date": datetime(2024, 1, 1, 5), + "interactions": [ + { + "name": "reply", + "type": "receiver", + "users_engaged_id": ["user4", "user5"], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "22222", + "thread_id": None, + }, + "source_id": "11188143219343361", + }, + { + "actions": [], + "author_id": "user2", + "date": datetime(2024, 1, 1, 5), + "interactions": [ + { + "name": "reply", + "type": "receiver", + "users_engaged_id": ["user4", "user5"], + } + ], + "metadata": { + "bot_activity": True, + "channel_id": "44444", + "thread_id": None, + }, + "source_id": "11188143219343361", + }, + { + "actions": [], + "author_id": "user3", + "date": datetime(2024, 1, 2), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": ["user6"]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "33333", + "thread_id": None, + }, + "source_id": "11188143219343361", + }, + ] + self.database["rawmemberactivities"].insert_many(samples) + + users = self.utils.get_active_resources_period( + start_day, + end_day, + resource_identifier="channel_id", + ) + + self.assertEqual(set(users), set(["11111", "22222", "44444"])) + + def test_get_multiple_users_with_metadata_filter(self): + start_day = datetime(2024, 1, 1) + end_day = datetime(2024, 1, 2) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": "user1", + "date": datetime(2024, 1, 1, 1), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": ["user2"], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "11111", + "thread_id": None, + }, + "source_id": "11188143219343360", + }, + { + "actions": [], + "author_id": "user2", + "date": datetime(2024, 1, 1, 5), + "interactions": [ + { + "name": "reply", + "type": "receiver", + "users_engaged_id": ["user4", "user5"], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "22222", + "thread_id": None, + }, + "source_id": "11188143219343361", + }, + { + "actions": [], + "author_id": "user2", + "date": datetime(2024, 1, 1, 5), + "interactions": [ + { + "name": "reply", + "type": "receiver", + "users_engaged_id": ["user4", "user5"], + } + ], + "metadata": { + "bot_activity": True, + "channel_id": "44444", + "thread_id": None, + }, + "source_id": "11188143219343361", + }, + { + "actions": [], + "author_id": "user3", + "date": datetime(2024, 1, 2), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": ["user6"]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "33333", + "thread_id": None, + }, + "source_id": "11188143219343361", + }, + ] + self.database["rawmemberactivities"].insert_many(samples) + + users = self.utils.get_active_resources_period( + start_day, + end_day, + resource_identifier="channel_id", + metadata_filter={"metadata.channel_id": {"$in": ["22222"]}}, + ) + + self.assertEqual(set(users), set(["22222"])) diff --git a/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py b/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py index 5d187e8..737c33c 100644 --- a/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py +++ b/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py @@ -29,7 +29,6 @@ def test_compute_iteration_counts(self): iteration_count = self.heatmaps._compute_iteration_counts( analytics_date=analytics_date, - resources_count=5, ) - self.assertEqual(iteration_count, days * 5) # five days + self.assertEqual(iteration_count, days) From 40974a57cdd45807dca77cb7e6c3c842db58749e Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 13:12:13 +0330 Subject: [PATCH 4/7] feat: bump lib version! --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index aba9f75..1123068 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="tc-analyzer-lib", - version="1.2.0", + version="1.2.1", author="Mohammad Amin Dadgar, TogetherCrew", maintainer="Mohammad Amin Dadgar", maintainer_email="dadgaramin96@gmail.com", From 10e61b858a60e319c7ce8ba2205f758c1043bbdc Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 13:19:03 +0330 Subject: [PATCH 5/7] fix: test cases, adding missing field to raw data! the missing `bot_activity` field for metadata of mock data. --- ...test_heatmaps_process_hourly_single_day.py | 66 +++++++++++++++---- .../test_heatmaps_process_raw_analytics.py | 48 +++++++++++--- tests/integration/test_heatmaps_replier.py | 1 + 3 files changed, 96 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_heatmaps_process_hourly_single_day.py b/tests/integration/test_heatmaps_process_hourly_single_day.py index 2a47834..3863aa9 100644 --- a/tests/integration/test_heatmaps_process_hourly_single_day.py +++ b/tests/integration/test_heatmaps_process_hourly_single_day.py @@ -77,7 +77,11 @@ def test_process_hourly_single_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -93,7 +97,11 @@ def test_process_hourly_single_author(self): "author_id": 9001, "date": day, "source_id": "10001", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -107,7 +115,11 @@ def test_process_hourly_single_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -123,7 +135,11 @@ def test_process_hourly_single_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 4), "source_id": "10001", - "metadata": {"thread_id": None, "channel_id": "124"}, + "metadata": { + "thread_id": None, + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -176,7 +192,11 @@ def test_process_hourly_wrong_channel(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -192,7 +212,11 @@ def test_process_hourly_wrong_channel(self): "author_id": 9001, "date": day, "source_id": "10001", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -206,7 +230,11 @@ def test_process_hourly_wrong_channel(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -222,7 +250,11 @@ def test_process_hourly_wrong_channel(self): "author_id": 9001, "date": datetime(2023, 1, 1, 4), "source_id": "10001", - "metadata": {"thread_id": None, "channel_id": "124"}, + "metadata": { + "thread_id": None, + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -264,7 +296,11 @@ def test_process_hourly_wrong_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -280,7 +316,11 @@ def test_process_hourly_wrong_author(self): "author_id": 9001, "date": day, "source_id": "10001", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -294,7 +334,11 @@ def test_process_hourly_wrong_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { diff --git a/tests/integration/test_heatmaps_process_raw_analytics.py b/tests/integration/test_heatmaps_process_raw_analytics.py index 18eeb69..630ac7a 100644 --- a/tests/integration/test_heatmaps_process_raw_analytics.py +++ b/tests/integration/test_heatmaps_process_raw_analytics.py @@ -57,7 +57,11 @@ def test_single_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -73,7 +77,11 @@ def test_single_author(self): "author_id": 9001, "date": day, "source_id": "10001", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -87,7 +95,11 @@ def test_single_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -103,7 +115,11 @@ def test_single_author(self): "author_id": 9001, "date": datetime(2023, 1, 1, 4), "source_id": "10001", - "metadata": {"thread_id": None, "channel_id": "124"}, + "metadata": { + "thread_id": None, + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -149,7 +165,11 @@ def test_multiple_authors(self): "author_id": 9001, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -165,7 +185,11 @@ def test_multiple_authors(self): "author_id": 9001, "date": day, "source_id": "10001", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -184,7 +208,11 @@ def test_multiple_authors(self): "author_id": 9002, "date": datetime(2023, 1, 1, 2), "source_id": "10000", - "metadata": {"thread_id": "7000", "channel_id": "124"}, + "metadata": { + "thread_id": "7000", + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { @@ -203,7 +231,11 @@ def test_multiple_authors(self): "author_id": 9001, "date": datetime(2023, 1, 1, 4), "source_id": "10001", - "metadata": {"thread_id": None, "channel_id": "124"}, + "metadata": { + "thread_id": None, + "channel_id": "124", + "bot_activity": False, + }, "actions": [{"name": "message", "type": "emitter"}], "interactions": [ { diff --git a/tests/integration/test_heatmaps_replier.py b/tests/integration/test_heatmaps_replier.py index cecebd8..bcf5a29 100644 --- a/tests/integration/test_heatmaps_replier.py +++ b/tests/integration/test_heatmaps_replier.py @@ -61,6 +61,7 @@ def test_reply_messages(): "metadata": { "channel_id": chId, "thread_id": None, + "bot_activity": False, }, }, { From ad4f5d98d0da945cb713960ce2bba043a533d3ba Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 13:25:35 +0330 Subject: [PATCH 6/7] fix: codeRabbitAI suggestion! changing for an input variable value so the users won't confuse it. --- .../metrics/heatmaps/heatmaps_utils.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py index 873f3d4..ce74311 100644 --- a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py @@ -33,7 +33,10 @@ def get_users(self, is_bot: bool = False) -> Cursor: return cursor def get_active_users( - self, start_day: datetime, end_day: datetime, metadata_filter: dict = {} + self, + start_day: datetime, + end_day: datetime, + metadata_filter: dict | None = None, ) -> list[str]: """ get the users doing activities for a specific period @@ -44,15 +47,18 @@ def get_active_users( the time to filter the data from end_day : datetime the end day for filtering data from - metadata_filter : dict + metadata_filter : dict | None the additional filtering to be applied on data - default is no filtering which an empty dictionary will be passed + default is `None` which means no filtering Returns --------- users : list[str] a list of user ids doing activity in that day """ + if metadata_filter is None: + metadata_filter = {} + cursor = self.database["rawmemberactivities"].aggregate( [ { @@ -93,7 +99,7 @@ def get_active_resources_period( start_day: datetime, end_day: datetime, resource_identifier: str, - metadata_filter: dict = {}, + metadata_filter: dict | None = None, ) -> list[str]: """ get the active resource ids for a specific period @@ -107,15 +113,18 @@ def get_active_resources_period( resource_identifier : str the resource identifier on database for a platform i.e.: could be `channel_id` for discord - metadata_filter : dict + metadata_filter : dict | None the additional filtering to be applied on data - default is no filtering which an empty dictionary will be passed + default is `None` which means no filtering Returns --------- resource_ids : list[str] a list of user ids doing activity in that day """ + if metadata_filter is None: + metadata_filter = {} + pipeline = [ { "$match": { From 983954f88123c553aec08fee1adaf1f9842158a8 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 16 Jul 2024 14:00:54 +0330 Subject: [PATCH 7/7] fix: there was not enough data that errors was raising! - Added more mock data for past. --- .../test_analyzer_period_week_run_once_empty_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py index a3f3c30..a63b9b9 100644 --- a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py +++ b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py @@ -32,7 +32,7 @@ def test_analyzer_week_period_run_once_empty_analytics(): rawinfo_samples = [] # generating random rawinfo data - for i in range(155): + for i in range(160): author = np.random.choice(acc_id) replied_user = np.random.choice(acc_id) # not producing any self-interactions