From 821a9d1c3580fa37a4fc616359bc61420e1f4f44 Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 12:26:51 +0330
Subject: [PATCH 1/7] fix: more efficient querying of the database!

---
 .../metrics/heatmaps/analytics_hourly.py      | 23 +++++++++++--------
 .../metrics/heatmaps/analytics_raw.py         |  8 ++-----
 tc_analyzer_lib/metrics/heatmaps/heatmaps.py  |  7 ++++--
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py b/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py
index cb848f2..a8f07ad 100644
--- a/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py
+++ b/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py
@@ -39,11 +39,10 @@ def analyze(
         activity_direction : str
             should be always either `emitter` or `receiver`
         **kwargs :
-            additional_filters : dict[str, str]
-                the additional filtering for `rawmemberactivities` data of each platform
-                the keys could be `metadata.channel_id` with a specific value
+            resource_filtering : dict[str, str]
+                the filters to apply on the resources of the data
         """
-        additional_filters: dict[str, str] = kwargs.get("additional_filters", {})
+        resource_filtering: dict[str, str] = kwargs.get("resource_filtering", {})
 
         if activity_direction not in ["emitter", "receiver"]:
             raise AttributeError(
@@ -64,8 +63,8 @@ def analyze(
             filters={
                 f"{activity}.name": activity_name,
                 f"{activity}.type": activity_direction,
-                **additional_filters,
             },
+            resource_filters=resource_filtering,
         )
 
         return activity_vector
@@ -76,6 +75,7 @@ def get_hourly_analytics(
         activity: str,
         author_id: str | int,
         filters: dict[str, dict[str, Any] | str] | None = None,
+        resource_filters: dict[str, str] | None = None,
     ) -> list[int]:
         """
         Gets the list of documents for the stated day
@@ -87,12 +87,12 @@ def get_hourly_analytics(
         activity : str
             to be `interactions` or `actions`
         filter : dict[str, dict[str] | str] | None
-            the filtering that we need to apply
+            the filtering that we need to apply on actions or interactions
             for default it is an None meaning
             no filtering would be applied
-        msg : str
-            additional information to be logged
-            for default is empty string meaning no additional string to log
+        resource_filters : dict[str, str] | None
+            the filtering on resources of data
+            could make the query more efficient if provided
 
         Returns
         ---------
@@ -103,12 +103,17 @@ def get_hourly_analytics(
         start_day = datetime.combine(day, time(0, 0, 0))
         end_day = start_day + timedelta(days=1)
 
+        # if no resource filter was provided, fall back to an empty one
+        if resource_filters is None:
+            resource_filters = {}
+
         pipeline = [
             # the day for analytics
             {
                 "$match": {
                     "date": {"$gte": start_day, "$lt": end_day},
                     "author_id": author_id,
+                    **resource_filters,
                 }
             },
             # Unwind the activity array
diff --git a/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py b/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py
index 9f7f1da..76ebab6 100644
--- a/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py
+++ b/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py
@@ -114,19 +114,15 @@ def get_analytics_count(
             raw analytics item which holds the user and
             the count of interaction in that day
         """
-        filters: dict[str, dict[str, Any] | str] | None = kwargs.get("filters")
+        filters: dict[str, dict[str, Any] | str] = kwargs.get("filters", {})
         start_day = datetime.combine(day, time(0, 0, 0))
         end_day = start_day + timedelta(days=1)
 
         match_filters = {
             "date": {"$gte": start_day, "$lt": end_day},
             "author_id": author_id,
+            **filters,
         }
-        if filters is not None:
-            match_filters = {
-                **match_filters,
-                **filters,
-            }
 
         pipeline = [
             {
diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py
index a0e3f82..6d3cef7 100644
--- a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py
+++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py
@@ -188,8 +188,9 @@ def _process_hourly_analytics(
                     activity_name=activity_name,
                     activity_direction=config.direction.value,
                     author_id=author_id,
-                    additional_filters={
+                    resource_filtering={
                         f"metadata.{self.analyzer_config.resource_identifier}": resource,
+                        "metadata.bot_activity": False,
                     },
                 )
                 analytics[config.name] = analytics_vector
@@ -213,8 +214,9 @@ def _process_hourly_analytics(
                     activity_name=activity_name,
                     activity_direction=config.direction.value,
                     author_id=author_id,
-                    additional_filters={
+                    resource_filtering={
                         f"metadata.{self.analyzer_config.resource_identifier}": resource,
+                        "metadata.bot_activity": False,
                         **conditions,
                     },
                 )
@@ -250,6 +252,7 @@ def _process_raw_analytics(
 
             additional_filters: dict[str, str] = {
                 f"metadata.{self.analyzer_config.resource_identifier}": resource,
+                "metadata.bot_activity": False,
             }
             # preparing for custom analytics (if available in config)
             if config.rawmemberactivities_condition is not None:

From cfaefc3bf79d21fecb406df55475b3d669f6d48f Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 12:39:30 +0330
Subject: [PATCH 2/7] fix: remove unused code!

---
 .../metrics/heatmaps/heatmaps_utils.py        | 21 -------------------
 1 file changed, 21 deletions(-)

diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
index 8c28a93..e2576f9 100644
--- a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
+++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
@@ -53,27 +53,6 @@ def get_active_users(
         users : list[str]
             a list of user ids doing activity in that day
         """
-        # cursor = self.database["rawmemberactivities"].aggregate(
-        #     [
-        #         {"$match": {"date": {"$gte": start_day, "$lt": end_day}}},
-        #         {"$unwind": "$interactions"},
-        #         {"$unwind": "$interactions.users_engaged_id"},
-        #         {
-        #             "$group": {
-        #                 "_id": None,
-        #                 "all_ids": {"$addToSet": "$interactions.users_engaged_id"},
-        #                 "author_ids": {"$addToSet": "$author_id"},
-        #             }
-        #         },
-        #         {
-        #             "$project": {
-        #                 "_id": 0,
-        #                 "combined_ids": {"$setUnion": ["$all_ids", "$author_ids"]},
-        #             }
-        #         },
-        #     ]
-        # )
-
         cursor = self.database["rawmemberactivities"].aggregate(
             [
                 {

From c54a0fd63691275f0514d3fcbf00f373deec26b5 Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 13:08:34 +0330
Subject: [PATCH 3/7] fix: more improvements on heatmaps computation! now we're
 also processing the actual channels that are active for each day rather than
 processing for all selected channels.

---
 tc_analyzer_lib/metrics/heatmaps/heatmaps.py  |  52 +++--
 .../metrics/heatmaps/heatmaps_utils.py        |  57 ++++++
 .../test_heatmaps_utils_active_resources.py   | 192 ++++++++++++++++++
 ...lyzer_heatmaps_compute_iteration_counts.py |   3 +-
 4 files changed, 281 insertions(+), 23 deletions(-)
 create mode 100644 tests/integration/test_heatmaps_utils_active_resources.py

diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py
index 6d3cef7..0a5915a 100644
--- a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py
+++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py
@@ -70,24 +70,34 @@ def start(self, from_start: bool = False) -> list[dict]:
         # initialize the data array
         heatmaps_results = []
 
-        iteration_count = self._compute_iteration_counts(
-            analytics_date=analytics_date,
-            resources_count=len(self.resources),
-        )
-
         cursor = self.utils.get_users(is_bot=True)
         bot_ids = list(map(lambda user: user["id"], cursor))
 
-        index = 0
+        # index = 0
         while analytics_date.date() < datetime.now().date():
-            for resource_id in self.resources:
-                # for more efficient retrieval
-                # we're always using the cursor and re-querying the db
-
-                start_day = analytics_date.replace(
-                    hour=0, minute=0, second=0, microsecond=0
+            start_day = analytics_date.replace(
+                hour=0, minute=0, second=0, microsecond=0
+            )
+            end_day = start_day + timedelta(days=1)
+
+            # getting the active resource_ids (activities being done there by users)
+            period_resources = self.utils.get_active_resources_period(
+                start_day=start_day,
+                end_day=end_day,
+                resource_identifier=self.analyzer_config.resource_identifier,
+                metadata_filter={
+                    f"metadata.{self.analyzer_config.resource_identifier}": {
+                        "$in": self.resources,
+                    }
+                },
+            )
+            if len(period_resources) == 0:
+                logging.warning(
+                    "No users interacting on platform for date: "
+                    f"{start_day.date()} - {end_day.date()}"
                 )
-                end_day = start_day + timedelta(days=1)
+
+            for resource_idx, resource_id in enumerate(period_resources):
                 user_ids = self.utils.get_active_users(
                     start_day,
                     end_day,
@@ -99,15 +109,16 @@ def start(self, from_start: bool = False) -> list[dict]:
                 if len(user_ids) == 0:
                     logging.warning(
                         f"{log_prefix} No users interacting for the time window: "
-                        f"{start_day.date()} - {end_day.date()}"
+                        f"{start_day.date()} - {end_day.date()} for resource: {resource_id}"
                         " Skipping the day."
                     )
 
-                for idx, author_id in enumerate(user_ids):
+                for user_idx, author_id in enumerate(user_ids):
                     logging.info(
-                        f"{log_prefix} ANALYZING HEATMAPS {index}/{iteration_count} "
-                        f"author index: {idx}/{len(user_ids)} | "
-                        f"DAY: {start_day.date()} - {end_day.date()}"
+                        f"{log_prefix} ANALYZING HEATMAPS {start_day.date()} - {end_day.date()} | "
+                        # f"DAY {index}/{iteration_count} "
+                        f"Author: {user_idx + 1}/{len(user_ids)} "
+                        f"of resource: {resource_idx + 1}/{len(period_resources)}"
                     )
 
                     if author_id in bot_ids:
@@ -137,7 +148,7 @@ def start(self, from_start: bool = False) -> list[dict]:
 
                     heatmaps_results.append(document)
 
-                index += 1
+                # index += 1
 
             # analyze next day
             analytics_date += timedelta(days=1)
@@ -279,8 +290,7 @@ def _process_raw_analytics(
     def _compute_iteration_counts(
         self,
         analytics_date: datetime,
-        resources_count: int,
     ) -> int:
-        iteration_count = (datetime.now() - analytics_date).days * resources_count
+        iteration_count = (datetime.now() - analytics_date).days
 
         return iteration_count
diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
index e2576f9..873f3d4 100644
--- a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
+++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
@@ -88,6 +88,63 @@ def get_active_users(
         # making the values to be unique
         return list(set(combined_ids))
 
+    def get_active_resources_period(
+        self,
+        start_day: datetime,
+        end_day: datetime,
+        resource_identifier: str,
+        metadata_filter: dict = {},
+    ) -> list[str]:
+        """
+        get the active resource ids for a specific period
+
+        Parameters
+        ------------
+        start_day : datetime
+            the time to filter the data from
+        end_day : datetime
+            the time to filter the data until (exclusive)
+        resource_identifier : str
+            the resource identifier on database for a platform
+            i.e.: could be `channel_id` for discord
+        metadata_filter : dict
+            the additional filtering to be applied on data
+            default is an empty dictionary, meaning no additional filtering
+
+        Returns
+        ---------
+        resource_ids : list[str]
+            a list of resource ids that had activity in the given period
+        """
+        pipeline = [
+            {
+                "$match": {
+                    "date": {
+                        "$gte": start_day,
+                        "$lt": end_day,
+                    },
+                    **metadata_filter,
+                }
+            },
+            {
+                "$group": {
+                    "_id": None,
+                    "unique_resource_ids": {
+                        "$addToSet": f"$metadata.{resource_identifier}"
+                    },
+                }
+            },
+            {"$project": {"_id": 0, "unique_resource_ids": 1}},
+        ]
+
+        results = self.database["rawmemberactivities"].aggregate(pipeline)
+
+        unique_resource_ids = []
+        for doc in results:
+            unique_resource_ids = doc.get("unique_resource_ids", [])
+
+        return unique_resource_ids
+
     def get_users_count(self, is_bot: bool = False) -> int:
         """
         get the count of users
diff --git a/tests/integration/test_heatmaps_utils_active_resources.py b/tests/integration/test_heatmaps_utils_active_resources.py
new file mode 100644
index 0000000..c8a8a55
--- /dev/null
+++ b/tests/integration/test_heatmaps_utils_active_resources.py
@@ -0,0 +1,192 @@
+from datetime import datetime
+from unittest import TestCase
+
+from tc_analyzer_lib.metrics.heatmaps.heatmaps_utils import HeatmapsUtils
+from tc_analyzer_lib.utils.mongo import MongoSingleton
+
+
+class TestHeatmapsUtilsActiveResources(TestCase):
+    def setUp(self) -> None:
+        client = MongoSingleton.get_instance().get_client()
+        self.platform_id = "1234567890"
+        self.database = client[self.platform_id]
+        self.database.drop_collection("rawmemberactivities")
+
+        self.utils = HeatmapsUtils(self.platform_id)
+
+    def test_get_users_empty_collection(self):
+        start_day = datetime(2024, 1, 1)
+        end_day = datetime(2024, 1, 2)
+        users = self.utils.get_active_resources_period(
+            start_day,
+            end_day,
+            resource_identifier="channel_id",
+        )
+        self.assertEqual(list(users), [])
+
+    def test_get_multiple_users(self):
+        start_day = datetime(2024, 1, 1)
+        end_day = datetime(2024, 1, 2)
+        samples = [
+            {
+                "actions": [{"name": "message", "type": "emitter"}],
+                "author_id": "user1",
+                "date": datetime(2024, 1, 1, 1),
+                "interactions": [
+                    {
+                        "name": "reply",
+                        "type": "emitter",
+                        "users_engaged_id": ["user2"],
+                    }
+                ],
+                "metadata": {
+                    "bot_activity": False,
+                    "channel_id": "11111",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343360",
+            },
+            {
+                "actions": [],
+                "author_id": "user2",
+                "date": datetime(2024, 1, 1, 5),
+                "interactions": [
+                    {
+                        "name": "reply",
+                        "type": "receiver",
+                        "users_engaged_id": ["user4", "user5"],
+                    }
+                ],
+                "metadata": {
+                    "bot_activity": False,
+                    "channel_id": "22222",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343361",
+            },
+            {
+                "actions": [],
+                "author_id": "user2",
+                "date": datetime(2024, 1, 1, 5),
+                "interactions": [
+                    {
+                        "name": "reply",
+                        "type": "receiver",
+                        "users_engaged_id": ["user4", "user5"],
+                    }
+                ],
+                "metadata": {
+                    "bot_activity": True,
+                    "channel_id": "44444",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343361",
+            },
+            {
+                "actions": [],
+                "author_id": "user3",
+                "date": datetime(2024, 1, 2),
+                "interactions": [
+                    {"name": "reply", "type": "receiver", "users_engaged_id": ["user6"]}
+                ],
+                "metadata": {
+                    "bot_activity": False,
+                    "channel_id": "33333",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343361",
+            },
+        ]
+        self.database["rawmemberactivities"].insert_many(samples)
+
+        users = self.utils.get_active_resources_period(
+            start_day,
+            end_day,
+            resource_identifier="channel_id",
+        )
+
+        self.assertEqual(set(users), set(["11111", "22222", "44444"]))
+
+    def test_get_multiple_users_with_metadata_filter(self):
+        start_day = datetime(2024, 1, 1)
+        end_day = datetime(2024, 1, 2)
+        samples = [
+            {
+                "actions": [{"name": "message", "type": "emitter"}],
+                "author_id": "user1",
+                "date": datetime(2024, 1, 1, 1),
+                "interactions": [
+                    {
+                        "name": "reply",
+                        "type": "emitter",
+                        "users_engaged_id": ["user2"],
+                    }
+                ],
+                "metadata": {
+                    "bot_activity": False,
+                    "channel_id": "11111",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343360",
+            },
+            {
+                "actions": [],
+                "author_id": "user2",
+                "date": datetime(2024, 1, 1, 5),
+                "interactions": [
+                    {
+                        "name": "reply",
+                        "type": "receiver",
+                        "users_engaged_id": ["user4", "user5"],
+                    }
+                ],
+                "metadata": {
+                    "bot_activity": False,
+                    "channel_id": "22222",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343361",
+            },
+            {
+                "actions": [],
+                "author_id": "user2",
+                "date": datetime(2024, 1, 1, 5),
+                "interactions": [
+                    {
+                        "name": "reply",
+                        "type": "receiver",
+                        "users_engaged_id": ["user4", "user5"],
+                    }
+                ],
+                "metadata": {
+                    "bot_activity": True,
+                    "channel_id": "44444",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343361",
+            },
+            {
+                "actions": [],
+                "author_id": "user3",
+                "date": datetime(2024, 1, 2),
+                "interactions": [
+                    {"name": "reply", "type": "receiver", "users_engaged_id": ["user6"]}
+                ],
+                "metadata": {
+                    "bot_activity": False,
+                    "channel_id": "33333",
+                    "thread_id": None,
+                },
+                "source_id": "11188143219343361",
+            },
+        ]
+        self.database["rawmemberactivities"].insert_many(samples)
+
+        users = self.utils.get_active_resources_period(
+            start_day,
+            end_day,
+            resource_identifier="channel_id",
+            metadata_filter={"metadata.channel_id": {"$in": ["22222"]}},
+        )
+
+        self.assertEqual(set(users), set(["22222"]))
diff --git a/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py b/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py
index 5d187e8..737c33c 100644
--- a/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py
+++ b/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py
@@ -29,7 +29,6 @@ def test_compute_iteration_counts(self):
 
         iteration_count = self.heatmaps._compute_iteration_counts(
             analytics_date=analytics_date,
-            resources_count=5,
         )
 
-        self.assertEqual(iteration_count, days * 5)  # five days
+        self.assertEqual(iteration_count, days)

From 40974a57cdd45807dca77cb7e6c3c842db58749e Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 13:12:13 +0330
Subject: [PATCH 4/7] feat: bump lib version!

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index aba9f75..1123068 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name="tc-analyzer-lib",
-    version="1.2.0",
+    version="1.2.1",
     author="Mohammad Amin Dadgar, TogetherCrew",
     maintainer="Mohammad Amin Dadgar",
     maintainer_email="dadgaramin96@gmail.com",

From 10e61b858a60e319c7ce8ba2205f758c1043bbdc Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 13:19:03 +0330
Subject: [PATCH 5/7] fix: test cases, adding missing field to raw data! the
 missing `bot_activity` field for metadata of mock data.

---
 ...test_heatmaps_process_hourly_single_day.py | 66 +++++++++++++++----
 .../test_heatmaps_process_raw_analytics.py    | 48 +++++++++++---
 tests/integration/test_heatmaps_replier.py    |  1 +
 3 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/tests/integration/test_heatmaps_process_hourly_single_day.py b/tests/integration/test_heatmaps_process_hourly_single_day.py
index 2a47834..3863aa9 100644
--- a/tests/integration/test_heatmaps_process_hourly_single_day.py
+++ b/tests/integration/test_heatmaps_process_hourly_single_day.py
@@ -77,7 +77,11 @@ def test_process_hourly_single_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -93,7 +97,11 @@ def test_process_hourly_single_author(self):
                 "author_id": 9001,
                 "date": day,
                 "source_id": "10001",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -107,7 +115,11 @@ def test_process_hourly_single_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -123,7 +135,11 @@ def test_process_hourly_single_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 4),
                 "source_id": "10001",
-                "metadata": {"thread_id": None, "channel_id": "124"},
+                "metadata": {
+                    "thread_id": None,
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -176,7 +192,11 @@ def test_process_hourly_wrong_channel(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -192,7 +212,11 @@ def test_process_hourly_wrong_channel(self):
                 "author_id": 9001,
                 "date": day,
                 "source_id": "10001",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -206,7 +230,11 @@ def test_process_hourly_wrong_channel(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -222,7 +250,11 @@ def test_process_hourly_wrong_channel(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 4),
                 "source_id": "10001",
-                "metadata": {"thread_id": None, "channel_id": "124"},
+                "metadata": {
+                    "thread_id": None,
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -264,7 +296,11 @@ def test_process_hourly_wrong_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -280,7 +316,11 @@ def test_process_hourly_wrong_author(self):
                 "author_id": 9001,
                 "date": day,
                 "source_id": "10001",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -294,7 +334,11 @@ def test_process_hourly_wrong_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
diff --git a/tests/integration/test_heatmaps_process_raw_analytics.py b/tests/integration/test_heatmaps_process_raw_analytics.py
index 18eeb69..630ac7a 100644
--- a/tests/integration/test_heatmaps_process_raw_analytics.py
+++ b/tests/integration/test_heatmaps_process_raw_analytics.py
@@ -57,7 +57,11 @@ def test_single_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -73,7 +77,11 @@ def test_single_author(self):
                 "author_id": 9001,
                 "date": day,
                 "source_id": "10001",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -87,7 +95,11 @@ def test_single_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -103,7 +115,11 @@ def test_single_author(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 4),
                 "source_id": "10001",
-                "metadata": {"thread_id": None, "channel_id": "124"},
+                "metadata": {
+                    "thread_id": None,
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -149,7 +165,11 @@ def test_multiple_authors(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -165,7 +185,11 @@ def test_multiple_authors(self):
                 "author_id": 9001,
                 "date": day,
                 "source_id": "10001",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -184,7 +208,11 @@ def test_multiple_authors(self):
                 "author_id": 9002,
                 "date": datetime(2023, 1, 1, 2),
                 "source_id": "10000",
-                "metadata": {"thread_id": "7000", "channel_id": "124"},
+                "metadata": {
+                    "thread_id": "7000",
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
@@ -203,7 +231,11 @@ def test_multiple_authors(self):
                 "author_id": 9001,
                 "date": datetime(2023, 1, 1, 4),
                 "source_id": "10001",
-                "metadata": {"thread_id": None, "channel_id": "124"},
+                "metadata": {
+                    "thread_id": None,
+                    "channel_id": "124",
+                    "bot_activity": False,
+                },
                 "actions": [{"name": "message", "type": "emitter"}],
                 "interactions": [
                     {
diff --git a/tests/integration/test_heatmaps_replier.py b/tests/integration/test_heatmaps_replier.py
index cecebd8..bcf5a29 100644
--- a/tests/integration/test_heatmaps_replier.py
+++ b/tests/integration/test_heatmaps_replier.py
@@ -61,6 +61,7 @@ def test_reply_messages():
                         "metadata": {
                             "channel_id": chId,
                             "thread_id": None,
+                            "bot_activity": False,
                         },
                     },
                     {

From ad4f5d98d0da945cb713960ce2bba043a533d3ba Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 13:25:35 +0330
Subject: [PATCH 6/7] fix: codeRabbitAI suggestion! changing the default value
 of an input variable so that users won't be confused by it.

---
 .../metrics/heatmaps/heatmaps_utils.py        | 21 +++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
index 873f3d4..ce74311 100644
--- a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
+++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py
@@ -33,7 +33,10 @@ def get_users(self, is_bot: bool = False) -> Cursor:
         return cursor
 
     def get_active_users(
-        self, start_day: datetime, end_day: datetime, metadata_filter: dict = {}
+        self,
+        start_day: datetime,
+        end_day: datetime,
+        metadata_filter: dict | None = None,
     ) -> list[str]:
         """
         get the users doing activities for a specific period
@@ -44,15 +47,18 @@ def get_active_users(
             the time to filter the data from
         end_day : datetime
             the end day for filtering data from
-        metadata_filter : dict
+        metadata_filter : dict | None
             the additional filtering to be applied on data
-            default is no filtering which an empty dictionary will be passed
+            default is `None` which means no filtering
 
         Returns
         ---------
         users : list[str]
             a list of user ids doing activity in that day
         """
+        if metadata_filter is None:
+            metadata_filter = {}
+
         cursor = self.database["rawmemberactivities"].aggregate(
             [
                 {
@@ -93,7 +99,7 @@ def get_active_resources_period(
         start_day: datetime,
         end_day: datetime,
         resource_identifier: str,
-        metadata_filter: dict = {},
+        metadata_filter: dict | None = None,
     ) -> list[str]:
         """
         get the active resource ids for a specific period
@@ -107,15 +113,18 @@ def get_active_resources_period(
         resource_identifier : str
             the resource identifier on database for a platform
             i.e.: could be `channel_id` for discord
-        metadata_filter : dict
+        metadata_filter : dict | None
             the additional filtering to be applied on data
-            default is no filtering which an empty dictionary will be passed
+            default is `None` which means no filtering
 
         Returns
         ---------
         resource_ids : list[str]
             a list of user ids doing activity in that day
         """
+        if metadata_filter is None:
+            metadata_filter = {}
+
         pipeline = [
             {
                 "$match": {

From 983954f88123c553aec08fee1adaf1f9842158a8 Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 16 Jul 2024 14:00:54 +0330
Subject: [PATCH 7/7] fix: there was not enough data, so errors were being
 raised! - Added more mock data for the past.

---
 .../test_analyzer_period_week_run_once_empty_analytics.py       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py
index a3f3c30..a63b9b9 100644
--- a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py
+++ b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py
@@ -32,7 +32,7 @@ def test_analyzer_week_period_run_once_empty_analytics():
     rawinfo_samples = []
 
     # generating random rawinfo data
-    for i in range(155):
+    for i in range(160):
         author = np.random.choice(acc_id)
         replied_user = np.random.choice(acc_id)
         # not producing any self-interactions