diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f70fa19 --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +GITHUB_TOKEN=daolytics_access_token diff --git a/.gitignore b/.gitignore index f206ca2..4812160 100644 --- a/.gitignore +++ b/.gitignore @@ -167,7 +167,7 @@ cython_debug/ # Emacs .org -interactions/credentials.py -interactions/temp.ipynb -test*.ipynb -analyzer/rndao_analyzer/analysis/credentials.py \ No newline at end of file +coverage/* + +analyzer_lib +main.ipynb \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 96b74ae..701dce2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,16 @@ # It's recommended that we use `bullseye` for Python (alpine isn't suitable as it conflicts with numpy) -FROM python:3.10-bullseye AS base +FROM python:3.10-bullseye AS base WORKDIR /project COPY . . +ARG GITHUB_TOKEN RUN pip3 install -r requirements.txt FROM base AS test -RUN python3 -m coverage run -m pytest tests -CMD ["python3", "-m", "coverage", "lcov" ,"-o", "coverage/lcov.info"] +RUN chmod +x docker-entrypoint.sh +CMD ["./docker-entrypoint.sh"] -FROM base AS prod -CMD ["python3", "server.py"] +FROM base AS prod-server +CMD ["python3", "start_rabbit_mq.py"] + +FROM base as prod-worker +CMD ["python3", "redis_worker.py"] \ No newline at end of file diff --git a/README.md b/README.md index c7845d4..885823f 100644 --- a/README.md +++ b/README.md @@ -1 +1,6 @@ -# python-service \ No newline at end of file +# Discord-Analyzer + +[![Maintainability](https://api.codeclimate.com/v1/badges/e1239b895f0ee2569b61/maintainability)](https://codeclimate.com/github/TogetherCrew/discord-analyzer/maintainability) +[![Test Coverage](https://api.codeclimate.com/v1/badges/e1239b895f0ee2569b61/test_coverage)](https://codeclimate.com/github/TogetherCrew/discord-analyzer/test_coverage) + +This repository contains the code to analyze Discord chat data.
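For orientation only (not part of the diff): the README above points at the analyzer code that the rest of this change introduces, and the `AnalyzerInit` helper added in `analyzer_init.py` below is what wires it together at runtime. A minimal usage sketch, assuming the MongoDB, Neo4j, and saga-DB credentials read by `utils.daolytics_uitls` are already exported as environment variables, might look like this:

```python
# Illustrative sketch only (not part of the diff): wiring the analyzer together
# via the AnalyzerInit helper introduced in analyzer_init.py below.
# Assumes the Mongo/Neo4j/saga credentials are available as environment variables.
from analyzer_init import AnalyzerInit

initializer = AnalyzerInit()
analyzer, mongo_creds = initializer.get_analyzer()

# `analyzer` is a connected RnDaoAnalyzer instance with Neo4j metrics set up;
# `mongo_creds` additionally carries the saga db/collection names and a
# ready-made MongoDB connection string.
```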
diff --git a/analyzer_init.py b/analyzer_init.py new file mode 100644 index 0000000..f0f6b0d --- /dev/null +++ b/analyzer_init.py @@ -0,0 +1,57 @@ +from typing import Any + +from discord_analyzer import RnDaoAnalyzer +from utils.daolytics_uitls import ( + get_mongo_credentials, + get_neo4j_credentials, + get_saga_db_location, +) + + +class AnalyzerInit: + """ + initialize the analyzer with its configs + """ + + def __init__(self) -> None: + pass + + def get_analyzer(self) -> tuple[RnDaoAnalyzer, dict[str, Any]]: + """ + Returns: + --------- + analyzer : RnDaoAnalyzer + mongo_creds : dict[str, Any] + """ + analyzer = RnDaoAnalyzer() + + # credentials + mongo_creds = get_mongo_credentials() + neo4j_creds = get_neo4j_credentials() + saga_mongo_location = get_saga_db_location() + + mongo_creds["db_name"] = saga_mongo_location["db_name"] + mongo_creds["collection_name"] = saga_mongo_location["collection_name"] + mongo_creds["connection_str"] = self._get_mongo_connection(mongo_creds) + + analyzer.set_mongo_database_info( + mongo_db_host=mongo_creds["host"], + mongo_db_password=mongo_creds["password"], + mongo_db_port=mongo_creds["port"], + mongo_db_user=mongo_creds["user"], + ) + analyzer.set_neo4j_database_info(neo4j_creds=neo4j_creds) + analyzer.database_connect() + analyzer.setup_neo4j_metrics() + + return analyzer, mongo_creds + + def _get_mongo_connection(self, mongo_creds: dict[str, Any]): + user = mongo_creds["user"] + password = mongo_creds["password"] + host = mongo_creds["host"] + port = mongo_creds["port"] + + connection = f"mongodb://{user}:{password}@{host}:{port}" + + return connection diff --git a/discord_analyzer/DB_operations/__init__.py b/discord_analyzer/DB_operations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/DB_operations/mongo_neo4j_ops.py b/discord_analyzer/DB_operations/mongo_neo4j_ops.py new file mode 100644 index 0000000..ec0a96c --- /dev/null +++ b/discord_analyzer/DB_operations/mongo_neo4j_ops.py @@ -0,0 +1,171 @@ +import logging + +from discord_analyzer.DB_operations.mongodb_interaction import MongoDBOps +from discord_analyzer.DB_operations.network_graph import make_neo4j_networkx_query_dict +from tc_neo4j_lib.neo4j_ops import Neo4jOps + + +class MongoNeo4jDB: + def __init__(self, testing=False): + """ + having both databases in one class + + """ + self.neo4j_ops = None + self.mongoOps = None + self.testing = testing + + def set_neo4j_utils( + self, + db_name: str, + host: str, + port: str, + protocol: str, + user: str, + password: str, + ): + """ + store the neo4j utils instance + """ + self.neo4j_ops = Neo4jOps() + self.neo4j_ops.set_neo4j_db_info( + neo4j_db_name=db_name, + neo4j_protocol=protocol, + neo4j_user=user, + neo4j_password=password, + neo4j_host=host, + neo4j_port=port, + ) + self.neo4j_ops.neo4j_database_connect() + + def set_mongo_db_ops( + self, mongo_user: str, mongo_pass: str, mongo_host: str, mongo_port: str + ): + """ + setup the MongoDBOps class with the parameters needed + + """ + self.mongoOps = MongoDBOps( + user=mongo_user, password=mongo_pass, host=mongo_host, port=mongo_port + ) + self.mongoOps.set_mongo_db_access() + + def store_analytics_data( + self, analytics_data, remove_memberactivities=False, remove_heatmaps=False + ): + """ + store the analytics data into database + all data are in format of nested dictionaries which + + Parameters: + ------------- + analytics_data : dictionary + a nested dictinoary with keys as guildId + and values as heatmaps and memberactivities data + heatmaps is also 
a list of dictinoaries + and memberactivities is a tuple of memberactivities dictionary list + and memebractivities networkx object dictionary list + remove_memberactivities : bool + remove the whole memberactivity data and insert + default is `False` which means don't delete the existing data + remove_heatmaps : bool + remove the whole heatmap data and insert + default is `False` which means don't delete the existing data + + Returns: + ---------- + `None` + """ + for guildId in analytics_data.keys(): + heatmaps_data = analytics_data[guildId]["heatmaps"] + (memberactivities_data, memberactivities_networkx_data) = analytics_data[ + guildId + ]["memberactivities"] + + if not self.testing: + # mongodb transactions + self.mongoOps._do_analytics_write_transaction( + guildId=guildId, + delete_heatmaps=remove_heatmaps, + delete_member_acitivities=remove_memberactivities, + acitivties_list=memberactivities_data, + heatmaps_list=heatmaps_data, + ) + + # neo4j transactions + if ( + memberactivities_networkx_data is not None + and memberactivities_networkx_data != [] + ): + queries_list = make_neo4j_networkx_query_dict( + networkx_graphs=memberactivities_networkx_data, guildId=guildId + ) + self.run_operations_transaction( + guildId=guildId, + queries_list=queries_list, + remove_memberactivities=remove_memberactivities, + ) + else: + logging.warning("Testing mode enabled! Not saving any data") + + def run_operations_transaction( + self, guildId, queries_list, remove_memberactivities + ): + """ + do the deletion and insertion operations inside a transaction + + Parameters: + ------------ + guildId : str + the guild id that the users are connected to it + which we're going to delete the relations of it + queries_list : list + list of strings to add data into neo4j + min length is 1 + remove_memberactivities : bool + if True, remove the old data specified in that guild + """ + self.guild_msg = f"GUILDID: {guildId}:" + + transaction_queries = [] + if remove_memberactivities: + logging.info( + f"{self.guild_msg} Neo4J GuildId accounts relation will be removed!" 
+ ) + delete_relationship_query = self._create_guild_rel_deletion_query( + guildId=guildId + ) + transaction_queries.append(delete_relationship_query) + + # logging.info(queries_list) + transaction_queries.extend(queries_list) + + self.neo4j_ops.store_data_neo4j(transaction_queries, message=self.guild_msg) + + def _create_guild_rel_deletion_query( + self, guildId: str, relation_name: str = "INTERACTED_WITH" + ): + """ + create a query to delete the relationships + between DiscordAccount users in a specific guild + + Parameters: + ------------- + guildId : str + the guild id that the users are connected to it + relation_name : str + the relation we want to delete + + Returns: + ------------ + final_query : str + the final query to remove the relationships + """ + + delete_relationship_query = f""" + MATCH + (:DiscordAccount) + -[r:{relation_name} {{guildId: '{guildId}'}}]-(:DiscordAccount) + DETACH DELETE r""" + + return delete_relationship_query diff --git a/discord_analyzer/DB_operations/mongodb_access.py b/discord_analyzer/DB_operations/mongodb_access.py new file mode 100644 index 0000000..402006b --- /dev/null +++ b/discord_analyzer/DB_operations/mongodb_access.py @@ -0,0 +1,151 @@ +from pymongo import MongoClient + + +class DB_access: + def __init__(self, db_name, connection_string) -> None: + """ + set-up the MongoDB database access + + Parameters: + ------------ + db_name : str + the exact guildId to use + if `None`, the DB_access.db_client will be `None` but + DB_access.db_mongo_client will be available to use + else both `DB_access.db_client` and ` + DB_access.db_mongo_client` are avaialble to use + + the `db_client` has a specific access to the guild (db_name) + the `db_mongo_client` has more variety of access which + can be used to access to the whole databases (guilds) + connection_string : str + the connection string used to connect to MongoDB + """ + + client = self._get_mongoClient(connection_string) + self.db_name = db_name + # if db_name is None: + # self.db_client = None + # else: + # self.db_client = client[db_name] + + self.db_mongo_client = client + + def _get_mongoClient(self, connection_string): + """ + get the database instance + + Parameters: + ------------ + connection_string : string + the url of connection + Returns: + --------- + client : MongoClient + the mongodb client access + """ + client = MongoClient( + connection_string, serverSelectionTimeoutMS=10000, connectTimeoutMS=200000 + ) + + return client + + def _db_call(self, calling_function, query, feature_projection=None, sorting=None): + """ + call the function on database, it could be whether aggragation or find + Parameters: + ------------- + calling_function : function + can be `MongoClient.find` or `MongoClient.aggregate` + query : dictionary + the query as a dictionary + feature_projection : dictionary + the dictionary to or not to project the results on it + default is None, meaning to return all features + sorting : tuple + sort the results base on the input dictionary + if None, then do not sort the results + + Returns: + ---------- + cursor : mongodb Cursor + cursor to get the information of a query + """ + # if there was no projection available + if feature_projection is None: + # if sorting was given + if sorting is not None: + cursor = calling_function(query).sort(sorting[0], sorting[1]) + else: + cursor = calling_function(query) + else: + if sorting is not None: + cursor = calling_function(query, feature_projection).sort( + sorting[0], sorting[1] + ) + else: + cursor = calling_function(query, 
feature_projection) + + return cursor + + def query_db_aggregation(self, table, query, feature_projection=None, sorting=None): + """ + do aggregation operation the database using query + + Parameters: + ------------ + table : string + the table name to retrieve the data + query : dictionary + the query as a dictionary + feature_projection : dictionary + the dictionary to or not to project the results on it + default is None, meaning to return all features + sorting : tuple + sort the results base on the input dictionary + if None, then do not sort the results + + Returns: + ---------- + cursor : mongodb Cursor + cursor to get the information of a query + """ + + cursor = self._db_call( + calling_function=self.db_mongo_client[self.db_name][table].aggregate, + query=query, + feature_projection=feature_projection, + sorting=sorting, + ) + + return cursor + + def query_db_find(self, table, query, feature_projection=None, sorting=None): + """ + do find operation the database using query + + Parameters: + ------------ + table : string + the table name to retrieve the data + query : dictionary + the query as a dictionary + feature_projection : dictionary + the dictionary to or not to project the results on it + default is None, meaning to return all features + sorting : tuple + sort the results base on the input dictionary + if None, then do not sort the results + + Returns: + ---------- + cursor : mongodb Cursor + cursor to get the information of a query + """ + cursor = self._db_call( + calling_function=self.db_mongo_client[self.db_name][table].find, + query=query, + feature_projection=feature_projection, + sorting=sorting, + ) + return cursor diff --git a/discord_analyzer/DB_operations/mongodb_interaction.py b/discord_analyzer/DB_operations/mongodb_interaction.py new file mode 100644 index 0000000..4783c2f --- /dev/null +++ b/discord_analyzer/DB_operations/mongodb_interaction.py @@ -0,0 +1,250 @@ +import logging + +from discord_analyzer.DB_operations.mongodb_access import DB_access +from pymongo.read_concern import ReadConcern +from pymongo.write_concern import WriteConcern + + +class MongoDBOps: + def __init__(self, user, password, host, port): + """ + mongoDB database operations + """ + self.connection_str = f"mongodb://{user}:{password}@{host}:{port}" + self.DB_access = DB_access + + self.guild_msg = "" + # logging.basicConfig() + # logging.getLogger().setLevel(logging.INFO) + + def set_mongo_db_access(self, guildId=None): + """ + set a database access to a specific guild + + if guildId was `None` then the mongo_db_access just + have the `db_mongo_client` to use + but if wasn't then mongo_db_access + would also have db_client which is connected to a guild + """ + self.mongo_db_access = self.DB_access( + db_name=guildId, connection_string=self.connection_str + ) + self.guild_msg = f"GUILDID: {guildId}:" + + def _do_analytics_write_transaction( + self, + guildId, + delete_heatmaps, + delete_member_acitivities, + acitivties_list, + heatmaps_list, + batch_size=1000, + ): + """ + do write operations in a transaction. 
+ this transaction contains deleting data and insertion in a transaction + + Parameters: + ------------ + delete_heatmaps : bool + delete the heatmap data or not + delete_member_acitivities : bool + delete the memberactivities data or not + acitivties_list : list of dict + list of memberactivity data to store + heatmaps_list : list of dict + list of heatmap data to store + """ + + def callback_wrapper(session): + self._session_custom_transaction( + session, + guildId, + delete_heatmaps, + delete_member_acitivities, + acitivties_list, + heatmaps_list, + batch_size, + ) + + with self.mongo_db_access.db_mongo_client.start_session() as session: + session.with_transaction( + callback=callback_wrapper, + read_concern=ReadConcern("local"), + write_concern=WriteConcern("local"), + ) + + def _session_custom_transaction( + self, + session, + guildId, + delete_heatmaps, + delete_member_acitivities, + memberactiivties_list, + heatmaps_list, + batch_size=1000, + ): + """ + our custom transaction function + which contains the deletion of heatmaps and memberactivities + also insertion of activities_list and heatmaps_list after + + """ + self.guild_msg = f"GUILDID: {guildId}:" + + if delete_heatmaps: + logging.info(f"{self.guild_msg} Removing Heatmaps data!") + self.empty_collection(session=session, guildId=guildId, activity="heatmaps") + if delete_member_acitivities: + logging.info(f"{self.guild_msg} Removing MemberActivities MongoDB data!") + self.empty_collection( + session=session, guildId=guildId, activity="memberactivities" + ) + + if memberactiivties_list is not None and memberactiivties_list != []: + self.insert_into_memberactivities_batches( + session=session, + acitivities_list=memberactiivties_list, + guildId=guildId, + batch_size=batch_size, + ) + + if heatmaps_list is not None and heatmaps_list != []: + self.insert_into_heatmaps_batches( + session=session, + heatmaps_list=heatmaps_list, + guildId=guildId, + batch_size=batch_size, + ) + + def insert_into_memberactivities_batches( + self, session, acitivities_list, guildId, batch_size=1000 + ): + """ + insert data into memberactivities collection of mongoDB in batches + + Parameters: + ------------ + acitivities_list : list of dictionaries + a list of activities to be imported to memberactivities table + batch_size : int + the count of data in batches + default is 1000 + guildId : str + the guildId to insert data to it + """ + memberactivities_collection = session.client[guildId].memberactivities + self._batch_insertion( + collection=memberactivities_collection, + data=acitivities_list, + message=f"{self.guild_msg} Inserting memberactivities documents to MongoDB", + batch_size=batch_size, + ) + + def insert_into_heatmaps_batches( + self, session, heatmaps_list, guildId, batch_size=1000 + ): + """ + insert data into heatmaps collection of mongoDB in batches + + Parameters: + ------------ + heatmaps_list : list of dictionaries + a list of activities to be imported to memberactivities table + batch_size : int + the count of data in batches + default is 1000 + guildId : str + the guildId to insert data to it + """ + heatmaps_collection = session.client[guildId].heatmaps + + self._batch_insertion( + heatmaps_collection, + heatmaps_list, + message=f"{self.guild_msg} Inserting heatmaps documents to mongoDB", + batch_size=batch_size, + ) + + def _batch_insertion(self, collection, data, message, batch_size): + """ + do the batch insertion with and log a given message + + Parameters: + ------------- + collection : MongoDB collection + the collection to 
insert data into + data : list + data to insert into the collection + message : str + the additional message to log while insertion + batch_size : int + the count of data in batches + """ + data_len = len(data) + batch_count = data_len // batch_size + + for loop_idx, batch_idx in enumerate(range(0, data_len, batch_size)): + logging.info(f"{message}: Batch {loop_idx + 1}/{batch_count}") + collection.insert_many(data[batch_idx : batch_idx + batch_size]) + + def check_heatmaps(self, guildId, selectedChannels, heatmap_model): + """ + check whether all the channels are in heatmaps or not + + Parameters: + ------------- + guildId : str + the guildId to remove its collection data + selectedChannels : list + list of `channelId`s + heatmap_model : HeatMapModel + the heatmaps model to access it + + Returns: + --------- + is_available : bool + is all the selectedChannels available in heatmap collection or not + """ + heatmap_c = heatmap_model(self.mongo_db_access.db_mongo_client[guildId]) + channels = heatmap_c.get_channels_disctinct() + + if channels is not None: + # check if all the selected channels are available in heatmaps + is_available = all(element in selectedChannels for element in channels) + else: + log_msg = "MongoDB heatmaps table check raised an exception," + log_msg += " the heatmaps analysis wouldn't be done!" + logging.info(log_msg) + is_available = True + + return is_available + + def empty_collection(self, session, guildId, activity): + """ + empty a specified collection + + Parameters: + ------------- + session : mongoDB session + the session to needed to delete the data + guildId : str + the guildId to remove its collection data + activity : str + `memberactivities` or `heatmaps` or other collections + the collection to access and delete its data + + Returns: + --------- + `None` + """ + if activity == "heatmaps": + collection = session.client[guildId].heatmaps + elif activity == "memberactivities": + collection = session.client[guildId].memberactivities + else: + raise NotImplementedError( + "removing heatmaps or memberactivities are just implemented!" 
+ ) + + collection.delete_many({}) diff --git a/discord_analyzer/DB_operations/mongodb_query.py b/discord_analyzer/DB_operations/mongodb_query.py new file mode 100644 index 0000000..fe6842e --- /dev/null +++ b/discord_analyzer/DB_operations/mongodb_query.py @@ -0,0 +1,195 @@ +class MongodbQuery: + def __init__(self) -> None: + """ + create different queries to query the database + """ + pass + + def _check_inputs( + self, + acc_names, + channels, + dates, + variable_aggregation_type="and", + value_aggregation_type="or", + ): + """ + just check whether the inputs are correctly entered or not + """ + # checking the length of arrays + if len(acc_names) < 1: + raise ValueError("acc_names array is empty!") + if len(channels) < 1: + raise ValueError("channels array is empty!") + if len(dates) < 1: + raise ValueError("dates array is empty!") + + # checking the variable aggregation_type variable + if variable_aggregation_type not in ["and", "or"]: + raise ValueError( + f"variable aggregation type must be either `and` or \ + `or`!\nentered value is:{variable_aggregation_type}" + ) + + # checking the value aggregation_type variable + if value_aggregation_type not in ["and", "or"]: + raise ValueError( + f"value aggregation type must be either `and` or \ + `or`!\nentered value is:{value_aggregation_type}" + ) + + def create_query_filter_account_channel_dates( + self, + acc_names, + channels, + dates, + variable_aggregation_type="and", + value_aggregation_type="or", + date_key="date", + channel_key="channelId", + account_key="account_name", + ): + """ + A query to filter the database on account_name, + and/or channel_names, and/or dates. + the aggregation of varibales (`account_name`, `channels`, and `dates`) + can be set to `and` or `or` + + Parameters: + ------------ + acc_names : list of string + each string is an account name that needs to be included. + The minimum length of this list is 1 + channels : list of string + each string is a channel identifier for + the channels that need to be included. + The minimum length of this list is 1 + dates : list of datetime + each datetime object is a date that needs to be included. 
+ The minimum length of this list is 1 + should be in type of `%Y-%m-%d` which is the exact database format + variable_aggregation_type : string + values can be [`and`, `or`], the aggregation type between the variables + (variables are `acc_names`, `channels`, and `dates`) + `or` represents the or between the queries of acc_name, channels, dates + `and` represents the and between the queries of acc_name, channels, dates + default value is `and` + value_aggregation_type : string + values can be [`and`, `or`], the aggregation type between the + values of each variable + `or` represents the `or` operation between the values of input arrays + `and` represents the `and` operation between the values of input arrays + default value is `or` + date_key : string + the name of the field of date in database + default is `date` + channel_key : string + the id of the field of channel name in database + default is `channelId` + account_key : string + the name of the field account name in the database + default is `account_name` + Returns: + ---------- + query : dictionary + the query to get access + """ + + # creating each part of query seperately + + # creating date query + date_query = [] + for date in dates: + date_query.append({date_key: {"$regex": date}}) + + # creating channels query + channel_query = [] + + for ch in channels: + channel_query.append({channel_key: ch}) + + # creating the account_name query + account_query = [] + + for account in acc_names: + account_query.append({account_key: account}) + + # creating the query + query = { + "$" + + variable_aggregation_type: [ + {"$" + value_aggregation_type: account_query}, + {"$" + value_aggregation_type: channel_query}, + # for time we should definitly use `or` because + # `and` would result in nothing! + {"$or": date_query}, + ] + } + + return query + + def create_query_channel(self, channels_name): + """ + create a dictionary of query to get channel_id using channel_name + Parameters: + ------------- + channel_name : list + a list of channel names to retrive their id + + Returns: + --------- + query : dictionary + the query to retrieve the channel ids + """ + query_channelId = {"channel": {"$in": channels_name}} + + return query_channelId + + def create_query_threads( + self, channels_id, dates, channelsId_key="channelId", date_key="date" + ) -> dict: + """ + create a dictionary of query to query the DB, + getting the messages for specific channels and dates + Parameters: + ------------ + channels_id : list + list of strings, each string is a channel + identifier for the channels that needs to be included. + The minimum length of this list is 1 + dates : list + list of datetime objects, each datetime + object is a date that needs to be included. 
+ The minimum length of this list is 1 + channelsId_key : string + the field name corresponding to chnnel id in database + default value is `channelId` + date_key : string + the field name corresponding to date in database + default value is `date` + + Returns: + --------- + query : dictionary + a dictionary that query the database + """ + # Array inputs checking + if len(channels_id) < 1: + raise ValueError("channels_id array is empty!") + if len(dates) < 1: + raise ValueError("dates array is empty!") + + datetime_query = [] + for date in dates: + datetime_query.append({date_key: {"$regex": date}}) + + query = { + "$and": [ + {channelsId_key: {"$in": channels_id}}, + {"$or": datetime_query}, + # do not return the messages with no thread + {"thread": {"$ne": "None"}}, + ] + } + + return query diff --git a/discord_analyzer/DB_operations/network_graph.py b/discord_analyzer/DB_operations/network_graph.py new file mode 100644 index 0000000..504c5db --- /dev/null +++ b/discord_analyzer/DB_operations/network_graph.py @@ -0,0 +1,210 @@ +# Store and Rietrive the network graph from neo4j db + +import datetime + +import networkx + + +def make_neo4j_networkx_query_dict(networkx_graphs, guildId): + """ + make a list of queries to store networkx graphs into the neo4j + + Parameters: + ------------- + networkx_graphs : dictionary of networkx.classes.graph.Graph + or networkx.classes.digraph.DiGraph + the dictinoary keys is the date of graph and the values + are the actual networkx graphs + guildId : str + the guild that the members belong to + + Returns: + ----------- + queries_list : list + list of string queries to store data into neo4j + """ + # extract the graphs and their corresponding interaction dates + graph_list, graph_dates = list(networkx_graphs.values()), list( + networkx_graphs.keys() + ) + + # make a list of queries for each date to save + # the Useraccount and INTERACTED relation between them + queries_list = make_graph_list_query( + networkx_graphs=graph_list, + networkx_dates=graph_dates, + guildId=guildId, + toGuildRelation="IS_MEMBER", + ) + + return queries_list + + +def make_graph_list_query( + networkx_graphs: networkx.classes.graph.Graph, + networkx_dates: list[datetime.datetime], + guildId: str, + toGuildRelation: str = "IS_MEMBER", +): + """ + Make a list of queries for each graph to save their results + + Parameters: + ------------- + networkx_graphs : list of networkx.classes.graph.Graph + or networkx.classes.digraph.DiGraph + the list of graph created from user interactions + networkx_dates : list of dates + the dates for each graph + guildId : str + the guild that the members belong to + default is `None` meaning that it wouldn't be belonged to any guild + toGuildRelation : str + the relationship label that connect the users to guilds + default value is `IS_MEMBER` + + Returns: + --------- + final_queries : list of str + list of strings, each is a query for an interaction graph to be created + """ + final_queries = [] + + for graph, date in zip(networkx_graphs, networkx_dates): + nodes_dict = graph.nodes.data() + edges_dict = graph.edges.data() + + node_queries, query_relations = create_network_query( + nodes_dict, + edges_dict, + date, + guildId=guildId, + toGuildRelation=toGuildRelation, + ) + + final_queries.extend(node_queries) + final_queries.extend(query_relations) + + return final_queries + + +def create_network_query( + nodes_dict: networkx.classes.reportviews.NodeDataView, + edge_dict: networkx.classes.reportviews.EdgeDataView, + graph_date: datetime.datetime, + 
guildId: str, + nodes_type: str = "DiscordAccount", + rel_type: str = "INTERACTED_WITH", + toGuildRelation: str = "IS_MEMBER", +): + """ + make string query to save the accounts with their + account_name and relationships with their relation from **a graph**. + The query to add the nodes and edges is using `MERGE` operator + of Neo4j db since it won't create duplicate nodes and edges + if the relation and the account was saved before + + Parameters: + ------------- + nodes_dict : NodeDataView + the nodes of a Networkx graph + edge_dict : EdgeDataView + the edges of a Networkx graph + graph_date : datetime + the date of the interaction in as a python datetime object + nodes_type : str + the type of nodes to be saved + default is `Account` + rel_type : str + the type of relationship to create + default is `INTERACTED` + + Returns: + ---------- + node_queries : list of str + the list of MERGE queries for creating all nodes + rel_queries : list of str + the list of MERGE queries for creating all relationships + """ + # getting the timestamp `date` + graph_date_timestamp = ( + graph_date.replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc + ).timestamp() + * 1000 + ) + date_now_timestamp = ( + datetime.datetime.now() + .replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc + ) + .timestamp() + ) * 1000 + + # initializiation of queries + rel_queries = [] + node_queries = [] + + for node in nodes_dict: + node_str_query = "" + # retrieving node data + # user number + node_num = node[0] + # user account name + node_acc_name = node[1]["acc_name"] + # creating the query + node_str_query += ( + f"MERGE (a{node_num}:{nodes_type} {{userId: '{node_acc_name}'}}) " + ) + node_str_query += f"""ON CREATE SET a{node_num}.createdAt = + {int(date_now_timestamp)} + """ + + # relationship query between users and guilds + if guildId is not None: + # creating the guilds if they weren't created before + node_str_query += f"""MERGE (g:Guild {{guildId: '{guildId}'}}) + ON CREATE SET g.createdAt = {int(date_now_timestamp)} + """ + + node_str_query += f""" + MERGE (a{node_num}) + -[rel_guild{node_num}:{toGuildRelation}]-> (g) + ON CREATE SET + rel_guild{node_num}.createdAt = {int(date_now_timestamp)} + """ + + node_queries.append(node_str_query + ";") + + for idx, edge in enumerate(edge_dict): + rel_str_query = "" + + # retrieving edge data + + # relationship from user number + starting_acc_num = edge[0] + # relationship to user number + ending_acc_num = edge[1] + + starting_node_acc_name = nodes_dict[starting_acc_num]["acc_name"] + ending_node_acc_name = nodes_dict[ending_acc_num]["acc_name"] + + # the interaction count between them + interaction_count = edge[2]["weight"] + + rel_str_query += f"""MATCH (a{starting_acc_num}:{nodes_type} + {{userId: \'{starting_node_acc_name}\'}}) + MATCH (a{ending_acc_num}:{nodes_type} + {{userId: \'{ending_node_acc_name}\'}}) + MERGE + (a{starting_acc_num}) -[rel{idx}:{rel_type} + {{ + date: {int(graph_date_timestamp)}, + weight: {int(interaction_count)}, + guildId: '{guildId}' + }} + ]-> (a{ending_acc_num}) + """ + rel_queries.append(rel_str_query + ";") + + return node_queries, rel_queries diff --git a/discord_analyzer/__init__.py b/discord_analyzer/__init__.py new file mode 100644 index 0000000..3c8cd39 --- /dev/null +++ b/discord_analyzer/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# flake8: noqa +from .rn_analyzer import RnDaoAnalyzer diff --git a/discord_analyzer/analysis/__init__.py 
b/discord_analyzer/analysis/__init__.py new file mode 100644 index 0000000..e5a0d9b --- /dev/null +++ b/discord_analyzer/analysis/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/discord_analyzer/analysis/activity_hourly.py b/discord_analyzer/analysis/activity_hourly.py new file mode 100644 index 0000000..b231925 --- /dev/null +++ b/discord_analyzer/analysis/activity_hourly.py @@ -0,0 +1,657 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# activity_hourly.py +# +# Author Ene SS Rawa / Tjitse van der Molen + + +# # # # # import libraries # # # # # + +import json + +import numpy as np + + +def parse_reaction(s): + result = [] + for subitem in s: + items = subitem.split(",") + parsed_items = [] + for item in items: + parsed_items.append(item) + result.append(parsed_items) + return result + + +# # # # # main function # # # # # + + +def activity_hourly( + json_file, out_file_name=None, acc_names=[], mess_substring=None, emoji_types=None +): + """ + Counts activity per hour from json_file and stores in out_file_name + + Input: + json_file - [JSON]: list of JSON objects with message data + out_file_name - str: path and filename where output is stored + acc_names - [str]: account names for which activity should be + counted separately (default = []) + mess_substring - [str]: only messages containing at least one + substring in this list are considered. all messages are + considered if set to None (default = None) + emoji_types - [str]: only emojis in this list are considered. all + emojis are considered if set to None (default = None) + + Output: + warning_count - [int]: list of counts for the different possible + warnings that could be raised by the script: + 1st entry: number of messages sent by an author not listed in + acc_names + 2nd entry: number of times that a duplicate DayActivity object + is encounterd. if this happens, the first object in the list + is used. + 3rd entry: number of times a message author mentions themselves + in the message. these mentions are not counted + 4rd entry: number of times a message author emoji reacts to + their own message. 
these reactions are not counted + 5th entry: number of times an emoji sender is not in acc_names + 6th entry: number of times a mentioned account is not in + acc_names + 7th entry: number of times an account that is replied to is not + in acc_names + + Notes: + The results are saved as JSON objects based on out_file_name + """ + + # initiate array with zeros for counting error occurences + warning_count = [0] * 7 + + # initiate empty result array for DayActivity objects all_day_activity_obj = [] + + # add remainder category to acc_names + acc_names.append("remainder") + all_day_activity_obj = [] + # for each message + for mess in json_file: + # # # check for specific message content # # # + + # if message contains specified substring (or None are specified) + if (mess_substring is None) or ( + any([ss in mess["message_content"] for ss in mess_substring]) + ): + # # # extract data # # # + + # obtain message date, channel and author and reply author + mess_date = mess["datetime"].strftime("%Y-%m-%d") + mess_hour = int(mess["datetime"].strftime("%H")) + mess_chan = mess["channel"] + mess_auth = mess["author"] + rep_auth = mess["replied_user"] + + reactions = parse_reaction(mess["reactions"]) + + try: + # obtain index of author in acc_names + auth_i = acc_names.index(mess_auth) + except Exception as exp: + # if author is not in acc_names, + # raise warning and add counts to remainder + print( + f"WARNING: author name {mess_auth} not found in acc_names", + f"Exception: {exp}", + ) + warning_count[0] += 1 + auth_i = -1 + + if rep_auth is not None: + try: + # obtain index of reply author in acc_names + rep_i = acc_names.index(rep_auth) + except Exception as exp: + # if author is not in acc_names, raise warning + # and add counts to remainder + print( + f"WARNING: author name {rep_auth} not found in acc_names", + f"Exception: {exp}", + ) + warning_count[6] += 1 + rep_i = -1 + else: + rep_i = None + + # # # obtain object index in object list # # # + + # see if an object exists with corresponding date and channel + (all_day_activity_obj, obj_list_i, warning_count) = get_obj_list_i( + all_day_activity_obj, mess_date, mess_chan, acc_names, warning_count + ) + + # # # count activity per hour # # # + + # count reactions + (n_reac, reacting_accs, warning_count) = count_reactions( + reactions, emoji_types, mess_auth, warning_count + ) + + # if there are any reacting accounts + if len(reacting_accs) > 0: + # for each reacting account + for r_a in reacting_accs: + # add reacting accounts + all_day_activity_obj[obj_list_i].reacted_per_acc[auth_i].append(r_a) + + # add n_reac to hour of message that received the emoji + all_day_activity_obj[obj_list_i].reacted[auth_i, mess_hour] += int(n_reac) + + # count raised warnings + warning_count[4] += count_from_list( + reacting_accs, + acc_names, + all_day_activity_obj[obj_list_i].reacter, + mess_hour, + ) + + # count mentions + (n_men, n_rep_men, mentioned_accs, warning_count) = count_mentions( + mess["user_mentions"], rep_auth, mess_auth, warning_count + ) + + # if there are any mentioned accounts + if len(mentioned_accs) > 0: + # for each mentioned account + for m_a in mentioned_accs: + # add mentioned accounts + all_day_activity_obj[obj_list_i].mentioner_per_acc[auth_i].append( + m_a + ) + + # if message was not sent in thread + if mess["threadId"] is None: + # if message is default message + if mess["mess_type"] == 0: + # add 1 to hour of message + all_day_activity_obj[obj_list_i].lone_messages[ + auth_i, mess_hour + ] += int(1) + + # add n_men to hour for 
message sender + all_day_activity_obj[obj_list_i].mentioner[ + auth_i, mess_hour + ] += int(n_men) + + # count raised warnings + warning_count[5] += count_from_list( + mentioned_accs, + acc_names, + all_day_activity_obj[obj_list_i].mentioned, + mess_hour, + ) + + # if message is reply + elif mess["mess_type"] == 19: + # store account name that replied + # for author of message that was replied to + all_day_activity_obj[obj_list_i].replied_per_acc[rep_i].append( + mess_auth + ) + + # add 1 to hour of message for replier + all_day_activity_obj[obj_list_i].replier[auth_i, mess_hour] += 1 + + # add 1 to hour of message for replied + all_day_activity_obj[obj_list_i].replied[rep_i, mess_hour] += 1 + + # add n_men to hour for message sender + all_day_activity_obj[obj_list_i].mentioner[ + auth_i, mess_hour + ] += int(n_men) + + # count raised warnings + warning_count[5] += count_from_list( + mentioned_accs, + acc_names, + all_day_activity_obj[obj_list_i].mentioned, + mess_hour, + ) + + # add n_rep_men to hour of message + all_day_activity_obj[obj_list_i].rep_mentioner[ + auth_i, mess_hour + ] += int(n_rep_men) + all_day_activity_obj[obj_list_i].rep_mentioned[ + rep_i, mess_hour + ] += int(n_rep_men) + + # if reply is to unknown account + # and this account got mentioned in the reply + if n_rep_men > 0 and rep_i == -1: + print( + "WARNING: acc name {} not found in acc_names".format( + rep_auth + ) + ) + warning_count[5] += 1 + + # if message was sent in thread + else: + # if message is default message + if mess["mess_type"] == 0: + # add 1 to hour of message + all_day_activity_obj[obj_list_i].thr_messages[ + auth_i, mess_hour + ] += int(1) + # add n_men to hour for message sender + all_day_activity_obj[obj_list_i].mentioner[ + auth_i, mess_hour + ] += int(n_men) + + # count raised warnings + warning_count[5] += count_from_list( + mentioned_accs, + acc_names, + all_day_activity_obj[obj_list_i].mentioned, + mess_hour, + ) + # if message is reply + elif mess["mess_type"] == 19: + # store account name that replied + # for author of message that was replied to + all_day_activity_obj[obj_list_i].replied_per_acc[rep_i].append( + mess_auth + ) + + # add 1 to hour of message for replier + all_day_activity_obj[obj_list_i].replier[auth_i, mess_hour] += 1 + + # add 1 to hour of message for replied + all_day_activity_obj[obj_list_i].replied[rep_i, mess_hour] += int(1) + + # add n_men to hour for message sender + all_day_activity_obj[obj_list_i].mentioner[ + auth_i, mess_hour + ] += int(n_men) + + # count raised warnings + warning_count[5] += count_from_list( + mentioned_accs, + acc_names, + all_day_activity_obj[obj_list_i].mentioned, + mess_hour, + ) + + # add n_rep_men to hour of message + all_day_activity_obj[obj_list_i].rep_mentioner[ + auth_i, mess_hour + ] += int(n_rep_men) + all_day_activity_obj[obj_list_i].rep_mentioned[ + rep_i, mess_hour + ] += int(n_rep_men) + + # if reply is to unknown account + # and this account got mentioned in the reply + if n_rep_men > 0 and rep_i == -1: + print( + "WARNING: acc name {} not found in acc_names".format( + rep_auth + ) + ) + warning_count[5] += 1 + + # # # store results # # # + # json_out_file = store_results_json([i.asdict() for i in \ + # all_day_activity_obj], out_file_name) + return (warning_count, [i.asdict() for i in all_day_activity_obj]) + + +# # # # # classes # # # # # + + +class DayActivity: + # define constructor + def __init__( + self, + date, + channel, + lone_messages, + thr_messages, + replier, + replied, + mentioner, + mentioned, + rep_mentioner, + 
rep_mentioned, + reacter, + reacted, + reacted_per_acc, + mentioner_per_acc, + replied_per_acc, + acc_names, + ): + self.date = date # date of object + self.channel = channel # channel id of object + # number of lone messages per hour per account + self.lone_messages = lone_messages + # number of thread messages per hour per account + self.thr_messages = thr_messages + self.replier = replier # number of replies sent per hour per account + # number of replies received per hour per account + self.replied = replied + self.mentioner = mentioner # number of mentions sent per hour per account + # number of mentions received per hour per account + self.mentioned = mentioned + # number of reply mentions sent per hour per account + self.rep_mentioner = rep_mentioner + # number of reply mentions received per hour per account + self.rep_mentioned = rep_mentioned + # number of reactions sent per hour per account + self.reacter = reacter + # number of reactions received per hour per account + self.reacted = reacted + # list of account names from which reactions + # are received per account (duplicates = multiple reactions) + self.reacted_per_acc = reacted_per_acc + # list of account names that are mentioned by + # account per account (duplicates = multiple mentions) + self.mentioner_per_acc = mentioner_per_acc + # list of account names from which replies are + # received per account (duplicates = multiple replies) + self.replied_per_acc = replied_per_acc + # account names (corresponds to row index of activity types) + self.acc_names = acc_names + + # # # functions # # # + + # turn object into dictionary + + def asdict(self): + return { + "date": self.date, + "channel": self.channel, + "lone_messages": self.lone_messages.tolist(), + "thr_messages": self.thr_messages.tolist(), + "replier": self.replier.tolist(), + "replied": self.replied.tolist(), + "mentioner": self.mentioner.tolist(), + "mentioned": self.mentioned.tolist(), + "rep_mentioner": self.rep_mentioner.tolist(), + "rep_mentioned": self.rep_mentioned.tolist(), + "reacter": self.reacter.tolist(), + "reacted": self.reacted.tolist(), + "reacted_per_acc": self.reacted_per_acc, + "mentioner_per_acc": self.mentioner_per_acc, + "replied_per_acc": self.replied_per_acc, + "acc_names": self.acc_names, + } + + +# # # # # functions # # # # # + + +def get_obj_list_i( + all_day_activity_obj, mess_date, mess_chan, acc_names, warning_count +): + """ + Assesses index of DayActivity object + + Input: + all_day_activity_obj - [obj]: list of DayActivity objects + mess_date - str: date in which message was sent yyyy-mm-dd + mess_chan - str: name of channel in which message was sent + num_rows - int: number of rows for count arrays in DayActivity + + Output: + all_day_activity_obj - [obj]: updated list of DayActivity objects + obj_list_i - int: index of DayActivity object in + all_day_activity_obj that corresponds to the message + + Notes: + if no corresponding DayActivity object is found in + all_day_activity_obj, a new DayActivity object is appended + """ + + # check if DayActivity object corresponding to mess_date and mess_chan exists + obj_overlap = [ + all( + [ + getattr(obj, "date", "Attribute does not exist")[0] == mess_date, + getattr(obj, "channel", "Attribute does not exist")[0] == mess_chan, + ] + ) + for obj in all_day_activity_obj + ] + + # if there is no object for the channel date combination + if not any(obj_overlap): + # create DayActivity object and add it to the list + all_day_activity_obj.append( + DayActivity( + [mess_date], + [mess_chan], + 
np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + np.zeros((len(acc_names), 24), dtype=np.int16), + [[] for _ in range(len(acc_names))], + [[] for _ in range(len(acc_names))], + [[] for _ in range(len(acc_names))], + acc_names, + ) + ) + + # set list index for message + # TODO: Why it was -1? + obj_list_i = int(-1) + + else: + # set list index for message + obj_list_i = int(obj_overlap.index(True)) + + # see if object only occurs once and raise error if more than once + if sum(obj_overlap) > 1: + msg = "WARNING: duplicate DayActivity " + msg += "object, first entry in list is used" + print(msg) + warning_count[1] += 1 + + return all_day_activity_obj, obj_list_i, warning_count + + +# # # + + +def count_mentions(mess_mentions, replied_user, mess_auth, warning_count): + """ + Counts number of user mentions in a message + + Input: + mess_mentions - [str]: all user account names that are mentioned in + the message + replied_user - str: account name of author who is replied to if + message type is reply + mess_auth - str: message author + + Output: + n_men - int: number of mentions in message + n_rep_men - int: number of times the author of the message that is + replied to is mentioned in the message + reacting_accs - [str]: all account names that were mentioned + + Notes: + authors mentioning themselves are not counted + """ + + # set number of interactions to 0 + n_men = 0 + n_rep_men = 0 + mentioned_accs = [] + + # for each mentioned account + for mentioned in mess_mentions: + if mentioned is not None and len(mentioned) > 0: + # if mentioned account is the same as message author + if mentioned == mess_auth: + # print error and skip + msg = f"WARNING: {mess_auth} mentioned themselves. " + msg += "This is not counted" + print(msg) + warning_count[2] += 1 + + else: + # if mentioned account is not the account that was replied to + if mentioned != replied_user: + # add 1 to number of mentions + n_men += 1 + + # add mentioned account to mentioned_accs + mentioned_accs.append(mentioned) + + else: + # add 1 to number of replied mentions + n_rep_men = 1 + + return n_men, n_rep_men, mentioned_accs, warning_count + + +# # # + + +def count_reactions(mess_reactions, emoji_types, mess_auth, warning_count): + """ + Counts number of reactions to a message + + Input: + mess_reactions - [[str]]: list with a list for each emoji type, + containing the accounts that reacted with this emoji and the + emoji type (last entry of lists within list) + emoji_types - [str] or None: list of emoji types to be considered. 
+ All emojis are considered when None + mess_auth - str: message author + warning_count - [int]: list with counts for warning types + + Output: + n_reac - int: number of emoji reactions to post + reacting_accs - [str]: all account names that sent an emoji (if + account sent >1 emoji, account name will be listed >1) + warning_count - [int]: upated list with counts for warning types + + notes: + emojis reacted by the author of the message are not counted but lead + to a warning instead + """ + # set number of reactions to 0 + n_reac = 0 + + # make empty list for all accounts that sent an emoji + reacting_accs = [] + + # for every emoji type + for emoji_type in mess_reactions: + # if reacting account is in acc_names and + # reacted emoji is part of emoji_types if defined + if emoji_types is None or emoji_type[-1] in emoji_types: + # for each account that reacted with this emoji + for reactor in emoji_type[:-1]: + # if the message author posted the emoji + if reactor == mess_auth: + # print error and skip + msg = f"WARNING: {mess_auth} reacted to themselves." + msg += " This is not counted" + print(msg) + warning_count[3] += 1 + + # if the reactor is not empty + elif len(reactor) > 0: + # add 1 to number of reactions + n_reac += 1 + + # store name of reactor + reacting_accs.append(reactor) + + return n_reac, reacting_accs, warning_count + + +# # # + + +def count_from_list(acc_list, acc_names, to_count, mess_hour): + """ + Adds counts per hour to accounts from list + + Input: + acc_list - [str]: all account names that should be counted (the + account is counted for each time it is in the list, allowing for + duplicates) + acc_names - [str]: account names for which activity should be + counted separately + to_count - [[int]]: activity type to be counted + mess_hour - int: hour at which message with activity was sent + + Output: + warning_count - int: number of times warning was raised + + Notes: + counts are added to DayActivity object under the to_count variable + """ + + # initiate warning count at 0 + warning_count = 0 + + # for each account + for acc in acc_list: + try: + # obtain index of account name in acc_names + acc_i = acc_names.index(acc) + + except Exception as exp: + # if acc is not in acc_names, raise warning and add count to remainder + msg = f"WARNING: acc name {acc} not found in acc_names" + msg += f", Exception: {exp}" + print(msg) + warning_count += 1 + acc_i = -1 + + # add 1 to hour of message for acc + to_count[acc_i, mess_hour] += int(1) + + return warning_count + + +# # # + + +def store_results_json(save_dict, file_name, print_out=False): + """ + Stores dictionary or list of dictionaries as JSON file + + Input: + save_dict - {}, [{}]: dictionary or list of dictionaries to be saved + file_name - str: name (including path) to where data is saved + print_out - bool: whether message should be printed confirming that + the data is saved + + Output: + out_file - JSON: JSON file with content from save_dict + + Notes: + JSON file is also saved in location specified by file_name + """ + + # initiate output file + with open(file_name, "w") as f: + # store results + json.dump(save_dict, f) + + # # save and close output file + # out_file.close() + + if print_out: + print("data saved at: " + file_name) diff --git a/discord_analyzer/analysis/analytics_interactions_script.py b/discord_analyzer/analysis/analytics_interactions_script.py new file mode 100644 index 0000000..837b870 --- /dev/null +++ b/discord_analyzer/analysis/analytics_interactions_script.py @@ -0,0 +1,271 @@ +import 
itertools +from datetime import datetime +from warnings import warn + +from numpy import zeros + + +def sum_interactions_features(cursor_list, dict_keys): + """ + sum the interactions per hour + Parameters: + ------------ + cursor_list : list + the db cursor returned and converted as list + dict_keys : list + the list of dictionary keys, representing the features in database + + Returns: + ---------- + summed_counts_per_hour : dictionary + the dictionary of each feature having summed + the counts per hour, the dictionary of features is returned + """ + + summed_counts_per_hour = {} + for key in dict_keys: + summed_counts_per_hour[key] = zeros(24) + + for key in dict_keys: + # the array of hours 0:23 + for data in cursor_list: + summed_counts_per_hour[key] += data[key] + + return summed_counts_per_hour + + +def per_account_interactions( + cursor_list, + dict_keys=["replier_accounts", "reacter_accounts", "mentioner_accounts"], +): + """ + get per account interactions as `mentioner_accounts`, + `reacter_accounts`, and `replier_accounts` (summing) + Parameters: + ------------ + cursor_list : list + the db cursor returned and converted as list + dict_keys : list + the list of dictionary keys, representing the features in database + + Returns: + ---------- + summed_per_account_interactions : dictionary + the dictionary of each feature having summed the counts per hour, + the dictionary of features is returned + """ + + data_processed = {} + all_interaction_accounts = {} + + # for each interaction + for k in dict_keys: + temp_dict = {} + # get the data of a key in a map + samples = list(map(lambda data_dict: data_dict[k], cursor_list)) + + # flatten the list + samples_flattened = list(itertools.chain(*samples)) + + for i, sample in enumerate(samples_flattened): + account_name = sample[0]["account"] + interaction_count = sample[0]["count"] + + if account_name not in temp_dict.keys(): + temp_dict[account_name] = interaction_count + else: + temp_dict[account_name] += interaction_count + + if account_name not in all_interaction_accounts.keys(): + all_interaction_accounts[account_name] = interaction_count + else: + all_interaction_accounts[account_name] += interaction_count + + data_processed[k] = refine_dictionary(temp_dict) + + data_processed["all_interaction_accounts"] = refine_dictionary( + all_interaction_accounts + ) + + summed_per_account_interactions = data_processed + + return summed_per_account_interactions + + +def refine_dictionary(interaction_dict): + """ + refine dictionary and add the account id to the dictionary + + Parameters: + ------------ + interaction_dict : dict + a dictionary like {'user1': 5, 'user2: 4} + keys are usernames and values are the count of each user interaction + + Returns: + ---------- + refined_dict : nested dictionary + the input refined like this + { + '0': { 'user1': 5 }, + '1': { 'user2': 4 } + } + """ + + refined_dict = {} + for idx, data_acc in enumerate(interaction_dict.keys()): + refined_dict[f"{idx}"] = { + "account": data_acc, + "count": interaction_dict[data_acc], + } + + return refined_dict + + +def filter_channel_name_id( + cursor_list, channel_name_key="channelName", channel_id_key="channelId" +): + """ + filter the cursor list retrieved from DB for channels and their ids + + Parameters: + ------------- + cursor_list : list of dictionaries + the retreived values of DB + channel_name_key : string + the name of channel_name field in DB + default is `channel` + channel_id_key : string + the name of channel_id field in DB + default is `channelId` + 
Returns: + ---------- + channels_id_dict : dictionary + a dictionary with keys as channel_id and values as channel_name + """ + channels_id_dict = {} + for ch_id_dict in cursor_list: + # the keys in dict are channel id + chId = ch_id_dict[channel_id_key] + # and the values of dict are the channel name + channels_id_dict[chId] = ch_id_dict[channel_name_key] + + return channels_id_dict + + +def filter_channel_thread( + cursor_list, + # channels_id, + # channels_id_name, + thread_name_key="threadName", + author_key="author", + message_content_key="content", + date_key="createdDate", +): + """ + create a dictionary of channels and threads for messages, + sorted by time ascending + + Note: The cursor_list `MUST` be sorted ascending. + + Parameters: + ------------ + cursor_list : list of dictionaries + the list of values in DB containing a thread and messages of authors + # channels_id : list + # a list of channels id + # minimum length of the list is 1 + # channels_id_name : dict + # the dictionary containing {`channelId`: `channel_name`} + thread_name_key : string + the name of the thread field in DB + author_key : string + the name of the author field in DB + message_content_key : string + the name of the message content field in DB + date_key : str + the key to check whether the data is descending or not + + Returns: + ---------- + channel_thread_dict : {str:{str:{str:str}}} + a dictionary having keys of channel names, + and per thread messages as dictionary + # An example of output can be like this: + { + “CHANNEL_NAME1” : + { + “THREAD_NAME1” : + { + “1:@user1”: “Example message 1”, + “2:@user2”: “Example message 2”, + … + }, + “THREAD_NAME2” : + {More example messages in same format}, …}, + “CHANNEL_NAME2” : + {More thread dictionaries with example messages in same format}, …}, + More channel dictionaries with thread dictionaries + with example messages in same format, + … + } + """ + # check the input is descending + date_check = datetime(1961, 1, 1) + for data in cursor_list: + msg_date = datetime.strptime(data[date_key], "%Y-%m-%d %H:%M:%S") + if msg_date >= date_check: + date_check = msg_date + continue + else: + warn("Messages is not ascending ordered!") + + # First we're filtering the records via their channel name + channels_dict = {} + # create an empty array of each channel + # for chId in channels_id: + for record in cursor_list: + ch = record["channelName"] + if ch not in channels_dict: + channels_dict[ch] = [record] + else: + channels_dict[ch].append(record) + + # filtering through the channel name field in dictionary + # for record in cursor_list: + # # chId = record["channelId"] + # # ch = channels_id_name[chId] + # channels_dict[ch].append(record) + + # and the adding the filtering of thread id + channel_thread_dict = {} + + # filtering threads + for ch in channels_dict.keys(): + channel_thread_dict[ch] = {} + # initialize the index + idx = 1 + for record in channels_dict[ch]: + # get the thread name + thread = record[thread_name_key] + + # if the thread wasn't available in dict + # then make a dictionary for that + if thread not in channel_thread_dict[ch].keys(): + # reset the idx for each thread + idx = 1 + # creating the first message + channel_thread_dict[ch][thread] = { + f"{idx}:{record[author_key]}": record[message_content_key] + } + + # if the thread was created before + # then add the author content data to the dictionary + else: + # increase the index for the next messages in thread + idx += 1 + channel_thread_dict[ch][thread][f"{idx}:{record[author_key]}"] = 
record[ + message_content_key + ] + + return channel_thread_dict diff --git a/discord_analyzer/analysis/compute_interaction_matrix_discord.py b/discord_analyzer/analysis/compute_interaction_matrix_discord.py new file mode 100644 index 0000000..e5de975 --- /dev/null +++ b/discord_analyzer/analysis/compute_interaction_matrix_discord.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# compute_interaction_matrix_discord.py +# +# Author Ene SS Rawa / Tjitse van der Molen + +from discord_analyzer.analysis.utils.activity import Activity +from discord_analyzer.DB_operations.mongodb_query import MongodbQuery +from numpy import ndarray + +from .utils.compute_interaction_mtx_utils import ( + generate_interaction_matrix, + prepare_per_account, +) + + +def compute_interaction_matrix_discord( + acc_names, + dates, + channels, + db_access, + activities: list[str] = [Activity.Mention, Activity.Reply, Activity.Reaction], +) -> dict[str, ndarray]: + """ + Computes interaction matrix from discord data + + Input: + -------- + acc_names - [str] : list of all account names to be considered for analysis + dates - [str] : list of all dates to be considered for analysis + channels - [str] : list of all channel ids to be considered for analysis + db_access - obj : database access object + activities - list[Activity] : + the list of activities to generate the matrix for + default is to include all 3 `Activity` types + minimum length is 1 + + Output: + --------- + int_mtx : dict[str, np.ndarray] + keys are representative of an activity + and the 2d matrix representing the interactions for the activity + """ + + feature_projection = { + "thr_messages": 0, + "lone_messages": 0, + "replier": 0, + "replied": 0, + "mentioner": 0, + "mentioned": 0, + "reacter": 0, + "reacted": 0, + "__v": 0, + "_id": 0, + } + + # intiate query + query = MongodbQuery() + + # set up query dictionary + query_dict = query.create_query_filter_account_channel_dates( + acc_names=acc_names, + channels=channels, + dates=dates, + date_key="date", + channel_key="channelId", + account_key="account_name", + ) + + # create cursor for db + cursor = db_access.query_db_find( + table="heatmaps", query=query_dict, feature_projection=feature_projection + ) + db_results = list(cursor) + + per_acc_query_result = prepare_per_account(db_results=db_results) + + # And now compute the interactions per account_name (`acc`) + int_mat = {} + # computing `int_mat` per activity + for activity in activities: + int_mat[activity] = generate_interaction_matrix( + per_acc_interactions=per_acc_query_result, + acc_names=acc_names, + activities=[activity], + ) + + return int_mat diff --git a/discord_analyzer/analysis/compute_member_activity.py b/discord_analyzer/analysis/compute_member_activity.py new file mode 100644 index 0000000..3e53443 --- /dev/null +++ b/discord_analyzer/analysis/compute_member_activity.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# member_activity_history.py +# +# Author Ene SS Rawa / Tjitse van der Molen + +from datetime import datetime, timedelta + +import networkx as nx +import numpy as np +from dateutil.relativedelta import relativedelta +from discord_analyzer.analysis.compute_interaction_matrix_discord import ( + compute_interaction_matrix_discord, +) +from discord_analyzer.analysis.member_activity_history import check_past_history +from discord_analyzer.analysis.utils.member_activity_history_utils import ( + MemberActivityPastUtils, +) +from discord_analyzer.analysis.utils.member_activity_utils 
import ( + convert_to_dict, + get_joined_accounts, + get_latest_joined_users, + get_users_past_window, + store_based_date, + update_activities, +) +from discord_analyzer.DB_operations.mongodb_access import DB_access +from tc_core_analyzer_lib.assess_engagement import EngagementAssessment +from tc_core_analyzer_lib.utils.activity import DiscordActivity + + +def compute_member_activity( + db_name, + connection_string, + channels, + acc_names, + date_range, + window_param, + act_param, + logging, + load_past_data=True, +): + """ + Computes member activity and member interaction network + + Input + db_name: (str) - guild id + connection_string: (str) - connection to db string + channels: [str] - list of all channel ids that should be analysed + acc_names: [str] - list of all account names that should be analysed + date_range: [str] - list of first and last date to be analysed (one output per date) + window_param: [int] - + entry 1: window size in days. default = 7 + entry 2: step size of sliding window in days. default = 1 + (Currently these values will be default values, + in the future, the user might be able to set these in the + extraction settings page) + act_param: [int] - + entry 1: INT_THR - int : + minimum number of interactions to be active. + Default = 1 + entry 2: UW_DEG_THR - int : + minimum number of connections to be active. + Default = 1 + entry 3: PAUSED_T_THR - int : + time period to remain paused. + Default = 1 + entry 4: CON_T_THR - int : + time period to assess consistently active. + Default = 4 + entry 5: CON_O_THR - int : + times to be active within CON_T_THR to be consistently active. + Default = 3 + entry 6: EDGE_STR_THR - int : + minimum number of interactions for connected. + Default = 5 + entry 7: UW_THR_DEG_THR - int : + minimum number of accounts for connected. + Default = 5 + entry 8: VITAL_T_THR - int : + time period to assess for vital. + Default = 4 + entry 9: VITAL_O_THR - int : + times to be connected within VITAL_T_THR to be vital. + Default = 3 + entry 10: STILL_T_THR - int : + time period to assess for still active. + Default = 3 + entry 11: STILL_O_THR - int : + times to be active within STILL_T_THR to be still active. + Default = 2 + entry 12: DROP_H_THR - int: + Default = 2 + entry 13: DROP_I_THR - int: + Default = 1 + (Currently these values will be default values, + in the future, the user might be able to adjust these) + + Output + network_dict: {datetime:networkx obj} - + dictionary with python datetime objects as keys and networkx graph + objects as values. + The keys reflect the last date of the WINDOW_D day window + over which the network was computed. + The values contain the computed networks. + activity_dict: {str:{str:set}} - + dictionary with keys reflecting each member activity type and + dictionaries as values. Each nested dictionary contains an index string as + key reflecting the number of STEP_D steps have been + taken since the first analysis period. The values in the nested dictionary + are python sets with account names that belonged to that category + in that period. The length of the set reflects the total number. 
+ load_past_data : bool + whether to load past data or not, default is True + if True, will load the past data, if data was available in given range + """ + guild_msg = f"GUILDID: {db_name}:" + + # make empty results output array + + # # # DATABASE SETTINGS # # # + + # set up database access + db_access = DB_access(db_name, connection_string) + + # specify the features not to be returned + + # initiate result dictionary for network graphs + network_dict = {} + + # initiate result dictionaries for engagement types + activity_dict = { + "all_joined": {}, + "all_joined_day": {}, + "all_consistent": {}, + "all_vital": {}, + "all_active": {}, + "all_connected": {}, + "all_paused": {}, + "all_new_disengaged": {}, + "all_disengaged": {}, + "all_unpaused": {}, + "all_returned": {}, + "all_new_active": {}, + "all_still_active": {}, + "all_dropped": {}, + "all_disengaged_were_newly_active": {}, + "all_disengaged_were_consistently_active": {}, + "all_disengaged_were_vital": {}, + "all_lurker": {}, + "all_about_to_disengage": {}, + "all_disengaged_in_past": {}, + } + activities_name = list(activity_dict.keys()) + + if load_past_data: + # past_activities_date is the data from past activities + # new_date_range is defined to change the date_range with past data loaded + # starting_key is the starting key of actuall analysis + past_activities_data, new_date_range, starting_key = check_past_history( + db_access=db_access, + date_range=date_range, + collection_name="memberactivities", + window_param=window_param, + ) + else: + past_activities_data = {} + new_date_range = [ + datetime.strptime(date_range[0], "%y/%m/%d"), + datetime.strptime(date_range[1], "%y/%m/%d"), + ] + starting_key = 0 + + # if in past there was an activity, we'll update the dictionaries + if past_activities_data != {}: + activities = update_activities( + past_activities=past_activities_data, activities_list=activities_name + ) + activity_dict = convert_to_dict( + data=list(activities), dict_keys=activities_name + ) + + # if there was still a need to analyze some data in the range + # also if there was some accounts and channels to be analyzed + if new_date_range != []: + # all_joined data + + # if the date range wasn't as long as a date window, + # no analytics for the days would be computed + # so make it as a window_d lenght to have the computations + new_date_range_interval = (new_date_range[1] - new_date_range[0]).days + if new_date_range_interval < window_param[0] - 1: + interval_before = (new_date_range_interval) + (window_param[0] - 1) + new_date_range[0] = new_date_range[1] - timedelta(days=interval_before) + + member_activity_utils = MemberActivityPastUtils(db_access=db_access) + ( + activity_dict["all_joined"], + activity_dict["all_joined_day"], + ) = member_activity_utils.update_joined_accounts( + start_dt=new_date_range[0], + end_dt=new_date_range[1], + all_joined_day=activity_dict["all_joined_day"], + starting_key=starting_key, + window_d=window_param[0], + ) + + # # # DEFINE SLIDING WINDOW RANGE # # # + + # determine window start times + start_dt = new_date_range[0] + end_dt = new_date_range[1] + + time_diff = end_dt - start_dt + + # determine maximum start time (include last day in date_range) + last_start = time_diff - relativedelta(days=window_param[0] - 1) + + # # # ACTUAL ANALYSIS # # # + + assess_engagment = EngagementAssessment( + activities=[ + DiscordActivity.Mention, + DiscordActivity.Reply, + DiscordActivity.Reaction, + ], + activities_ignore_0_axis=[DiscordActivity.Mention], + activities_ignore_1_axis=[], + ) 
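# Illustrative sketch of the sliding-window arithmetic below, assuming the
# default window_param = [7, 1] (7-day window, 1-day step) and a hypothetical
# 30-day new_date_range: last_start = 30 - (7 - 1) = 24 days, so
# max_range = floor(24 / 1) + 1 = 25 windows are analyzed, and window w_i
# ends on new_date_range[0] + w_i * 1 + (7 - 1) days, meaning the final
# window (w_i = 24) ends exactly on new_date_range[1].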
+ + # for every window index + max_range = int(np.floor(last_start.days / window_param[1]) + 1) + # if max range was chosen negative, + # then we have to make it zero + # (won't affect the loop but will affect codes after it) + if max_range < 0: + max_range = 0 + if acc_names != [] and channels != []: + for w_i in range(max_range): + msg_info = "MEMBERACTIVITY ANALYTICS: PROGRESS" + msg = f"{guild_msg} {msg_info} {w_i + 1}/{max_range}" + logging.info(msg) + new_window_i = w_i + starting_key + + last_date = ( + new_date_range[0] + + relativedelta(days=window_param[1] * w_i) + + relativedelta(days=window_param[0] - 1) + ) + + # make list of all dates in window + date_list_w = [] + for x in range(window_param[0]): + date_list_w.append(last_date - relativedelta(days=x)) + + # make empty array for date string values + date_list_w_str = np.zeros_like(date_list_w) + + # turn date time values into string + for i in range(len(date_list_w_str)): + date_list_w_str[i] = date_list_w[i].strftime("%Y-%m-%d") + + window_start = last_date - relativedelta(days=window_param[0]) + + # updating account names for past 7 days + acc_names = get_users_past_window( + window_start_date=window_start.strftime("%Y-%m-%d"), + collection=db_access.db_mongo_client[db_name]["heatmaps"], + ) + + if acc_names == []: + time_window_str = f"{window_start.strftime('%Y-%m-%d')} - " + time_window_str += last_date.strftime("%Y-%m-%d") + logging.warning( + f"{guild_msg} No data for the time window {time_window_str}" + ) + logging.info( + """Getting latest joined instead! + So we could compute other activity types!""" + ) + + # will get 5 users just to make sure + # we could have empty outputs + acc_names = get_latest_joined_users(db_access, count=5) + + # obtain interaction matrix + int_mat = compute_interaction_matrix_discord( + acc_names, date_list_w_str, channels, db_access + ) + + # for each int_mat type + for key in list(int_mat.keys()): + # remove interactions with self + int_mat[key][np.diag_indices_from(int_mat[key])] = 0 + + # assess engagement + (graph_out, *activity_dict) = assess_engagment.compute( + int_mat=int_mat, + w_i=new_window_i, + acc_names=np.asarray(acc_names), + act_param=act_param, + WINDOW_D=window_param[0], + **activity_dict, + ) + + activity_dict = convert_to_dict( + data=list(activity_dict), dict_keys=activities_name + ) + + # make empty dict for node attributes + node_att = {} + + # store account names in node_att dict + for i, node in enumerate(list(graph_out)): + node_att[node] = acc_names[i] + + # assign account names in node_att to node attributes of graph_out + nx.set_node_attributes(graph_out, node_att, "acc_name") + + # store results in dictionary + network_dict[last_date] = graph_out + # else if there was no past data + else: + max_range = 0 + + start_dt = datetime.strptime(date_range[0], "%y/%m/%d") + end_dt = datetime.strptime(date_range[1], "%y/%m/%d") + + # get the accounts with their joining date + joined_acc_dict = get_joined_accounts( + db_access=db_access, date_range=[start_dt, end_dt + timedelta(days=1)] + ) + + activity_dict_per_date = store_based_date( + start_date=start_dt, + all_activities=activity_dict, + analytics_day_range=window_param[0] - 1, + joined_acc_dict=joined_acc_dict, + load_past=load_past_data, + empty_channel_acc=(len(channels) != 0 and len(acc_names) != 0), + ) + + return [network_dict, activity_dict_per_date] diff --git a/discord_analyzer/analysis/member_activity_history.py b/discord_analyzer/analysis/member_activity_history.py new file mode 100644 index 
0000000..f50ba53 --- /dev/null +++ b/discord_analyzer/analysis/member_activity_history.py @@ -0,0 +1,141 @@ +# checking the past history of member activities + +# Importing libraries +import datetime + +from dateutil import parser +from discord_analyzer.analysis.utils.member_activity_history_utils import ( + MemberActivityPastUtils, +) + + +# the main script function +def check_past_history( + db_access, + date_range, + window_param, + collection_name="memberactivities", + verbose=False, +): + """ + check past member_activities history and + return if some analysis were available in db in the date_range + + Parameters: + ------------- + db_access: DB_access + the database access class that queries are called through it + date_range: list of strings + a list of length 2, the first index has the start of the interval + and the second index is end of the interval + *Note*: Each value of the array should be in the format of `str(%y/%m/%d)` + window_param : tuple of int + a tuple with length 2, first parameter is window length + and the second one is the step + collection_name: string + the collection of db to use + default is `memberactivities` + verbose : bool + whether to print the logs or not + + Returns: + ---------- + all_activity_data_dict : dictionary + the data for past activities + new_date_range : list + list of new date range in datetime format + because the last + maximum_key : int + the maximum key that the new data should start its data from + """ + # checking the inputs + if len(date_range) != 2: + raise ValueError( + f"""date_range should have the length of two, + first index is the start of the interval and the + second index is the end of the interval + its length is: {len(date_range)}""" + ) + + # the input date_range in format of datetime + # converting the dates into datetime format + date_format = "%y/%m/%d" + date_range_start = datetime.datetime.strptime(date_range[0], date_format) + date_range_end = datetime.datetime.strptime(date_range[1], date_format) + + member_act_past_utils = MemberActivityPastUtils(db_access=db_access) + + # creating the query + query = member_act_past_utils.create_past_history_query(date_range) + + # do not project the variables that we don't need + feature_projection = { + # 'first_end_date': 1, + # 'all_consistent': 1, + "_id": 0 + } + # sorting the results from past to now (ascending) + # sorting by `date` + sorting = ["date", 1] + + # quering the db now + cursor = db_access.query_db_find( + collection_name, query, feature_projection, sorting + ) + # getting a list of returned data + past_data_new_schema = list(cursor) + + # if any past data was available in DB + if past_data_new_schema != []: + if verbose: + print(past_data_new_schema) + + # db_analysis_start_date = parser.parse(past_data[0]['date']) + # db_analysis_start_date = date_range_start + db_analysis_end_date = parser.parse(past_data_new_schema[-1]["date"]) + + # days_after_analysis_start = ( + # db_analysis_end_date - db_analysis_start_date + # ).days + + past_data = member_act_past_utils.convert_back_to_old_schema( + past_data_new_schema, + date_range_start, + window_param=window_param, + ) + + else: + # db_analysis_start_date = None + db_analysis_end_date = None + + # the input date_range in format of datetime + # converting the dates into datetime format + date_format = "%y/%m/%d" + date_range_start = datetime.datetime.strptime(date_range[0], date_format) + date_range_end = datetime.datetime.strptime(date_range[1], date_format) + + # if for the requested date_range, its results 
were available in db + if (db_analysis_end_date is not None) and (date_range_start < db_analysis_end_date): + # refine the dates + # if the range end was smaller than the analysis end, + # then empty the new_date_range + # empty it, since all the requested analysis are available in db + if date_range_end <= db_analysis_end_date: + new_date_range = [] + else: + # start date would be the next day of the end day + new_date_range = [ + db_analysis_end_date + datetime.timedelta(days=1), + date_range_end, + ] + + all_activity_data_dict = past_data + # maximum key is used for having the key for future data + # maximum_key = days_after_analysis_start + 1 + maximum_key = len(past_data_new_schema) + else: + all_activity_data_dict = {} + new_date_range = [date_range_start, date_range_end] + maximum_key = 0 + + return all_activity_data_dict, new_date_range, maximum_key diff --git a/discord_analyzer/analysis/neo4j_analysis/__init__.py b/discord_analyzer/analysis/neo4j_analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/analysis/neo4j_analysis/analyzer_node_stats.py b/discord_analyzer/analysis/neo4j_analysis/analyzer_node_stats.py new file mode 100644 index 0000000..5741669 --- /dev/null +++ b/discord_analyzer/analysis/neo4j_analysis/analyzer_node_stats.py @@ -0,0 +1,244 @@ +# analyzer whether a node is sender or receiver +import logging +from uuid import uuid1 + +import pandas as pd +from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_neo4j_lib.neo4j_ops import Neo4jOps + + +class NodeStats: + def __init__(self, neo4j_ops: Neo4jOps, threshold: int = 2) -> None: + """ + initialize the Node status computations object + the status could be either one of `Sender`, `Receiver`, `Balanced` + + Parameters: + ------------- + gds : GraphDataScience + the gds instance to do computations on it + neo4j_ops : Neo4jOps + neo4j shared library instance to use + threshold : int + the threshold value to compute the stats + default is 2 meaning for the node + - If in_degrees > threhold * out_degree then it's frequent receive + - else if out_degrees > threhold * in_degree then it's frequent sender + - else it is balanced + + """ + self.gds = neo4j_ops.gds + self.driver = neo4j_ops.neo4j_driver + self.threshold = threshold + + def compute_stats(self, guildId: str, from_start: bool) -> None: + projection_utils = ProjectionUtils(gds=self.gds, guildId=guildId) + + # possible dates to do the computations + possible_dates = projection_utils.get_dates(guildId=guildId) + + # if we didn't want to compute from the day start + if not from_start: + computed_dates = self.get_computed_dates(projection_utils, guildId) + possible_dates = possible_dates - computed_dates + + for date in possible_dates: + try: + self.compute_node_stats_wrapper(projection_utils, guildId, date) + except Exception as exp: + msg = f"GUILDID: {guildId} " + logging.error( + f"{msg} node stats computation for date: {date}, exp: {exp}" + ) + + def compute_node_stats_wrapper( + self, projection_utils: ProjectionUtils, guildId: str, date: float + ): + """ + a wrapper for node stats computation process + we're doing the projection here and computing on that, + then we'll drop the pojection + + Parameters: + ------------ + projection_utils : ProjectionUtils + the utils needed to get the work done + guildId : str + the guild we want the temp relationships + between its members + date : float + timestamp of the relation + """ + # NATURAL relations direction degreeCentrality computations + 
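# Note on the two degree streams computed below (illustrative, not from the
# original source): with the NATURAL orientation the weighted degree is a
# member's out-degree (interactions sent), while the REVERSE orientation
# yields the in-degree (interactions received). get_date_stats then compares
# the two against the threshold; as a rough example with the default
# threshold of 2, a member with out-degree 10 and in-degree 3 would be
# labelled a frequent sender, since 10 > 2 * 3.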
graph_name = f"GraphStats_{uuid1()}" + + projection_utils.project_temp_graph( + guildId=guildId, + graph_name=graph_name, + weighted=True, + relation_direction="NATURAL", + date=date, + ) + natural_dc = self.gds.run_cypher( + f""" + CALL gds.degree.stream( + '{graph_name}', + {{ + relationshipWeightProperty: 'weight' + }} + ) + YIELD nodeId, score + RETURN gds.util.asNode(nodeId).userId AS userId, score + """ + ) + + reverse_dc = self.gds.run_cypher( + f""" + CALL gds.degree.stream( + '{graph_name}', + {{ + orientation: 'REVERSE', + relationshipWeightProperty: 'weight' + }} + ) + YIELD nodeId, score + RETURN gds.util.asNode(nodeId).userId AS userId, score + """ + ) + + df = self.get_date_stats(natural_dc, reverse_dc, threshold=self.threshold) + + self.save_properties_db(guildId, df, date) + _ = self.gds.run_cypher( + f""" + CALL gds.graph.drop( + "{graph_name}" + ) + """ + ) + + def get_computed_dates( + self, projection_utils: ProjectionUtils, guildId: str + ) -> set[float]: + """ + get the computed dates of our guild + """ + query = f""" + MATCH (:DiscordAccount) + -[r:INTERACTED_IN]->(g:Guild {{guildId: '{guildId}'}}) + WHERE r.status IS NOT NULL + RETURN r.date as computed_dates + """ + computed_dates = projection_utils.get_computed_dates(query=query) + + return computed_dates + + def get_date_stats( + self, sender_info: pd.DataFrame, reciever_info: pd.DataFrame, threshold: int + ) -> pd.DataFrame: + merged_df = pd.merge( + sender_info, reciever_info, on="userId", suffixes=("_S", "_R") + ) + # getting the ones that at least receiver or sender count + merged_df = merged_df[(merged_df["score_R"] != 0) | (merged_df["score_S"] != 0)] + + # Frequent Receiver + merged_df["freq_reciver"] = ( + merged_df["score_R"] > threshold * merged_df["score_S"] + ) + + # Frequent Sender + merged_df["freq_sender"] = ( + merged_df["score_S"] > threshold * merged_df["score_R"] + ) + + merged_df = self._compute_stats(merged_df) + + del merged_df["freq_reciver"] + del merged_df["freq_sender"] + del merged_df["score_R"] + del merged_df["score_S"] + + return merged_df + + def _compute_stats( + self, + merged_df: pd.DataFrame, + sender_col: str = "freq_sender", + receiver_col: str = "freq_reciver", + ) -> pd.DataFrame: + """ + get the final conclusion of user stats + the user must be either Receiver, Sender, or Balanceed + saving back to a column named `Balanced` + + Parameters: + ------------ + merged_df : pd.Dataframe + the dataframe that merged the degreeCentralities column + sender_col : str + column named representing the question of "is the user Sender?" + default is "freq_sender" + receiver_col : str + column named representing the question of "is the user Receiver?" 
+ default is "freq_reciver" + + Returns: + --------- + merged_df : pd.DataFrame + returning the dataframe with a column named `stats` + """ + + stats = [] + for _, row in merged_df.iterrows(): + sender = row[sender_col] + receiver = row[receiver_col] + + if sender: + stats.append(0) + elif receiver: + stats.append(1) + elif not sender and not receiver: + stats.append(2) + else: + # S-> Sender + # R -> Receiver + logging.error("It isn't possible to have both S and R True!") + + merged_df["stats"] = stats + + return merged_df + + def save_properties_db( + self, guildId: str, user_status: pd.DataFrame, date: float + ) -> None: + """ + save user stats to their nodes + + Parameters: + ------------ + guildId : str + the guildId we're using + user_status : pd.DataFrame + dataframe containing `userId` and `stats` for each user + date : float + the date in timestamp format + """ + with self.driver.session() as session: + for _, row in user_status.iterrows(): + userId = row["userId"] + status = row["stats"] + + query = """ + MATCH (a:DiscordAccount {userId: $userId}) + MATCH (g:Guild {guildId: $guildId}) + MERGE (a) -[r:INTERACTED_IN { + date: $date + }] -> (g) + SET r.status = $status + """ + session.run( + query, userId=userId, guildId=guildId, status=status, date=date + ) + prefix = f"GUILDID: {guildId}: " + logging.info(f"{prefix}Node stats saved for the date: {date}") diff --git a/discord_analyzer/analysis/neo4j_analysis/centrality.py b/discord_analyzer/analysis/neo4j_analysis/centrality.py new file mode 100644 index 0000000..bf7bf17 --- /dev/null +++ b/discord_analyzer/analysis/neo4j_analysis/centrality.py @@ -0,0 +1,365 @@ +import logging +from typing import Literal + +import pandas as pd +from discord_analyzer.analysis.neo4j_metrics import Neo4JMetrics +from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_neo4j_lib.neo4j_ops import Neo4jOps + + +class Centerality: + def __init__(self, neo4j_ops: Neo4jOps) -> None: + """ + centerality algorithms + """ + self.neo4j_ops = neo4j_ops + + def compute_degree_centerality( + self, + guildId: str, + direction: str, + from_start: bool, + **kwargs, + ) -> dict[float, dict[str, float]]: + """ + compute the weighted count of edges coming to a node + it would be based on the date + the computed_dates will be based on the + network decentrality metric computations + + Parameters: + ------------ + guildId : str + the user nodes of guildId + gds : GraphDataScience + the gds instance to interact with DB + direction : str + the direction of relation + could be `in_degree`, `out_degree`, `undirected` + from_start : bool + whether to compute everything from scratch + or continue the computations + kwargs : dict + node : str + the name of the node we're computing degree centrality + default is `DiscordAccount` + weighted : bool + assuming the edges as weighted or not + default is `True` + normalize : bool + whether to normalize the values or not + default is False, meaning values wouldn't be normalized + preserve_parallel : bool + preserve parallel relationships + or do not count 2 the parallel relations + default is `True` which means we do + count the parallel relationships as 2 + + Never use `preserve_parallel=True` with `weighted=True` because + it could produce wrong results, since we cannot sum weights with + parallel relationships + recompute_dates : set[datetime.timestamp] + the dates that must be included in computations + in another words, recompute analytics for that date + + Returns: + ---------- + 
degree_centerality : dict[float, dict[str, float]] + the degree centerality per date for each user + """ + + node = "DiscordAccount" if "node" not in kwargs.keys() else kwargs["node"] + weighted = True if "weighted" not in kwargs.keys() else kwargs["weighted"] + normalize = False if "normalize" not in kwargs.keys() else kwargs["normalize"] + preserve_parallel = ( + True + if "preserve_parallel" not in kwargs.keys() + else kwargs["preserve_parallel"] + ) + + recompute_dates = None + if "recompute_dates" in kwargs: + recompute_dates = kwargs["recompute_dates"] + + if weighted and not preserve_parallel: + logging.warn( + """preserver_parallel=False with weighted=True + could produce wrong results!""" + ) + + # determining one line of the query useing the direction variable + if direction == "in_degree": + query = f"MATCH (a:{node})<-[r:INTERACTED_WITH]-(b:{node})" + elif direction == "out_degree": + query = f"MATCH (a:{node})-[r:INTERACTED_WITH]->(b:{node})" + elif direction == "undirected": + query = f"MATCH (a:{node})-[r:INTERACTED_WITH]-(b:{node})" + + results = self.neo4j_ops.gds.run_cypher( + f""" + {query} + WHERE r.guildId = '{guildId}' + RETURN + a.userId as a_userId, + r.date as date, + r.weight as weight, + b.userId as b_userId + """ + ) + + dates_to_compute = set(results["date"].value_counts().index) + if not from_start: + projection_utils = ProjectionUtils(gds=self.neo4j_ops.gds, guildId=guildId) + + dates_to_compute = self._get_dates_to_compute( + projection_utils, dates_to_compute, guildId + ) + if recompute_dates is not None: + dates_to_compute = dates_to_compute.union(recompute_dates) + + degree_centerality = self.count_degrees( + computation_date=dates_to_compute, + results=results, + weighted=weighted, + normalize=normalize, + preserve_parallel=preserve_parallel, + ) + + return degree_centerality + + def _get_dates_to_compute( + self, + projection_utils: ProjectionUtils, + user_interaction_dates: set[float], + guildId: str, + ) -> set[float]: + """ + exclude available analyzed date + + Parameters: + ------------- + user_interaction_dates : set[float] + the date of interactions between users + guildId : str + the guildId to get computations date + """ + query = f""" + MATCH (g:Guild {{guildId: '{guildId}'}}) + -[r:HAVE_METRICS] -> (g) + WHERE r.decentralizationScore IS NOT NULL + RETURN r.date as computed_dates + """ + computed_dates = projection_utils.get_computed_dates(query) + + dates_to_compute = user_interaction_dates - computed_dates + + return dates_to_compute + + def count_degrees( + self, + computation_date: set[float], + results: pd.DataFrame, + weighted: bool, + normalize: bool, + preserve_parallel: bool, + ) -> dict[float, dict[str, float]]: + """ + count the degree of nodes + (the direction of the relation depends on the results) + + Parameters: + ------------- + results : pd.DataFrame + the results for userId, `interaction_date`, and `weight` of relations + computation_date : set[float] + the dates to compute the analytics + weighted : bool + whether to use the weights of the relationships and compute + the degrees weighted or not + True means assume relationships weighted + normalize : bool + whether to normalize the values or not + default is False, meaning values wouldn't be normalized + preserve_parallel : bool + do or do not count parallel relationships + if True, if would count the parallel relationships + + Returns: + ----------- + degree_centrality : dict[float, dict[str, float]] + the results per date degrees of each user + """ + per_date_acc_weights: 
dict[float, dict[str, float]] = {} + + userIds = set(results["a_userId"].value_counts().index).union( + results["b_userId"].value_counts().index + ) + + # a variable for normalizing + # saving max value of each date + date_max_values: dict[float, float] = {} + + for date in computation_date: + per_date_acc_weights[date] = {} + date_max_values[date] = 0 + # find the results for a specific date + results_per_date = results[results["date"] == date] + for user in userIds: + relation_users = [] + results_per_date_user = results_per_date[ + results_per_date["a_userId"] == user + ] + for _, row in results_per_date_user.iterrows(): + a_userId = row["a_userId"] + b_userId = row["b_userId"] + + relation = set([a_userId, b_userId]) + + # if we've counted the relation before + # and preserver_parallel is False + if relation in relation_users and not preserve_parallel: + continue + + if a_userId in per_date_acc_weights[date]: + per_date_acc_weights[date][a_userId] += ( + row["weight"] if weighted else 1 + ) + else: + per_date_acc_weights[date][a_userId] = ( + row["weight"] if weighted else 1 + ) + + # saving it not to repeat if preserve_parallel is False + relation_users.append(relation) + + # updating the max value + if date_max_values[date] < per_date_acc_weights[date][a_userId]: + date_max_values[date] = per_date_acc_weights[date][a_userId] + + degree_centrality = per_date_acc_weights + if normalize: + degree_centrality = self.normalize_degree_centrality( + per_date_acc_weights, date_max_values + ) + + return degree_centrality + + def normalize_degree_centrality( + self, + per_date_acc_weights: dict[float, dict[str, float]], + date_max_values: dict[float, float], + ) -> dict[float, dict[str, float]]: + """ + normalize the per_acc_date_weights of degree centrality + + Parameters: + ------------ + per_date_acc_weights : dict[float, dict[str, float]] + the results per date degrees of each user + first float is representing the date and second one is the weight + str is also the user + date_max_values : dict[float, float] + max values in each date + keys are dates and values are the maximum values + + Returns: + ---------- + per_date_acc_weights : dict[float, dict[str, float]] + the normalized version of `per_date_acc_weights` + """ + for date in per_date_acc_weights.keys(): + for user in per_date_acc_weights[date].keys(): + # normalizing the weight + per_date_acc_weights[date][user] = ( + per_date_acc_weights[date][user] / date_max_values[date] + ) + + return per_date_acc_weights + + def compute_network_decentrality( + self, + guildId: str, + from_start: bool, + save: bool = True, + weighted: bool = False, + ) -> dict[float, float | Literal[-1]]: + """ + compute the network decentrality over the date periods + + Parameters: + ------------- + guildId : str + the guildId that we want to compute the network decentraility + save : bool + save the results of network decentrality in db + default is `True` meaning we would save the results back to db + neo4j_ops : Neo4jOps + the utils instance to save the results + will be used if save=True, else a None value could be given + weighted : bool + wether to use the weights of each edge or not + default is `False` + + Returns: + --------- + network_decentrality : dict[float, float | Literal[-1]] + the decentrality over time + keys are timestamp in float format + values are the decenrality values + """ + + results_undirected = self.compute_degree_centerality( + guildId=guildId, + direction="undirected", + weighted=weighted, + normalize=True, + 
preserve_parallel=False, + from_start=from_start, + ) + + neo4j_metrics = Neo4JMetrics(self.neo4j_ops.gds) + + # saving each date network decentrality + network_decentrality: dict[float, float | Literal[-1]] = {} + for date in results_undirected.keys(): + centerality = list(results_undirected[date].values()) + network_decentrality[date] = neo4j_metrics.compute_decentralization( + centerality + ) + + if save: + self.save_decentralization_score(guildId, network_decentrality) + + return network_decentrality + + def save_decentralization_score( + self, + guildId: str, + decentrality_score: dict[float, float | Literal[-1]], + ) -> None: + """ + save network decentrality scores over time in the Guild node + + Parameters: + ------------- + guiildId : str + the guild that we're saving data into + decentrality_score : dict[float, float] + the network decentrality scores over time + """ + # preparing the queries + queries = [] + for date in decentrality_score.keys(): + query = f""" + MATCH (g: Guild {{guildId: '{guildId}'}}) + MERGE (g) -[r:HAVE_METRICS {{ + date: {date} + }}]-> (g) + SET r.decentralizationScore = {decentrality_score[date]} + """ + queries.append(query) + + self.neo4j_ops.store_data_neo4j( + queries, + message=f"GUILDID: {guildId}: Saving Network Decentrality:", + ) diff --git a/discord_analyzer/analysis/neo4j_analysis/local_clustering_coefficient.py b/discord_analyzer/analysis/neo4j_analysis/local_clustering_coefficient.py new file mode 100644 index 0000000..869cd8f --- /dev/null +++ b/discord_analyzer/analysis/neo4j_analysis/local_clustering_coefficient.py @@ -0,0 +1,161 @@ +import logging +from uuid import uuid1 + +from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from graphdatascience import GraphDataScience + + +class LocalClusteringCoeff: + def __init__(self, gds: GraphDataScience) -> None: + self.gds = gds + + def compute(self, guildId: str, from_start: bool = False) -> None: + """ + computing the localClusteringCoefficient + per date of each interaction and saving them in nodes + + + Parameters: + ------------ + gds : GraphDataScience + the python GraphDataScience instance + neo4j_analytics : Neo4JMetrics object + our written Neo4JMetrics class instance + use_names : bool + whether to add user names to results + if True, the userId will be added alongside nodeId in output + default is False + from_start : bool + whether to compute the metric from the first day or not + if True, then would compute from start + default is False + + Returns: + --------- + `None` + """ + projection_utils = ProjectionUtils(gds=self.gds, guildId=guildId) + + # Getting all possible dates + computable_dates = projection_utils.get_dates(guildId=guildId) + + computed_dates = self.get_computed_dates(projection_utils, guildId) + + # compute for each date + to_compute: set[float] + if from_start: + to_compute = computable_dates + else: + to_compute = computable_dates - computed_dates + + # for the computation date + for date in to_compute: + try: + self.local_clustering_computation_wrapper( + projection_utils=projection_utils, guildId=guildId, date=date + ) + except Exception as exp: + msg = f"GUILDID: {guildId} " + logging.error( + f"{msg}localClustering computation for date: {date}, exp: {exp}" + ) + + def local_clustering_computation_wrapper( + self, projection_utils: ProjectionUtils, guildId: str, date: float + ) -> None: + """ + a wrapper for local clustering coefficient computation process + we're doing the projection here and computing on that, + then we'll drop 
the pojection + + Parameters: + ------------ + projection_utils : ProjectionUtils + the utils needed to get the work done + guildId : str + the guild we want the temp relationships + between its members + date : float + timestamp of the relation + """ + graph_projected_name = f"GraphLocalClustering_{uuid1()}" + projection_utils.project_temp_graph( + guildId=guildId, + graph_name=graph_projected_name, + weighted=True, + date=date, + ) + + # get the results as pandas dataframe + self.compute_graph_lcc( + date=date, graph_name=graph_projected_name, guildId=guildId + ) + + # dropping the computed date + _ = self.gds.run_cypher( + f""" + CALL gds.graph.drop("{graph_projected_name}") + """ + ) + + def get_computed_dates( + self, projection_utils: ProjectionUtils, guildId: str + ) -> set[float]: + """ + get localClusteringCoeff computed dates + + Parameters: + ------------ + guildId : str + the guild we want the temp relationships + between its members + projection_utils : ProjectionUtils + the utils needed to get the work done + + Returns: + ---------- + computed_dates : set[float] + the computation dates + """ + # getting the dates computed before + query = f""" + MATCH (:DiscordAccount) + -[r:INTERACTED_IN]->(g:Guild {{guildId: '{guildId}'}}) + WHERE r.localClusteringCoefficient IS NOT NULL + RETURN r.date as computed_dates + """ + computed_dates = projection_utils.get_computed_dates(query) + + return computed_dates + + def compute_graph_lcc(self, date: float, graph_name: str, guildId: str) -> None: + """ + compute the localClusteringCoefficient for the given graph + and write the results back to the nodes + + Parameters: + ------------ + date : float + timestamp of the relation + graph_name : str + the operation would be done on the graph + guild : str + the guildId to save the data for it + """ + msg = f"GUILDID: {guildId}" + try: + _ = self.gds.run_cypher( + f""" + CALL gds.localClusteringCoefficient.stream( + "{graph_name}" + ) YIELD nodeId, localClusteringCoefficient + WITH + gds.util.asNode(nodeId) as userNode, + localClusteringCoefficient + MATCH (g:Guild {{guildId: '{guildId}'}}) + MERGE (userNode) -[r:INTERACTED_IN {{date: {date}}}]-> (g) + SET r.localClusteringCoefficient = localClusteringCoefficient + """ + ) + except Exception as exp: + logging.error(f"{msg} error in computing localClusteringCoefficient, {exp}") diff --git a/discord_analyzer/analysis/neo4j_metrics.py b/discord_analyzer/analysis/neo4j_metrics.py new file mode 100644 index 0000000..b23d6b3 --- /dev/null +++ b/discord_analyzer/analysis/neo4j_metrics.py @@ -0,0 +1,243 @@ +import os + +from discord_analyzer.analysis.neo4j_utils.compute_metrics import Neo4JMetrics +from dotenv import load_dotenv +from tc_neo4j_lib.neo4j_ops import Neo4jOps + + +def degree_centrality( + gds, + neo4j_analytics, + use_names=False, + drop_projection=True, + method="stream", + node="DiscordAccount", + relationship="INTERACTED", + relationship_orientation="NATURAL", + parallel_relationship=False, +): + """ + a sample function to show how to compute DegreeCenterality using neo4j_ops + Note: this function does not assume the relation over time + + + Parameters: + ------------ + gds : GraphDataScience + the python GraphDataScience instance + neo4j_analytics : Neo4JMetrics object + our written Neo4JMetrics class instance + use_names : bool + whether to add user names to results + if True, the userId will be added alongside nodeId in output + default is False + drop_projection : bool + drop the graph projection + default is True, which means the 
graph projections + will be dropped after metric computation + **Note:** Must drop the projection to be able to update results, + make it False if you want do something experimental. + method : str + whether `stream`, `stats`, `Mutate`, or `write`, default is `stream` + each has a special effect on the database, + see: https://neo4j.com/docs/graph-data-science/current/graph-catalog-node-ops/ + node : str + the node name we're computing the degree centrality for + NOTE: Important to have the node exactly like it is saved in DB. + relationship : str + the relationship name we're computing the degree centrality for + relationship_orientation : str + the relationship orientation to be assumed + either `NATURAL`, `REVERSE`, or `UNDIRECTED` + parallel_relationship : bool + whether to assume parallel relationship as one or the real count + if False, then for relationship like A -> B + and B->A the degree centrality of A and B will be 2 + else the degree centrality of A and B will be 1 + + Returns: + --------- + results : pandas dataframe + the results of metrics in pandas dataframe format + """ + + if relationship_orientation not in ["NATURAL", "REVERSE", "UNDIRECTED"]: + msg_prefix = "Wrong relationship orientation given" + msg_prefix += "should be either `NATURAL`, `REVERSE`, or `UNDIRECTED`!" + raise ValueError(f"{msg_prefix} Entered: {relationship_orientation}") + + # compute the total weight of each INTERACTED relationship + gds.run_cypher( + """MATCH (a:DiscordAccount) -[r:INTERACTED]-(:DiscordAccount) + SET r.total_weight= REDUCE(total=0, weight in r.weights | total + weight);""" + ) + + # make the relationship projection configs + relationship_projection = {} + + if parallel_relationship: + relationship_projection[f"{relationship}"] = { + "properties": {"total_weight": {"aggregation": "SUM"}}, + "orientation": f"{relationship_orientation}", + } + else: + relationship_projection[f"{relationship}"] = { + "orientation": f"{relationship_orientation}", + "properties": ["total_weight"], + } + + # first we have to apply the projection (will be saved in server memory) + G, _ = gds.graph.project("MyGraph", node, relationship_projection) + + configuration = None + if method == "write": + configuration = {"relationshipWeightProperty": "total_weight"} + + # get the results as pandas dataframe + results = neo4j_analytics.compute_degreeCenterality( + G, method=method, configuration=configuration + ) + + if use_names: + results["userId"] = results["nodeId"].apply( + lambda nodeId: dict(gds.util.asNode(nodeId))["userId"] + ) + + if drop_projection: + _ = gds.graph.drop(G) + + return results + + +def decenterialization_score(neo4j_analytics, centrality_scores): + """ + a sample function to show how the network decentrality can be computed + + Parameters: + ------------ + neo4j_analytics : Neo4JMetrics object + our written Neo4JMetrics class instance + centrality_scores : array + array of user centrality scores + + Returns: + --------- + network_decentrality : float + the decentrality score of network + """ + network_decentrality = neo4j_analytics.compute_decentralization(centrality_scores) + + return network_decentrality + + +if __name__ == "__main__": + load_dotenv() + + protocol = os.getenv("NEO4J_PROTOCOL") + host = os.getenv("NEO4J_HOST") + port = os.getenv("NEO4J_PORT") + db_name = os.getenv("NEO4J_DB") + + url = f"{protocol}://{host}:{port}" + + user, password = (os.getenv("NEO4J_USER"), os.getenv("NEO4J_PASSWORD")) + + neo4j_ops = Neo4jOps() + neo4j_ops.set_neo4j_db_info(db_name, url, user, 
password) + neo4j_ops.neo4j_database_connect() + + gds = neo4j_ops.gds + + neo4j_analytics = Neo4JMetrics(gds) + + results_degreeCenterality = degree_centrality( + gds, + neo4j_analytics=neo4j_analytics, + use_names=True, + drop_projection=True, + method="stream", + node="DiscordAccount", + relationship="INTERACTED", + relationship_orientation="UNDIRECTED", + parallel_relationship=True, + ) + + # finding the output relationship counts from a node + results_degreeCentrality_OUT = degree_centrality( + gds, + neo4j_analytics=neo4j_analytics, + use_names=True, + drop_projection=True, + method="stream", + node="DiscordAccount", + relationship="INTERACTED", + relationship_orientation="NATURAL", + # parallel_relationship = True + ) + # finding the input relationship counts to a node + results_degreeCentrality_IN = degree_centrality( + gds, + neo4j_analytics=neo4j_analytics, + use_names=True, + drop_projection=True, + method="stream", + node="DiscordAccount", + relationship="INTERACTED", + relationship_orientation="REVERSE", + # parallel_relationship = True + ) + + # what guilds to find isolated nodes + guildId_arr = ["123456789101112", "993163081939165234", "1012430565959553145"] + results_isolated_discordNodes = neo4j_analytics.compute_isolated_nodes( + guildId=guildId_arr + ) + results_isolation_fraction = neo4j_analytics.compute_isolated_nodes_fraction( + guildId=guildId_arr + ) + results_network_density = neo4j_analytics.compute_network_density( + guildId=guildId_arr + ) + + # adding the scores in and scores out + # to pandas dataframe of `results_degreeCenterality` + results_degreeCenterality["score_in"] = results_degreeCentrality_IN["score"] + results_degreeCenterality["score_out"] = results_degreeCentrality_OUT["score"] + results_degreeCenterality["score_undirected"] = results_degreeCenterality["score"] + + # normalizing undirected scores + results_degreeCenterality[ + "normalized_score_undirected" + ] = results_degreeCenterality["score"] / sum( + results_degreeCenterality["score"].values > 0 + ) + # the normalization over positive score_out + results_degreeCenterality["normalized_score_out"] = results_degreeCenterality[ + "score_out" + ] / sum(results_degreeCenterality["score_out"].values > 0) + # the normalization over positive score_in + results_degreeCenterality["normalized_score_in"] = results_degreeCenterality[ + "score_in" + ] / sum(results_degreeCenterality["score_in"].values > 0) + + results_decentralityScore = decenterialization_score( + neo4j_analytics=neo4j_analytics, + centrality_scores=results_degreeCenterality[ + "normalized_score_undirected" + ].values, + ) + + print("------------------ Degree Centerality ------------------") + print(results_degreeCenterality, "\n") + + print("------------------ Network Decentrality Score ------------------") + print(results_decentralityScore, "\n") + + print("------------------ Isolated Nodes ------------------") + print(f"Isolated Nodes in guilds: {guildId_arr}") + print(results_isolated_discordNodes, "\n") + print("Isolation fraction: ", results_isolation_fraction, "\n") + + print("------------------ Network Density ------------------") + print(f"Network Density for guilds: {guildId_arr}") + print(results_network_density) diff --git a/discord_analyzer/analysis/neo4j_utils/__init__.py b/discord_analyzer/analysis/neo4j_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/analysis/neo4j_utils/compute_metrics.py b/discord_analyzer/analysis/neo4j_utils/compute_metrics.py new file mode 100644 index 
0000000..0dea17b --- /dev/null +++ b/discord_analyzer/analysis/neo4j_utils/compute_metrics.py @@ -0,0 +1,310 @@ +# Computation of Neo4j analytics +from typing import Literal + +import numpy as np +from graphdatascience import GraphDataScience + + +class Neo4JMetrics: + def __init__(self, gds: GraphDataScience) -> None: + """ + computation of Neo4J metrics + + Parameters: + ------------ + gds : GraphDataScience + the GraphDataScience instance to query the DB + """ + self.gds = gds + + def compute_degreeCenterality(self, graphProjection, method, configuration=None): + """ + compute the degree decenterality metrics for the graphProjection + + Parameters: + ------------- + graphProjection : gds.graph.project + the graph projection to compute the local Clustering Coefficient on it + method : str + whether `stream`, `stats`, `Mutate`, or `write` + each has a special effect on the database, + https://neo4j.com/docs/graph-data-science/current/graph-catalog-node-ops/ + configuration : str or list or map + additional configurations for the gds_operator + default is `None` meaning no configurations is applied + + Returns: + ---------- + results : pandas dataframe + the result of gds.localClusteringCoefficient in pandas dataframe format + """ + + results = self._run_on_method( + gds_operator=self.gds.degree, + method=method, + graphProjection=graphProjection, + additional_configurations=configuration, + ) + + return results + + def compute_isolated_nodes( + self, guildId, nodeType="DiscordAccount", relType="INTERACTED" + ): + """ + retrieve the isolated nodes for one or more guilds + + Parameters: + ------------ + guildId : list of str + string id + minimum length must be 1 + nodeType : str + optional, default is `DiscordAccount` + relType : str + optional, default is `INTERACTED` + the relationship that would be assumed to compute the metric + + Returns: + --------- + isolated_nodes : pandas dataframe + the isolated nodes list + """ + if not isinstance(guildId, list): + raise ValueError( + f"guildId should be a list of string! Given type is: {type(guildId)}" + ) + if len(guildId) < 1: + msg = "guildId should be a list with minimum length of 1!" + raise ValueError(f"{msg} Given length is: {len(guildId)}") + + isolated_nodes = self.gds.run_cypher( + f""" + MATCH (isolated_nodes:{nodeType}) -[:IS_MEMBER]->(guild:Guild) + WHERE + NOT (isolated_nodes)-[:{relType}]-() AND + guild.guildId IN {guildId} + RETURN DISTINCT (isolated_nodes).userId AS userId + """ + ) + + return isolated_nodes + + def compute_isolated_nodes_fraction( + self, guildId, nodeType="DiscordAccount", relType="INTERACTED" + ): + """ + retrieve the count isolated nodes divided by all nodes for one or more guilds + + Parameters: + ------------ + guildId : list of str + string id + minimum length must be 1 + nodeType : str + optional, default is `DiscordAccount` + relType : str + optional, default is `INTERACTED` + the relationship that would be assumed to compute the metric + + Returns: + --------- + isolation_fraction : float + the fraction of isolation in network + """ + if not isinstance(guildId, list): + raise ValueError( + f"guildId should be a list of string! Given type is: {type(guildId)}" + ) + if len(guildId) < 1: + msg = "guildId should be a list with minimum length of 1!" 
+ raise ValueError(f"{msg} Given length is: {len(guildId)}") + + result = self.gds.run_cypher( + f""" + MATCH (isolated_nodes:{nodeType}) -[:IS_MEMBER]->(guild:Guild) + WHERE not (isolated_nodes)-[:{relType}]-() AND guild.guildId in {guildId} + WITH COUNT(isolated_nodes) * 1.0 as isolated_nodes_count + MATCH (nodes:DiscordAccount) -[:IS_MEMBER]-> (guild:Guild) + WHERE guild.guildId in {guildId} + + WITH COUNT(nodes) as all_nodes_count, isolated_nodes_count + CALL apoc.when( + all_nodes_count = 0, + 'RETURN 0 AS isolation_fraction', + 'RETURN $isolated_nodes_count / $all_nodes_count AS isolation_fraction', + {{ + isolated_nodes_count: isolated_nodes_count, + all_nodes_count: all_nodes_count + }} + ) YIELD value + RETURN value.isolation_fraction as isolation_fraction + """ + ) + + # getting the float value from + # a one row dataframe with column name `isolation_fraction` + return result["isolation_fraction"].values[0] + + def compute_network_density( + self, guildId, nodeType="DiscordAccount", relType="INTERACTED" + ): + """ + compute network density for one or more guilds + + Parameters: + ------------ + guildId : list of str + string id + minimum length must be 1 + nodeType : str + optional, default is `DiscordAccount` + relType : str + optional, default is `INTERACTED` + the relationships that would be count to compute the metric + + Returns: + --------- + network_density : float + the fraction of isolation in network + """ + if not isinstance(guildId, list): + raise ValueError( + f"guildId should be a list of string! Given type is: {type(guildId)}" + ) + if len(guildId) < 1: + msg = "guildId should be a list with minimum length of 1!" + raise ValueError(f"{msg} Given length is: {len(guildId)}") + + result = self.gds.run_cypher( + f""" + MATCH (nodes:{nodeType}) -[:IS_MEMBER]->(guild:Guild) + WHERE guild.guildId in {guildId} + WITH + COUNT(DISTINCT(nodes)) * 1.0 * (COUNT(DISTINCT(nodes)) - 1) * 2 + AS potential_connection_count + MATCH (nodes)-[r:{relType}]-() + WITH COUNT(DISTINCT(r)) * 1.0 AS actual_connection_count, + potential_connection_count + RETURN + actual_connection_count / potential_connection_count + AS network_density + """ + ) + + # getting the float value from a one row dataframe + # with column name `network_density` + return result["network_density"].values[0] + + def compute_decentralization(self, centrality: list[float]) -> float | Literal[-1]: + """ + Computes degree decentralization score of a graph + Note: the degreeCenterality must be computed before to comute descenterality + + Parameters: + ------------- + centrality : list[float] + list of centrality scores per node + + Returns: + ---------- + network_decentrality : float + the decentrality score + """ + # converting to numpy + centrality_np = np.array(centrality) + + # get number of non-zero values in list + n_val_nonzero = len(centrality_np[centrality_np != 0]) + # n_val = float(len(centrality)) + + # define denominator + c_denominator = (n_val_nonzero - 1) * (n_val_nonzero - 2) + + # get max centrality + c_node_max = max(centrality) + + # sort centrality scores + c_sorted = sorted(centrality, reverse=True) + + # initate c_numerator at 0 + c_numerator: float = 0.0 + + # for each sorted score + for value in c_sorted: + # computing over the positive values + if value != 0: + # remove normalisation for each value + c_numerator += c_node_max * (n_val_nonzero - 1) - value * ( + n_val_nonzero - 1 + ) + if c_denominator != 0: + # compute network centrality + network_centrality = float(c_numerator / c_denominator) + + 
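# Illustrative worked example: for centrality scores [1.0, 0.5, 0.5] there are
# n = 3 non-zero values, c_denominator = (3 - 1) * (3 - 2) = 2,
# c_node_max = 1.0, and c_numerator = 0.0 + 1.0 + 1.0 = 2.0, so
# network_centrality = 1.0 and the decentralization below becomes
# 2 * (100 - 100) = 0; for equal non-zero scores the numerator is 0 and the
# decentralization reaches its maximum of 200.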
# compute network decentrality + network_decentrality = 2 * (100 - (network_centrality * 100)) + else: + # setting `-1` + network_decentrality = -1 + + return network_decentrality + + def _run_on_method( + self, gds_operator, method, graphProjection, additional_configurations=None + ): + """ + run the gds_operation with the method `stream`, `stats`, `Mutate`, or `write` + + Parameters: + ------------ + gds_operator : gds.#some operation + the graph datascience operation + method : str + whether `stream`, `stats`, `mutate`, or `write` + each has a special effect on the database, + https://neo4j.com/docs/graph-data-science/current/graph-catalog-node-ops/ + graphProjection : gds.graph.project + the graph projection to compute the local Clustering Coefficient on it + additional_configurations : str or list or map + additional configurations for the gds_operator + default is `None` meaning no configurations is used + + Returns: + ------------ + results : pandas dataframe + the result of gds operation in pandas dataframe format + """ + if method == "stream": + if additional_configurations is not None: + results = gds_operator.stream( + graphProjection, additional_configurations + ) + else: + results = gds_operator.stream(graphProjection) + + elif method == "stats": + if additional_configurations is not None: + results = gds_operator.stats(graphProjection, additional_configurations) + else: + results = gds_operator.stats(graphProjection) + + elif method == "mutate": + if additional_configurations is not None: + results = gds_operator.mutate( + graphProjection, additional_configurations + ) + else: + results = gds_operator.mutate(graphProjection) + + elif method == "write": + if additional_configurations is not None: + results = gds_operator.write(graphProjection, additional_configurations) + else: + results = gds_operator.write(graphProjection) + else: + prefix_msg = "Invalid method name, " + prefix_msg += "should be either `stream`, `stats`, `mutate`, or `write`" + raise ValueError(f"{prefix_msg}\n given: {method} ") + + return results diff --git a/discord_analyzer/analysis/neo4j_utils/projection_utils.py b/discord_analyzer/analysis/neo4j_utils/projection_utils.py new file mode 100644 index 0000000..070714d --- /dev/null +++ b/discord_analyzer/analysis/neo4j_utils/projection_utils.py @@ -0,0 +1,140 @@ +import logging + +from graphdatascience import GraphDataScience + + +class ProjectionUtils: + def __init__(self, gds: GraphDataScience, guildId: str) -> None: + self.gds = gds + self.guildId = guildId + + def project_temp_graph( + self, + guildId: str, + graph_name: str, + **kwargs, + ) -> None: + """ + project a temperory graph on the INTERACTED_WITH relations + + Parameters: + ------------ + guildId : str + the guildId we want to do the projection + graph_name : str + the name we want to name the projected graph + **kwargs : + weighted : bool + whether to do the projection weighted or not + default is False which means it doesn't include + `weight` property of the graph + relation_direction : str + either `NATURAL`, `REVERSE`, `UNDIRECTED` + default is `UNDIRECTED` + projection_query : str + the projection query for nodes `a` and `b` and the relation `r` + default is + `MATCH (a:DiscordAccount) + -[r:INTERACTED_WITH {{guildId: '{guildId}'}}]-> + (b:DiscordAccount)` + date : float + if we want to include date in the graph projection query + """ + # getting kwargs + weighted = False + if "weighted" in kwargs: + weighted = kwargs["weighted"] + + relation_direction = "UNDIRECTED" + if 
"relation_direction" in kwargs: + relation_direction = kwargs["relation_direction"] + + projection_query: str + if "date" in kwargs: + date = kwargs["date"] + projection_query = f"""MATCH (a:DiscordAccount) + -[r:INTERACTED_WITH {{guildId: '{guildId}', date: {date}}}]-> + (b:DiscordAccount) """ + else: + projection_query = f"""MATCH (a:DiscordAccount) + -[r:INTERACTED_WITH {{guildId: '{guildId}'}}]-> + (b:DiscordAccount) """ + + if "projection_query" in kwargs: + projection_query = kwargs["projection_query"] + + rel_direction = None + if relation_direction == "NATURAL": + # empty str + rel_direction = "" + elif relation_direction == "UNDIRECTED": + rel_direction = ",{undirectedRelationshipTypes: ['*']}" + elif relation_direction == "REVERSE": + rel_direction = ",{inverseIndexedRelationshipTypes: ['*']}" + else: + logging.error("Wrong relation_direction given as input") + logging.error(f"Given is: {relation_direction}, defaulting to UNDIRECTED") + rel_direction = ",{undirectedRelationshipTypes: ['*']}" + + # initializing it + rel_properties = None + + if weighted: + # the relation properties to include + rel_properties = "{.date, .weight}" + else: + rel_properties = "{.date}" + + _ = self.gds.run_cypher( + f""" + {projection_query} + WITH gds.graph.project( + "{graph_name}", + a, + b, + {{ + relationshipProperties: r {rel_properties} + }} + {rel_direction} + ) AS g + RETURN + g.graphName AS graph, g.nodeCount AS nodes, g.relationshipCount AS rels + """ + ) + + def get_dates(self, guildId: str) -> set[float]: + """ + get all the dates we do have on the INTERACTED_WITH relations + + Parameters: + ------------ + guildId : str + the guild we do want the dates of relations + """ + dates = self.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) + -[r:INTERACTED_WITH {{guildId: '{guildId}'}}]-() + WITH DISTINCT(r.date) as dates + RETURN dates + """ + ) + computable_dates_set = set(dates["dates"].values) + + return computable_dates_set + + def get_computed_dates(self, query: str) -> set[float]: + """ + get the computed metric dates for that specific query + + Parameters: + ------------- + query : str + the query to get the computed dates of a metric + must have one return results with label of computed_dates + first one is date + """ + dates = self.gds.run_cypher(query) + computed_dates = set(dates["computed_dates"].values) + + return computed_dates diff --git a/discord_analyzer/analysis/utils/__init__.py b/discord_analyzer/analysis/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/analysis/utils/activity.py b/discord_analyzer/analysis/utils/activity.py new file mode 100644 index 0000000..3b6fc4c --- /dev/null +++ b/discord_analyzer/analysis/utils/activity.py @@ -0,0 +1,8 @@ +class Activity: + """ + enum class + """ + + Reply = "reply" + Mention = "mention" + Reaction = "reaction" diff --git a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py b/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py new file mode 100644 index 0000000..f3d8636 --- /dev/null +++ b/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py @@ -0,0 +1,129 @@ +import logging +from typing import Any + +import numpy as np +from discord_analyzer.analysis.analytics_interactions_script import ( + per_account_interactions, +) +from discord_analyzer.analysis.utils.activity import Activity + + +def prepare_per_account(db_results: list) -> dict[str, list[dict]]: + """ + convert the db_results into per account results + + Parameters: + ------------ + 
db_results : list[Any] + the results gotten from heatmaps + + Returns: + --------- + per_acc_query_result : dict[str, list[dict]] + per account results + key is the account name + and values are the docuemnts of database + """ + # Cetegorize per account_name + per_acc_query_result: dict[str, list[dict]] = {} + + # a dictionary for results of each account + for db_record in db_results: + # if the data for a specific account was not created before, create one as list + acc_name = db_record["account_name"] + if acc_name not in per_acc_query_result.keys(): + per_acc_query_result[acc_name] = [db_record] + # else, append + else: + per_acc_query_result[acc_name].append(db_record) + + return per_acc_query_result + + +def generate_interaction_matrix( + per_acc_interactions: dict[str, list[Any]], + acc_names: list[str], + activities: list[str], +) -> np.ndarray: + """ + generate interaction matrix for account interactions + + Parameters: + ------------ + per_acc_interactions : dict[str, list[Any]] + dictionary of per account interactions + keys are the account names + values are the interactions for that account + acc_names : [str] + list of all account names to be considered for analysis + activities : list[str] + the activities to include for generating interaction matrix + min length is 1 + + Returns: + --------- + int_matrix : np.ndarray + an array of integer values + each row and column are representative of account interactions + """ + + int_matrix = np.zeros((len(acc_names), len(acc_names)), dtype=np.uint16) + + for acc in per_acc_interactions.keys(): + db_res_per_acc = per_acc_interactions[acc] + + dict_keys = prepare_interaction_field_names(activities=activities) + # get results from db + db_results = per_account_interactions( + cursor_list=db_res_per_acc, + dict_keys=dict_keys, + ) + + # obtain results for all interactions summed together + acc_out_int = db_results["all_interaction_accounts"] + + # for each interacting account + for int_acc in acc_out_int.values(): + # if the interacting account is in acc_names + if int_acc["account"] in acc_names: + # store data in int_network + int_matrix[ + np.where(np.array(acc_names) == acc)[0][0], + np.where(np.array(acc_names) == int_acc["account"])[0][0], + ] = int_acc["count"] + + return int_matrix + + +def prepare_interaction_field_names(activities: list[str]) -> list[str]: + """ + convert activity names to the field names + as are saved under the heatmaps collection + + + Parameters: + ------------ + activities : list[str] + the activities to be converted to db field names + could be the items below + - `mention` + - `reply` + - `reaction` + + Returns: + --------- + field_names : list[str] + the field names from database to use + """ + field_names = [] + for activity in activities: + if activity == Activity.Mention: + field_names.append("mentioner_per_acc") + elif activity == Activity.Reply: + field_names.append("replied_per_acc") + elif activity == Activity.Reaction: + field_names.append("reacted_per_acc") + else: + logging.warning("prepare_interaction_field_names: Wrong activity given!") + + return field_names diff --git a/discord_analyzer/analysis/utils/member_activity_history_utils.py b/discord_analyzer/analysis/utils/member_activity_history_utils.py new file mode 100644 index 0000000..2ce63fe --- /dev/null +++ b/discord_analyzer/analysis/utils/member_activity_history_utils.py @@ -0,0 +1,385 @@ +import logging +from datetime import datetime, timedelta +from typing import Any + +from dateutil import parser +from 
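# Toy illustration of the two helpers in this module: grouping heatmap
# documents per account and translating activity names into heatmaps field
# names. The sample documents are fabricated and only carry the fields needed here.
db_results = [
    {"account_name": "user_a", "replied_per_acc": [{"account": "user_b", "count": 2}]},
    {"account_name": "user_b", "replied_per_acc": []},
    {"account_name": "user_a", "replied_per_acc": [{"account": "user_c", "count": 1}]},
]

per_acc: dict[str, list[dict]] = {}
for record in db_results:
    per_acc.setdefault(record["account_name"], []).append(record)

print(list(per_acc.keys()))   # ['user_a', 'user_b']
print(len(per_acc["user_a"])) # 2

# the same mapping prepare_interaction_field_names applies to Activity values
activity_to_field = {
    "mention": "mentioner_per_acc",
    "reply": "replied_per_acc",
    "reaction": "reacted_per_acc",
}
print([activity_to_field[a] for a in ["reply", "mention"]])  # ['replied_per_acc', 'mentioner_per_acc']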
discord_analyzer.DB_operations.mongodb_access import DB_access +from numpy import array + + +class MemberActivityPastUtils: + def __init__(self, db_access: DB_access) -> None: + self.db_access = db_access + + def update_joined_accounts( + self, + start_dt: datetime, + end_dt: datetime, + all_joined_day: dict[str, set[str]], + starting_key: int, + window_d: int = 7, + ): + """ + Parameters: + ----------- + start_dt : datetime + the starting point of looking into joined accounts + end_dt : datetime + the ending point of looking into joined accounts + all_joined_day : dict[str, set[str]] + dictionary of `all_joined_day` from before + we should update this dict based on the new joined accounts + difference between this one and `all_joined` is + `all_joined` is for past `window_d` days but `all_joined_day` + is for users joining for just the day + starting_key : int + the starting key to add the joined accounts + window_d : int + the window days to include days + default is 7 days + + + Returns: + --------- + all_joined : dict[str, set[str]] + the updated joined dictionary for past 7 days + all_joined_day : dict[str, set[str]] + the updated joined dictionary for past 1 day + """ + # to get the data in end_date we should plus it to one + joined_acc = self._get_joined_accounts( + date_range=[start_dt, end_dt + timedelta(days=1)] + ) + + all_joined_day = self.update_all_joined_day( + start_dt, end_dt, all_joined_day, starting_key, joined_acc + ) + + all_joined = self.get_users_past_days(all_joined_day, window_d) + + return all_joined, all_joined_day + + def get_users_past_days( + self, all_joined_day: dict[str, set[str]], window_d: int + ) -> dict[str, set[str]]: + """ + get the users from past `window_d` days + + Parameters: + -------------- + all_joined_day : dict[str, set[str]] + the users joining in one day + window_d : int + the number of days look into past for getting users + + Returns: + ---------- + all_joined : dict[str, list[str]] + the users joining in, withing `window_d` past days + """ + all_joined: dict[str, set[str]] = {} + + # looping up to the max key + loop_max = array(list(all_joined_day.keys()), dtype=int).max() + 1 + + for day_idx in range(loop_max): + # how mAny days to look for past joined members + look_past = None + if day_idx - window_d > 0: + look_past = day_idx - window_d + else: + look_past = 0 + + joined_members_idx = array(range(look_past, day_idx + 1), dtype=str) + all_joined[str(look_past + 1)] = set() + for idx in joined_members_idx: + all_joined[str(look_past + 1)] = all_joined[str(look_past + 1)].union( + all_joined_day[idx] + ) + + return all_joined + + def update_all_joined_day( + self, + start_dt: datetime, + end_dt: datetime, + all_joined_day: dict[str, set[str]], + starting_key: int, + joined_acc: list[dict[str, str]], + ) -> dict[str, set[str]]: + """ + update the all_joined_day dict with new retrieved data + + Parameters: + ----------- + start_dt : datetime + the starting point of looking into joined accounts + end_dt : datetime + the ending point of looking into joined accounts + all_joined_day : dict[str, set[str]] + dictionary of joined accounts from before + we should update this dict based on the new joined accounts + starting_key : int + the starting key to add the joined accounts + joined_acc : list[dict[str, str]] + list of retrieved data from db + it is a list of dicionaries, each has the keys of + `joinedAt`, and `discordId` + + Returns: + --------- + all_joined_day : dict[str, set[str]] + the updated joined dictionary + """ + + for i in 
range(0, (end_dt - start_dt).days + 1): + date = (start_dt + timedelta(days=i)).date() + joined_accounts = self._get_accounts_per_date(joined_acc, date) + + date_index = i + starting_key + all_joined_day[str(date_index)] = set(joined_accounts) + + return all_joined_day + + def create_past_history_query(self, date_range): + """ + create a query to retreive the data that are not analyzed + + Parameters: + ------------- + date_range: list + a list of length 2, the first index has the start of the interval + and the second index is end of the interval + + Returns: + ---------- + query : dictionary + the query representing the dictionary of filters + """ + date_interval_start = datetime.strptime(date_range[0], "%y/%m/%d").isoformat() + date_interval_end = datetime.strptime(date_range[1], "%y/%m/%d").isoformat() + + query = { + "date": { + # the given date_range in script analysis + "$gte": date_interval_start, + "$lte": date_interval_end, + } + } + + return query + + def convert_back_to_old_schema(self, retrieved_data, date_start, window_param): + """ + convert the retrieved data back to the old schema we had, to do the analysis + + Parameters: + --------------- + retrieved_data : array + array of db returned records + date_start : datetime + the starting point of analysis + days_after_analysis_start : int + the day count after analysis which are available in DB + window_param : tuple of int with len 2 + + Returns: + ---------- + activity_dict : dict + the data converted to the old db schema + """ + # make empty result dictionary + activity_dict = {} + + # store results in dictionary + activity_dict["all_joined"] = {} + activity_dict["all_joined_day"] = {} + activity_dict["all_consistent"] = {} + activity_dict["all_vital"] = {} + activity_dict["all_active"] = {} + activity_dict["all_connected"] = {} + activity_dict["all_paused"] = {} + activity_dict["all_new_disengaged"] = {} + activity_dict["all_disengaged"] = {} + activity_dict["all_unpaused"] = {} + activity_dict["all_returned"] = {} + activity_dict["all_new_active"] = {} + activity_dict["all_still_active"] = {} + activity_dict["all_dropped"] = {} + activity_dict["all_disengaged_were_vital"] = {} + activity_dict["all_disengaged_were_newly_active"] = {} + activity_dict["all_disengaged_were_consistently_active"] = {} + activity_dict["all_lurker"] = {} + activity_dict["all_about_to_disengage"] = {} + activity_dict["all_disengaged_in_past"] = {} + + for idx in range(len(retrieved_data)): + db_record = retrieved_data[idx] + parser.parse(db_record["date"]) - timedelta(days=window_param[0]) + + for activity in activity_dict.keys(): + try: + if db_record[activity] != []: + # creating a dictionary of users + users_name = db_record[activity] + # make a dictionary of indexes and users for + # a specific activity in an specific day + activity_dict[activity][str(idx)] = set(users_name) + else: + activity_dict[activity][str(idx)] = set("") + except KeyError: + logging.error( + f"KeyError: the key {activity} is not available in DB record!" + ) + except Exception as exp: + logging.error(str(exp)) + + activity_dict["first_end_date"] = ( + date_start - timedelta(days=window_param[0]) + ).isoformat() + + return activity_dict + + def _get_accounts_per_date( + self, joined_acc, date, date_key="joinedAt", account_key="discordId" + ): + """ + get the accounts for a special date + + Parameters: + ------------- + joined_acc : list(dict[str, Any]) + joined account retreived from database + it must be sorted by joinDate! 
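# Quick illustration of the filter built by `create_past_history_query`:
# the date_range entries use the "%y/%m/%d" format and are converted to ISO
# strings before being placed in the Mongo query. The dates are arbitrary examples.
from datetime import datetime

date_range = ["23/03/01", "23/03/27"]
query = {
    "date": {
        "$gte": datetime.strptime(date_range[0], "%y/%m/%d").isoformat(),
        "$lte": datetime.strptime(date_range[1], "%y/%m/%d").isoformat(),
    }
}
print(query)
# {'date': {'$gte': '2023-03-01T00:00:00', '$lte': '2023-03-27T00:00:00'}}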
+ date : datetime.date + the date that we're going to retrieve accounts from + date_key : str + the key used to represent the date of user join + account_key : str + the key used to represent the account name + + Returns: + --------- + account_names : list(str) + the list of accounts + """ + account_names = [] + + for account in joined_acc: + account_join_date = account[date_key].date() + if account_join_date == date: + account_names.append(account[account_key]) + + return account_names + + def _get_joined_accounts(self, date_range) -> list[dict[str, Any]]: + """ + get the joined accounts for a time interval to a date range + + Parameters: + ------------- + date_range : tuple of datetime + a tuple with length 2 + in the first index we save the starting date + in the second date we would save the end date + + Returns: + ---------- + data : list of dictionaries + an array of dictionaries + each dictionary has `account` and `joinDate` member + """ + query = {"joinedAt": {"$gte": date_range[0], "$lte": date_range[1]}} + feature_projection = {"joinedAt": 1, "discordId": 1, "_id": 0} + + # quering the db now + cursor = self.db_access.query_db_find("guildmembers", query, feature_projection) + + data = list(cursor) + + return data + + def _append_all_past_data( + self, retrived_past_data, activity_names_list, starting_idx=0 + ): + """ + Append all past activities together + + Parameters: + -------------- + retrived_past_data : list + list of dictionaries, having all the activities in it + activity_names_list : list + the activities to filter + starting_idx : int + the data of activities that should be started from the index + in another words it would be started from which day + default is 0, meaning all the past data from the starting point of + `first_end_date` will be included + + Returns: + ---------- + all_activity_data_dict : dictionary + the analyzed data with refined keys + maximum_key : int + the maximum key of the data + """ + all_activity_data_dict = {} + maximum_key_values = [] + for activity_name in activity_names_list: + activity_data_list = retrived_past_data[activity_name] + + activity_data_dict, max_key_val = self._refine_dict_indexes( + activity_data_list, starting_idx + ) + + maximum_key_values.append(max_key_val) + # add it to the new dictionary + all_activity_data_dict[activity_name] = activity_data_dict + + return all_activity_data_dict, max(maximum_key_values) + + def _refine_dict_indexes(self, data_dict, starting_idx=0): + """ + refine the indexes in dictionary + + Parameters: + ------------ + data_dict : dictionary + dictionary for a specific activity with keys '0','1', '2', etc + starting_idx : int + the data of activities that should be started from the index + in another words it would be started from which day + default is 0, meaning all the past data from the starting point of + `first_end_date` will be included + + Returns: + ----------- + data_dict_appended: dictionary + all the dictionaries appended together + the keys are refined in a way that + starting with '0' and ending with sum of keys + max_key_val : int + the maximum value of the dictionary + """ + data_dict_appended = {} + max_key_val = 0 + + # get all the keys in integer format + indices_list = list(map(lambda x: int(x), data_dict.keys())) + # converting to numpy to be able to filter them + indices_list = array(indices_list) + # filtering them + indices_list = indices_list[indices_list > starting_idx] + # incrementing and converting the indices of the dictionary to string + indices_list = list(map(lambda 
x: str(x + max_key_val), indices_list)) + # creating new dictionary with new indices + dictionary_refined_keys = dict(zip(indices_list, list(data_dict.values()))) + # adding it to the results dictionary + data_dict_appended.update(dictionary_refined_keys) + + # if there was some index available + if len(indices_list) != 0: + max_key_val += int(max(indices_list)) + + return data_dict_appended, max_key_val diff --git a/discord_analyzer/analysis/utils/member_activity_utils.py b/discord_analyzer/analysis/utils/member_activity_utils.py new file mode 100644 index 0000000..d7b1d6a --- /dev/null +++ b/discord_analyzer/analysis/utils/member_activity_utils.py @@ -0,0 +1,246 @@ +from datetime import timedelta +from typing import Any + +import numpy as np +import pymongo +from discord_analyzer.DB_operations.mongodb_access import DB_access + + +def get_joined_accounts(db_access, date_range): + """ + get the joined accounts for a time interval to a date range + + Parameters: + ------------- + db_access: DB_access + the database access class that queries are called through it + date_range : tuple of datetime + a tuple with length 2 + in the first index we save the starting date + in the second date we would save the end date + + Returns: + ---------- + data : list of dictionaries + an array of dictionaries, each dictionary has `discordId` and `joined_at` member + """ + query = {"joinedAt": {"$gte": date_range[0], "$lte": date_range[1]}} + feature_projection = {"joinedAt": 1, "discordId": 1, "_id": 0} + + # quering the db now + cursor = db_access.query_db_find("guildmembers", query, feature_projection) + + data = list(cursor) + + return data + + +def store_based_date( + start_date, + all_activities, + analytics_day_range, + joined_acc_dict, + load_past, + **kwargs +): + """ + store the activities (`all_*`) in a dictionary based on their ending analytics date + + Parameters: + ------------- + start_date : datetime + datetime object showing the start date of analysis + all_activities : dictionary + the `all_*` activities dictionary + each key does have an activity, `all_joined_day`, `all_consistent`, etc + and values are representing the analytics after the start_date + analytics_day_range : int + the range window of analytics + to make sure that the dates of analytics is for the past + `analytics_day_range` days, not `analytics_day_range` forward + joined_acc_dict : array of dictionary + an array of dictionaries, each dictionary has `discordId` and `joined_at` member + load_past : bool + whether we loaded the past data or start processing from scratch + If True, indicates that the past data is loaded beside the analytics data + **kwargs : + empty_channel_acc : bool + whether the channel and acc are empty + if True, then this wouldn't give outputs + """ + # to fill the all_joined_day field + if "empty_channel_acc" in kwargs: + if not kwargs["empty_channel_acc"]: + return [] + + # post processing the + account_names = list(map(lambda record: record["discordId"], joined_acc_dict)) + acc_join_date = list( + map( + lambda record: record["joinedAt"].date(), + joined_acc_dict, + ) + ) + # converting to numpy to be easier to use + account_names = np.array(account_names) + acc_join_date = np.array(acc_join_date) + + # the data converted to multiple db records + all_data_records = [] + + # using the 3rd activity (2) + # we do know it is always complete and have all the keys + # finding the maximum days after first day of analytics + max_days_after = len(all_activities[list(all_activities.keys())[2]]) + + for 
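# Illustration of the filter and projection that `get_joined_accounts` passes
# to `db_access.query_db_find("guildmembers", ...)`; the dates below are
# arbitrary example values, not real guild data.
from datetime import datetime

date_range = (datetime(2023, 3, 1), datetime(2023, 3, 8))
query = {"joinedAt": {"$gte": date_range[0], "$lte": date_range[1]}}
feature_projection = {"joinedAt": 1, "discordId": 1, "_id": 0}
# db_access.query_db_find("guildmembers", query, feature_projection) would then
# return documents shaped like {"discordId": "...", "joinedAt": datetime(...)}
print(query, feature_projection)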
day_index in range(max_days_after): + analytics_date = start_date + timedelta(days=day_index) + analytics_end_date = analytics_date + timedelta(days=analytics_day_range) + # saving the data of a record + data_record = {} + + if not load_past: + date_using = analytics_end_date + else: + date_using = analytics_date + + data_record["date"] = date_using.isoformat() + + # analytics that were done in that date + for activity in all_activities.keys(): + # if an analytics for that day was available + if str(day_index) in all_activities[activity].keys(): + data_record[activity] = list(all_activities[activity][str(day_index)]) + # if there was no analytics in that day + else: + data_record[activity] = [] + + # fill in the all_joined_day member + data_record["all_joined_day"] = list( + account_names[date_using.date() == acc_join_date] + ) + + # all_data_records[str(day_index)] = data_record + all_data_records.append(data_record) + + # if there was no data just save empty date records + if max_days_after == 0: + data_record = {} + data_record["date"] = ( + start_date + timedelta(days=analytics_day_range) + ).isoformat() + + for activity in all_activities.keys(): + data_record[activity] = [] + + all_data_records = [data_record] + + return all_data_records + + +def update_activities(past_activities, activities_list): + """ + update activities variables using `past_activities` variable + note: `past_activities` variable contains all the activities from past + """ + from operator import itemgetter + + # getting all dictionary values with the order of `activities_list` + activity_dictionaries = itemgetter(*activities_list)(past_activities) + + return activity_dictionaries + + +def convert_to_dict(data: list[Any], dict_keys: list[str]) -> dict: + """ + convert data into dictionary + Note: the length of data and dict_keys always must be the same + + Parameters: + ------------ + data : list + the data to use as dictionary values + dict_keys : list + the dictionary keys + + Returns: + --------- + converted_data : dict + the data that is converted to dictionary + with their corresponding keys + """ + converted_data = dict(zip(dict_keys, data)) + + return converted_data + + +def get_users_past_window( + window_start_date: str, collection: pymongo.collection.Collection +) -> list[str]: + """ + get all users in the past date window from specific collection + + Parameters: + ------------ + window_start_date : str + the starting point of the window until today + must be in format of the database which for now is %Y-%m-%d + collection : pymongo.collection.Collection + the mongodb collection to do the aggregation + + Returns: + --------- + user_names : list[str] + the user names for the past 7 days + """ + pipeline = [ + # Filter documents based on date + {"$match": {"date": {"$gte": window_start_date}}}, + {"$group": {"_id": "$account_name"}}, + { + "$group": { + "_id": None, + "uniqueAccounts": {"$push": "$_id"}, + } + }, + ] + result = list(collection.aggregate(pipeline)) + + # in case of no data we would return empty string + user_names = [] + if result != []: + user_names = result[0]["uniqueAccounts"] + # removing remainder category + if "remainder" in user_names: + user_names.remove("remainder") + + return user_names + + +def get_latest_joined_users(db_access: DB_access, count: int = 5) -> list[str]: + """ + get latest joined users + + Parameters: + ------------- + db_access : DB_access + database access class + count : int + the count of latest users to return + + Returns: + --------- + users : list[str] + the 
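# Hedged sketch of calling `get_users_past_window` for a 7-day window; the
# heatmaps collection handle is a placeholder and must be a pymongo collection
# as the function's signature requires.
from datetime import datetime, timedelta

window_start_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
# user_names = get_users_past_window(window_start_date, heatmaps_collection)
# note: the helper also drops the special "remainder" account from the result
print(window_start_date)  # e.g. '2023-06-01'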
userIds to use + """ + cursor = db_access.query_db_find( + table="guildmembers", + query={"isBot": False}, + feature_projection={"discordId": 1, "_id": 0}, + sorting=("joinedAt", -1), + ).limit(count) + usersId = list(cursor) + + usersId = list(map(lambda x: x["discordId"], usersId)) + + return usersId diff --git a/discord_analyzer/analyzer/__init__.py b/discord_analyzer/analyzer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/analyzer/analyzer_heatmaps.py b/discord_analyzer/analyzer/analyzer_heatmaps.py new file mode 100644 index 0000000..d39cfeb --- /dev/null +++ b/discord_analyzer/analyzer/analyzer_heatmaps.py @@ -0,0 +1,196 @@ +import logging +from collections import Counter +from datetime import datetime, timedelta + +# from analyzer.analyzer.base_analyzer import Base_analyzer +from discord_analyzer.analysis.activity_hourly import activity_hourly +from discord_analyzer.analyzer.heatmaps_utils import ( + get_bot_id, + getNumberOfActions, + store_counts_dict, +) +from discord_analyzer.models.GuildsRnDaoModel import GuildsRnDaoModel +from discord_analyzer.models.HeatMapModel import HeatMapModel +from discord_analyzer.models.RawInfoModel import RawInfoModel + + +class Heatmaps: + def __init__(self, DB_connections, testing) -> None: + self.DB_connections = DB_connections + self.testing = testing + + def is_empty(self, guildId: str): + """ + check whether the heatmaps for the guild is empty or not + """ + client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client + + heatmap_c = HeatMapModel(client[guildId]) + document = heatmap_c.get_one() + + return document is None + + def analysis_heatmap(self, guildId, from_start=False): + """ + Based on the rawdata creates and stores the heatmap data + + Parameters: + ------------- + guildId : str + the guild id to analyze data for + from_start : bool + do the analytics from scrach or not + if True, if wouldn't pay attention to the existing data in heatmaps + and will do the analysis from the first date + + + Returns: + --------- + heatmaps_results : list of dictionary + the list of data analyzed + also the return could be None if no database for guild + or no raw info data was available + """ + # activity_hourly() + guild_msg = f"GUILDID: {guildId}:" + + client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client + + if guildId not in client.list_database_names(): + logging.error(f"{guild_msg} Database {guildId} doesn't exist") + logging.error( + f"{guild_msg} Existing databases: {client.list_database_names()}" + ) # flake8: noqa + logging.info(f"{guild_msg} Continuing") + return None + + # Collections involved in analysis + # guild parameter is the name of the database + rawinfo_c = RawInfoModel(client[guildId]) + heatmap_c = HeatMapModel(client[guildId]) + guild_rndao_c = GuildsRnDaoModel(client["RnDAO"]) + + # Testing if there are entries in the rawinfo collection + if rawinfo_c.count() == 0: + msg = f"{guild_msg} No entries in the collection" + msg += "'rawinfos' in {guildId} databse" + logging.warning(msg) + return None + + if not heatmap_c.collection_exists(): + raise Exception( + f"{guild_msg} Collection '{heatmap_c.collection_name}' does not exist" + ) + if not rawinfo_c.collection_exists(): + raise Exception( + f"{guild_msg} Collection '{rawinfo_c.collection_name}' does not exist" + ) + + last_date = heatmap_c.get_last_date() + + if last_date is None or from_start: + # If no heatmap was created, than tha last date is the first + # rawdata entry + # last_date = 
rawinfo_c.get_first_date() + last_date = guild_rndao_c.get_guild_period(guildId) + if last_date is None: + msg = f"{guild_msg} Collection" + msg += f"'{rawinfo_c.collection_name}' does not exist" + raise Exception(msg) + # last_date.replace(tzinfo=timezone.utc) + else: + last_date = last_date + timedelta(days=1) + + # initialize the data array + heatmaps_results = [] + + # getting the id of bots + bot_ids = get_bot_id( + db_mongo_client=self.DB_connections.mongoOps.mongo_db_access.db_mongo_client, + guildId=guildId, + ) + + while last_date.date() < datetime.now().date(): + entries = rawinfo_c.get_day_entries(last_date, "ANALYZER HEATMAPS: ") + if len(entries) == 0: + # analyze next day + last_date = last_date + timedelta(days=1) + continue + + prepared_list = [] + account_list = [] + + for entry in entries: + if "replied_user" not in entry: + reply = "" + else: + reply = entry["replied_user"] + + # eliminating bots + if entry["author"] not in bot_ids: + prepared_list.append( + { + # .strftime('%Y-%m-%d %H:%M'), + "datetime": entry["createdDate"], + "channel": entry["channelId"], + "author": entry["author"], + "replied_user": reply, + "user_mentions": entry["user_mentions"], + "reactions": entry["reactions"], + "threadId": entry["threadId"], + "mess_type": entry["type"], + } + ) + if entry["author"] not in account_list: + account_list.append(entry["author"]) + + if entry["user_mentions"] is not None: + for account in entry["user_mentions"]: + # for making the line shorter + condition2 = account not in bot_ids + if account not in account_list and condition2: + account_list.append(account) + + activity = activity_hourly(prepared_list, acc_names=account_list) + # # activity[0] + # heatmap = activity[1][0] + # Parsing the activity_hourly into the dictionary + results = self._post_process_data(activity[1], len(account_list)) + heatmaps_results.extend(results) + + # analyze next day + last_date = last_date + timedelta(days=1) + + return heatmaps_results + + def _post_process_data(self, heatmap_data, accounts_len): + results = [] + for heatmap in heatmap_data: + for i in range(accounts_len): + heatmap_dict = {} + heatmap_dict["date"] = heatmap["date"][0] + heatmap_dict["channelId"] = heatmap["channel"][0] + heatmap_dict["thr_messages"] = heatmap["thr_messages"][i] + heatmap_dict["lone_messages"] = heatmap["lone_messages"][i] + heatmap_dict["replier"] = heatmap["replier"][i] + heatmap_dict["replied"] = heatmap["replied"][i] + heatmap_dict["mentioner"] = heatmap["mentioner"][i] + heatmap_dict["mentioned"] = heatmap["mentioned"][i] + heatmap_dict["reacter"] = heatmap["reacter"][i] + heatmap_dict["reacted"] = heatmap["reacted"][i] + heatmap_dict["reacted_per_acc"] = store_counts_dict( + dict(Counter(heatmap["reacted_per_acc"][i])) + ) + heatmap_dict["mentioner_per_acc"] = store_counts_dict( + dict(Counter(heatmap["mentioner_per_acc"][i])) + ) + heatmap_dict["replied_per_acc"] = store_counts_dict( + dict(Counter(heatmap["replied_per_acc"][i])) + ) + heatmap_dict["account_name"] = heatmap["acc_names"][i] + sum_ac = getNumberOfActions(heatmap_dict) + + if not self.testing and sum_ac > 0: + results.append(heatmap_dict) + + return results diff --git a/discord_analyzer/analyzer/analyzer_memberactivities.py b/discord_analyzer/analyzer/analyzer_memberactivities.py new file mode 100644 index 0000000..c3a518e --- /dev/null +++ b/discord_analyzer/analyzer/analyzer_memberactivities.py @@ -0,0 +1,142 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.compute_member_activity import 
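# Toy illustration of the Counter step used in `_post_process_data` above:
# a list of interacting account names per author becomes the {account: count}
# mapping that `store_counts_dict` later converts to database objects.
# The account names below are fabricated.
from collections import Counter

reacted_per_acc = ["user_b", "user_b", "user_c"]
counts = dict(Counter(reacted_per_acc))
print(counts)  # {'user_b': 2, 'user_c': 1}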
compute_member_activity +from discord_analyzer.analyzer.memberactivity_utils import MemberActivityUtils +from discord_analyzer.models.MemberActivityModel import MemberActivityModel +from discord_analyzer.models.RawInfoModel import RawInfoModel + + +class Member_activities: + def __init__(self, DB_connections, logging) -> None: + self.DB_connections = DB_connections + self.logging = logging + + self.utils = MemberActivityUtils(DB_connections) + + def analysis_member_activity(self, guildId, mongo_connection_str, from_start=False): + """ + Based on the rawdata creates and stores the member activity data + + Parameters: + ------------- + guildId : str + the guild id to analyze data for + from_start : bool + do the analytics from scrach or not + if True, if wouldn't pay attention to the existing data in memberactivities + and will do the analysis from the first date + + Returns: + --------- + memberactivity_results : list of dictionary + the list of data analyzed + also the return could be None if no database for guild + or no raw info data was available + memberactivity_networkx_results : list of networkx objects + the list of data analyzed in networkx format + also the return could be None if no database for guild + or no raw info data was available + """ + guild_msg = f"GUILDID: {guildId}:" + + client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client + + # check current guild is exist + if guildId not in client.list_database_names(): + self.logging.error(f"{guild_msg} Database {guildId} doesn't exist") + self.logging.error(f"{guild_msg} No such databse!") + self.logging.info(f"{guild_msg} Continuing") + return (None, None) + + member_activity_c = MemberActivityModel(client[guildId]) + rawinfo_c = RawInfoModel(client[guildId]) + + # Testing if there are entries in the rawinfo collection + if rawinfo_c.count() == 0: + self.logging.warning( + f"No entries in the collection 'rawinfos' in {guildId} databse" + ) + return (None, None) + + # get current guild setting + setting = self.utils.get_one_guild(guildId) + + channels, window, action = ( + setting["selectedChannels"], + setting["window"], + setting["action"], + ) + channels = setting["selectedChannels"] + window = setting["window"] + action = setting["action"] + period = setting["period"] + + channels = list(map(lambda x: x["channelId"], channels)) + + # get date range to be analyzed + today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + + self.logging.info(f"{guild_msg} memberactivities Analysis started!") + + # initialize + load_past_data = False + + # if we had data from past to use + if member_activity_c.count() != 0: + load_past_data = True + + load_past_data = load_past_data and not from_start + + # first_date = rawinfo_c.get_first_date().replace( + # hour=0, minute=0, second=0, microsecond=0 + # ) + + first_date = period + if first_date is None: + self.logging.error(f"No guild: {guildId} available in RnDAO.guilds!") + return None, None + + last_date = today - timedelta(days=1) + + date_range = [first_date, last_date] + + if load_past_data: + # num_days_to_load = ( + # max([CON_T_THR, VITAL_T_THR, STILL_T_THR, PAUSED_T_THR])+1 + # ) * WINDOW_D + num_days_to_load = ( + max([action[3], action[7], action[9], action[2]]) + 1 + ) * window[0] + date_range[0] = date_range[1] - timedelta(days=num_days_to_load) + + if date_range[0] < period: + date_range[0] = period + timedelta(days=window[0]) + + # get all users during date_range + all_users = self.utils.get_all_users(guildId) + # change format like 23/03/27 
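# Toy walk-through of the look-back window computed above when past data is
# loaded; the `window` and `action` values are made-up placeholders, not real
# guild settings, and the last date is an arbitrary example.
from datetime import datetime, timedelta

window = [7, 1]                            # hypothetical: 7-day analysis window
action = [1, 1, 2, 3, 1, 1, 1, 4, 1, 2]    # hypothetical activity thresholds
num_days_to_load = (max([action[3], action[7], action[9], action[2]]) + 1) * window[0]
print(num_days_to_load)                    # (max(3, 4, 2, 2) + 1) * 7 = 35

last_date = datetime(2023, 6, 30)
date_range = [last_date - timedelta(days=num_days_to_load), last_date]
print([dt.strftime("%y/%m/%d") for dt in date_range])  # ['23/05/26', '23/06/30']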
+ date_range = [dt.strftime("%y/%m/%d") for dt in date_range] + + networkx_objects, activities = compute_member_activity( + guildId, + mongo_connection_str, + channels, + all_users, + date_range, + window, + action, + logging=self.logging, + load_past_data=load_past_data, + ) + + if not from_start: + # first date of storing the data + first_storing_date = member_activity_c.get_last_date() + activities = self.utils.refine_memberactivities_data( + activities, first_storing_date + ) + + memberactivity_results = activities + memberactivity_networkx_results = networkx_objects + + return memberactivity_results, memberactivity_networkx_results diff --git a/discord_analyzer/analyzer/base_analyzer.py b/discord_analyzer/analyzer/base_analyzer.py new file mode 100644 index 0000000..0f7323d --- /dev/null +++ b/discord_analyzer/analyzer/base_analyzer.py @@ -0,0 +1,74 @@ +from typing import Any + +from discord_analyzer.DB_operations.mongo_neo4j_ops import MongoNeo4jDB + + +class Base_analyzer: + def __init__(self): + """ + base class for the analyzer + """ + self.connection_str = None + + def set_mongo_database_info( + self, + mongo_db_user: str, + mongo_db_password: str, + mongo_db_host: str, + mongo_db_port: str, + ): + """ + MongoDB Database information setter + """ + self.mongo_user = mongo_db_user + self.mongo_pass = mongo_db_password + self.mongo_host = mongo_db_host + self.mongo_port = mongo_db_port + + self.connection_str = f"mongodb://{self.mongo_user}:{self.mongo_pass}@{self.mongo_host}:{self.mongo_port}" + + def set_neo4j_database_info(self, neo4j_creds: dict[str, Any]): + """ + set neo4J database informations + + Parameters: + ------------- + neo4j_creds : dict[str, Any] + neo4j_credentials to connect + the keys should be + - db_name: str + - protocol: str + - host: str + - port: int + - user: str + - password: str + """ + self.neo4j_db_name = neo4j_creds["db_name"] + self.neo4j_protocol = neo4j_creds["protocol"] + self.neo4j_host = neo4j_creds["host"] + self.neo4j_port = neo4j_creds["port"] + self.neo4j_user = neo4j_creds["user"] + self.neo4j_password = neo4j_creds["password"] + + def database_connect(self): + """ + Connect to the database + """ + """ Connection String will be modified once the url is provided""" + + self.DB_connections = MongoNeo4jDB(testing=False) + self.DB_connections.set_mongo_db_ops( + mongo_user=self.mongo_user, + mongo_pass=self.mongo_pass, + mongo_host=self.mongo_host, + mongo_port=self.mongo_port, + ) + + self.DB_connections.set_neo4j_utils( + db_name=self.neo4j_db_name, + host=self.neo4j_host, + port=self.neo4j_port, + protocol=self.neo4j_protocol, + user=self.neo4j_user, + password=self.neo4j_password, + ) diff --git a/discord_analyzer/analyzer/heatmaps_utils.py b/discord_analyzer/analyzer/heatmaps_utils.py new file mode 100644 index 0000000..79b1b61 --- /dev/null +++ b/discord_analyzer/analyzer/heatmaps_utils.py @@ -0,0 +1,72 @@ +from discord_analyzer.schemas.accounts import AccountCounts +from pymongo import MongoClient + + +def store_counts_dict(counts_dict): + # make empty result array + obj_array = [] + + # for each account + for acc in counts_dict.keys(): + # make dict and store in array + obj_array.append(AccountCounts(acc, counts_dict[acc]).asdict()) + + return obj_array + + +def getNumberOfActions(heatmap): + """get number of actions""" + sum_ac = 0 + fields = [ + "thr_messages", + "lone_messages", + "replier", + "replied", + "mentioned", + "mentioner", + "reacter", + "reacted", + ] + for field in fields: + for i in range(24): + sum_ac += 
heatmap[field][i] + return sum_ac + + +def get_bot_id( + db_mongo_client: MongoClient, + guildId: str, + collection_name: str = "guildmembers", + id_field_name: str = "discordId", +) -> list[str]: + """ + get the bot id from guildmembers collection + + Parameters: + ------------ + db_mongo_client : MongoClient + the access to database + guildId : str + the guildId to connect to + collection_name : str + the collection name to use + default is "guildmembers" + id_field_name : str + the fieldId that the account id is saved + default is "discordId" + + Returns: + --------- + bots : list[str] + the list of bot ids + """ + cursor = db_mongo_client[guildId][collection_name].find( + {"isBot": True}, {"_id": 0, id_field_name: 1} + ) + bots = list(cursor) + + bot_ids = [] + if bots != []: + bot_ids = list(map(lambda x: x[id_field_name], bots)) + + return bot_ids diff --git a/discord_analyzer/analyzer/memberactivity_utils.py b/discord_analyzer/analyzer/memberactivity_utils.py new file mode 100644 index 0000000..bbbe6f1 --- /dev/null +++ b/discord_analyzer/analyzer/memberactivity_utils.py @@ -0,0 +1,104 @@ +import logging + +from dateutil import parser + + +class MemberActivityUtils: + def __init__(self, DB_connection) -> None: + self.DB_connection = DB_connection + + def refine_memberactivities_data(self, all_member_activities, first_date): + """ + refine the data of memberactivities (don't use the data that are not needed) + it would save the data from the first_date + + Parameters: + -------------- + all_member_activities : array of dict + the memberactivities for the whole date + first_date : datetime + the first date of saving date + we would use this to specify the exact data activity to save + """ + + data_to_save = [] + for activity in all_member_activities: + if first_date is None or parser.parse(activity["date"]) > first_date: + data_to_save.append(activity) + + return data_to_save + + # get detailed info from one guild + def get_one_guild(self, guild): + """Get one guild setting from guilds collection by guild""" + + # guild_c = GuildsRnDaoModel( + # self.DB_connection.mongoOps.mongo_db_access.db_mongo_client["RnDAO"] + # ) + + # result = guild_c.get_guild_info(guild) + result = self.DB_connection.mongoOps.mongo_db_access.db_mongo_client["RnDAO"][ + "guilds" + ].find_one({"guildId": guild}) + return result + + # get all user accounts during date_range in guild from rawinfo data + def get_all_users( + self, + guildId: str, + ) -> list[str]: + # check guild is exist + + client = self.DB_connection.mongoOps.mongo_db_access.db_mongo_client + + if guildId not in client.list_database_names(): + logging.error(f"Database {guildId} doesn't exist") + logging.error(f"Existing databases: {client.list_database_names()}") + logging.info("Continuing") + return [] + + cursor = client[guildId]["guildmembers"].find( + { + "isBot": {"$ne": True}, + }, + {"discordId": 1, "_id": 0}, + ) + + users_data = list(cursor) + all_users = list(map(lambda x: x["discordId"], users_data)) + + return all_users + + def parse_reaction(self, s): + result = [] + for subitem in s: + items = subitem.split(",") + parsed_items = [] + for item in items: + parsed_items.append(item) + self.merge_array(result, result[:-1]) + return result + + def get_users_from_oneday(self, entries): + """get all users from one day messages""" + users = [] + for entry in entries: + # author + if entry["author"]: + self.merge_array(users, [entry["author"]]) + # mentioned users + # mentions = entry["user_mentions"][0].split(",") + mentions = 
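# Toy call of `getNumberOfActions`: every counted field is a 24-slot hourly
# vector and the helper simply sums them all, so two lone messages plus one
# reply total 3 actions. The heatmap values below are fabricated.
from discord_analyzer.analyzer.heatmaps_utils import getNumberOfActions

heatmap = {field: [0] * 24 for field in [
    "thr_messages", "lone_messages", "replier", "replied",
    "mentioned", "mentioner", "reacter", "reacted",
]}
heatmap["lone_messages"][9] = 2   # two plain messages at 09:00
heatmap["replier"][14] = 1        # one reply at 14:00
print(getNumberOfActions(heatmap))  # 3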
entry["user_mentions"] + self.merge_array(users, mentions) + # reacters + reactions = self.parse_reaction(entry["reactions"]) + self.merge_array(users, reactions) + return users + + def merge_array(self, parent_arr, child_arr): + """insert all elements in child_arr to parent_arr which are not in parent_arr""" + for element in child_arr: + if element == "": + continue + if element not in parent_arr: + parent_arr.append(element) diff --git a/discord_analyzer/analyzer/neo4j_analytics.py b/discord_analyzer/analyzer/neo4j_analytics.py new file mode 100644 index 0000000..a0b8d48 --- /dev/null +++ b/discord_analyzer/analyzer/neo4j_analytics.py @@ -0,0 +1,157 @@ +# A wrapper to compute the neo4j metrics in cron-job +import logging + +from discord_analyzer.analysis.neo4j_analysis.analyzer_node_stats import NodeStats +from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality +from discord_analyzer.analysis.neo4j_analysis.local_clustering_coefficient import ( + LocalClusteringCoeff, +) +from tc_neo4j_lib.neo4j_ops import Neo4jOps + + +class Neo4JAnalytics: + def __init__(self, neo4j_ops: Neo4jOps) -> None: + """ + neo4j metrics to be compute + input variables are all the neo4j credentials + """ + self.neo4j_ops = neo4j_ops + + def compute_metrics(self, guildId: str, from_start: bool) -> None: + """ + compute the essential metrics we wanted for neo4j + + Parameters: + ------------ + guildId : str + the specific guild we want to compute metrics for + from_start : bool + compute metrics from start or not + Note: only some metrics support this + others would be computed from_start=True + """ + if from_start: + self._remove_analytics_interacted_in(guildId) + + self.compute_local_clustering_coefficient(guildId, from_start) + self.compute_network_decentrality(guildId, from_start) + self.compute_node_stats(guildId, from_start) + + def compute_local_clustering_coefficient( + self, + guildId: str, + from_start: bool, + ): + """ + compute localClusteringCoefficient + + Parameters: + ------------ + guildId : str + the specific guild we want to compute metrics for + from_start : bool + compute metrics from start or not + Note: only some metrics support this + others would be computed from_start=True + """ + msg = f"GUILDID: {guildId}:" + try: + # Local Clustering Coefficient + logging.info(f"{msg} Computing LocalClusteringCoefficient") + lcc = LocalClusteringCoeff(gds=self.neo4j_ops.gds) + lcc.compute(guildId=guildId, from_start=from_start) + except Exception as exp: + logging.error( + f"{msg} Exception in computing LocalClusteringCoefficient, {exp}" + ) + + def compute_fragmentation_score( + self, + guildId: str, + past_window_date: float, + scale_fragmentation_score: int = 1, + ): + """ + average throught localClusteringCoefficients and group by date + this is the fragmentation score over each date period + + Note: We can compute this metric in backend, + so we might not add it to pipeline. 
+ + Parameters: + -------------- + guildId : str + the guildId to use + past_window_date : float + the timestamp for window date + scale_fragmentation_score : int + scaling the fragmentation score by a value + default is `1` meaning no scale + """ + msg = f"GUILDID: {guildId}:" + logging.info(f"{msg} Averaging LocalClusteringCoefficient") + query = """ + MATCH ()-[r:INTERACTED_IN]->(g:Guild {guildId: $guildId }) + WHERE r.date >= $past_date + WITH r.date as date, r.localClusteringCoefficient as lcc + RETURN + avg(lcc) * $scale AS fragmentation_score, + date + """ + records, _, _ = self.neo4j_ops.neo4j_driver.execute_query( + query, + guildId=guildId, + scale=scale_fragmentation_score, + past_date=past_window_date, + ) + + return records + + def compute_network_decentrality(self, guildId: str, from_start: bool): + """ + compute network decentrality and save results back to neo4j + """ + msg = f"GUILDID: {guildId}:" + try: + centrality = Centerality(self.neo4j_ops) + # degree decentrality + _ = centrality.compute_network_decentrality( + guildId=guildId, from_start=from_start + ) + except Exception as exp: + logging.error( + f"{msg} Exception occured in computing Network decentrality, {exp}!" + ) + + def compute_node_stats(self, guildId: str, from_start: bool): + """ + compute node stats + each DiscordAccount node could be either + - "0": meaning Sender + - "1": Receiver + - "2": Balanced + """ + msg = f"GUILDID: {guildId}:" + try: + logging.info(f"{msg}: computing node stats") + node_stats = NodeStats(self.neo4j_ops, threshold=2) + node_stats.compute_stats(guildId, from_start) + except Exception as exp: + logging.error(f"{msg} Exception occured in node stats computation, {exp}") + + def _remove_analytics_interacted_in(self, guildId: str) -> None: + """ + Remove the INTERACTED_IN relations + Note: we saved those under the INTERACTED_IN relation + + Parameters: + -------------- + guildId : str + the guild we want to delete the relations for + """ + with self.neo4j_ops.neo4j_driver.session() as session: + query = """ + MATCH (:DiscordAccount) -[r:INTERACTED_IN]->(:Guild {guildId: $guildId}) + DELETE r + """ + session.run(query=query, guildId=guildId) diff --git a/discord_analyzer/connector/__init__.py b/discord_analyzer/connector/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/models/BaseModel.py b/discord_analyzer/models/BaseModel.py new file mode 100644 index 0000000..c15ff8f --- /dev/null +++ b/discord_analyzer/models/BaseModel.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +import logging + + +class BaseModel: + """ + BaseModel description + All integrated models inherit from this object + """ + + def __init__(self, collection_name, database): + self.collection_name = collection_name + self.database = database + self.exists = False + + def collection_exists(self): + """ + Collection presence test + returns True if collection with this name exists in the + database + """ + if self.collection_name in self.database.list_collection_names(): + return True + else: + return False + + def insert_one(self, obj_dict): + """ + Inserts one document into the defined collection + """ + + if not self.collection_exists(): + msg = "Inserting guild object into the" + msg += f" {self.collection_name} collection failed:" + msg += "Collection does not exist" + logging.info(msg) + return + self.collection = self.database[self.collection_name] + logging.info( + f"Inserting guild object into the {self.collection_name} collection." 
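# Hedged usage sketch of the Neo4j analytics wrapper above; it assumes an
# already-connected Neo4jOps instance (connection setup is handled elsewhere
# by the analyzer classes) and the guild id below is a placeholder.
from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics

def run_neo4j_metrics(neo4j_ops, guild_id: str = "1234567890") -> None:
    analytics = Neo4JAnalytics(neo4j_ops=neo4j_ops)
    # recompute clustering coefficient, decentrality, and node stats from scratch
    analytics.compute_metrics(guildId=guild_id, from_start=True)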
+ ) + + return self.collection.insert_one(obj_dict) + + def insert_many(self, obj_dict_arr): + """ + Inserts one document into the defined collection + If create is True then a new collection is created + """ + + if not self.collection_exists(): + msg = "Inserting many guild objects into the" + msg += f"{self.collection_name} collection failed: " + msg += "Collection does not exist" + logging.info(msg) + return + self.collection = self.database[self.collection_name] + msg = "Inserting many guild objects into the " + msg += f"{self.collection_name} collection." + logging.info(msg) + + return self.collection.insert_many(obj_dict_arr) + + def _create_collection_if_not_exists(self): + """ + Creates the collection with specified name if it does not exist + """ + logging.info(f"Check if collection {self.collection_name} exists in database") + if self.collection_name in self.database.list_collection_names(): + logging.info(f"Collection {self.collection_name} exists") + else: + logging.info(f"Collection {self.collection_name} doesn't exist") + result = self.database.create_collection(self.collection_name) + logging.info(result) + self.database.command("collMod", self.collection_name) + self.collection = self.database[self.collection_name] + self.exists = True + + def get_one(self): + """ + Gets one documents from the database, + For testing purposes, no filtering is implemented. + """ + return self.database[self.collection_name].find_one() + + def get_all(self): + """ + Gets all documents from the database + """ + + return self.database[self.collection_name].find() + + def count(self): + """ + Returns the number of entries in this collection + """ + return self.database[self.collection_name].count_documents({}) diff --git a/discord_analyzer/models/GuildsRnDaoModel.py b/discord_analyzer/models/GuildsRnDaoModel.py new file mode 100644 index 0000000..6a6b7c0 --- /dev/null +++ b/discord_analyzer/models/GuildsRnDaoModel.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +import logging + +from discord_analyzer.models.BaseModel import BaseModel + + +class GuildsRnDaoModel(BaseModel): + def __init__(self, database=None): + if database is None: + logging.exception("Database does not exist.") + raise Exception("Database should not be None") + super().__init__(collection_name="guilds", database=database) + # print(self.database[self.collection_name].find_one()) + + def get_connected_guilds(self, guildId): + """ + Returns the list of the connected guilds if guildId is None + Otherwise the list of one connected guild with given guildId + """ + findOption = {"isDisconnected": False} + if guildId is not None: + findOption["guildId"] = guildId + guilds = self.database[self.collection_name].find(findOption) + return [x["guildId"] for x in guilds] + + def get_guild_info(self, guildId): + """ + Return detailed information of guild settings + Return None if such guild is not exist + """ + guild = self.database[self.collection_name].find({"guildId": guildId}) + if guild == []: + return None + return guild[0] + + def get_guild_period(self, guildId: str): + """ + get the period field from guild saved in RnDAO collection + """ + data = self.database[self.collection_name].find_one( + {"guildId": guildId}, {"period": 1, "_id": 0} + ) + if data is not None: + return data["period"] + else: + return None + + def get_guild_channels(self, guildId): + """ + get the channelSelection from a guild + + Parameters: + ------------- + guildId : str + the guildId to update its channel selection + + + Returns: + ---------- + channels : list 
of dictionaries + a list of dictionaries representative of channelName, channelId, and _id + """ + + query = {"guildId": f"{guildId}"} + feature_projection = {"selectedChannels": 1, "_id": 0} + + cursor = self.database[self.collection_name].find( + query, projection=feature_projection + ) + + channels = list(cursor) + + # initialize + selected_channels = None + + if channels == []: + selected_channels = [] + else: + selected_channels = channels[0]["selectedChannels"] + + return selected_channels + + def update_guild_channel_selection(self, guildId, selectedChannels): + """ + Update the channel selection in RnDAO for a specific guild + + Parameters: + ------------ + guildId : str + the guildId to update its channel selection + selectedChannels : dict + a dictionary for the channel selection, + each key values of dictionary must have the followings + `channelId`, `channelName`, and `_id` + example: + {'0': { + 'channelId': '1073334445554337223', + 'channelName': 'sample_channel_name', + '_id': ObjectId('156a84sd1') + }, + '1': {...} + } + + Returns: + ----------- + status : bool + if True, the channel selection is updated + else, the channel selection is not updated + """ + # query to filter the guilds of the RnDAO.guilds table + query = {"guildId": f"{guildId}"} + + update_field_query = {"$set": {"selectedChannels": selectedChannels}} + + # update the selectedChannels + self.database[self.collection_name].update_one(query, update_field_query) diff --git a/discord_analyzer/models/HeatMapModel.py b/discord_analyzer/models/HeatMapModel.py new file mode 100644 index 0000000..3b9f9b9 --- /dev/null +++ b/discord_analyzer/models/HeatMapModel.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +import logging +from datetime import datetime, timedelta, timezone + +import pymongo +from discord_analyzer.models.BaseModel import BaseModel + + +class HeatMapModel(BaseModel): + def __init__(self, database=None): + if database is None: + logging.exception("Database does not exist.") + raise Exception("Database should not be None") + super().__init__(collection_name="heatmaps", database=database) + self.validator = { + "$jsonSchema": { + "bsonType": "object", + "properties": { + "date": { + "bsonType": "date", + }, + "channel": { + "bsonType": "string", + }, + "lone_messages": { + "bsonType": "array", + "items": {"bsonType": "int"}, + }, + "thr_messages": {"bsonType": "array", "items": {"bsonType": "int"}}, + "replier": {"bsonType": "array", "items": {"bsonType": "int"}}, + "replier_accounts": { + "bsonType": "array", + "items": { + "bsonType": "object", + "required": ["_id", "account", "count"], + "properties": { + "_id": {"bsonType": "string"}, + "account": {"bsonType": "string"}, + "count": {"bsonType": "int"}, + }, + }, + }, + "replied": {"bsonType": "array", "items": {"bsonType": "int"}}, + "mentioner": {"bsonType": "array", "items": {"bsonType": "int"}}, + "mentioner_accounts": { + "bsonType": "array", + "items": { + "bsonType": "object", + "required": ["_id", "account", "count"], + "properties": { + "_id": {"bsonType": "string"}, + "account": {"bsonType": "string"}, + "count": {"bsonType": "int"}, + }, + }, + }, + "mentioned": {"bsonType": "array", "items": {"bsonType": "int"}}, + "reacter": {"bsonType": "array", "items": {"bsonType": "int"}}, + "reacter_accounts": { + "bsonType": "array", + "items": { + "bsonType": "object", + "required": ["_id", "account", "count"], + "properties": { + "_id": {"bsonType": "string"}, + "account": {"bsonType": "string"}, + "count": {"bsonType": "int"}, + }, + }, + }, + 
"reacted": {"bsonType": "array", "items": {"bsonType": "int"}}, + "account_name": { + "bsonType": "string", + }, + }, + } + } + + def get_last_date(self): + """ + Gets the date of the last document + """ + try: + date_str = ( + self.database[self.collection_name] + .find() + .sort([("date", pymongo.DESCENDING)]) + .limit(1)[0]["date"] + ) + date_obj = datetime.strptime(date_str, "%Y-%m-%d") + + return date_obj + # Parsing the time and timezone + date_str = date_str.split(" GMT") + date_str[1] = "GMT" + date_str[1] + date_str[1] = date_str[1].split(" ")[0].replace("GMT", "") + zone = [date_str[1][0:3], date_str[1][3::]] + zone_hrs = int(zone[0]) + zone_min = int(zone[1]) + date_obj = datetime.strptime(date_str[0], "%a %b %d %Y %H:%M:%S").replace( + tzinfo=timezone(timedelta(hours=zone_hrs, minutes=zone_min)) + ) + return date_obj + except Exception as e: + print(e) + return None + + def get_channels_disctinct(self): + """ + get the unique channels available in heatmaps + + Returns: + ---------- + distinct_channels : array of str + the returned data distincted + """ + feature_projection = {"channelId": 1} + + try: + cursor = ( + self.database[self.collection_name] + .find(projection=feature_projection) + .distinct("channelId") + ) + data = list(cursor) + except Exception as e: + print("Couldn't retreve distinct channels, exception: ", e) + data = None + + return data + + def remove_all_data(self): + """ + Removes all data whithing the collection + + Note: this is a dangerous function, + since it deletes all data whithin memberactivity collection. + + Returns: + ----------- + state : bool + if True, the data whithin collection is successfully deleted + if False, an exception is happened + """ + try: + self.database[self.collection_name].delete_many({}) + return True + except Exception as e: + print(e) + return False diff --git a/discord_analyzer/models/MemberActivityModel.py b/discord_analyzer/models/MemberActivityModel.py new file mode 100644 index 0000000..f4fbf02 --- /dev/null +++ b/discord_analyzer/models/MemberActivityModel.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +import logging +from datetime import datetime + +import pymongo +from discord_analyzer.models.BaseModel import BaseModel + + +class MemberActivityModel(BaseModel): + def __init__(self, database=None): + if database is None: + logging.exception("Database does not exist.") + raise Exception("Database should not be None") + super().__init__(collection_name="memberactivities", database=database) + self.validator = { + "$jsonSchema": { + "bsonType": "object", + "properties": { + "date": { + "bsonType": "date", + }, + "all_active": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_consistent": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_vital": {"bsonType": "array", "items": {"bsonType": "string"}}, + "all_connected": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_paused": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_new_disengaged": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_disengaged": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_unpaused": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_returned": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_new_active": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_still_active": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_dropped": 
{ + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_joined": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_disengaged_were_newly_active": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_disengaged_were_consistenly_active": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "all_disengaged_were_vital": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + }, + } + } + + def get_last_date(self): + """ + Gets the date of the last document + """ + try: + date_str = ( + self.database[self.collection_name] + .find() + .sort([("date", pymongo.DESCENDING)]) + .limit(1)[0]["date"] + ) + date_format = "%Y-%m-%dT%H:%M:%S" + date_object = datetime.strptime(date_str, date_format) + return date_object + except Exception as e: + print(e) + return None + + def remove_all_data(self): + """ + Removes all data whithing the collection + + Note: this is a dangerous function, + since it deletes all data whithin memberactivity collection. + + Returns: + ----------- + state : bool + if True, the data whithin collection is successfully deleted + if False, an exception is happened + """ + try: + self.database[self.collection_name].delete_many({}) + return True + except Exception as e: + print(e) + return False diff --git a/discord_analyzer/models/RawInfoModel.py b/discord_analyzer/models/RawInfoModel.py new file mode 100644 index 0000000..9deda62 --- /dev/null +++ b/discord_analyzer/models/RawInfoModel.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +import logging +from datetime import timedelta + +import pymongo +from discord_analyzer.models.BaseModel import BaseModel + + +class RawInfoModel(BaseModel): + def __init__(self, database=None): + if database is None: + logging.info("Database does not exist.") + raise Exception("Database should not be None") + super().__init__(collection_name="rawinfos", database=database) + self.guild_msg = f"GUILDID: {self.database.name}:" + self.validator = { + "$jsonSchema": { + "bsonType": "object", + "properties": { + "type": {"bsonType": "string"}, + "author": {"bsonType": "string"}, + "content": {"bsonType": "string"}, + "user_Mentions": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "roles_Mentions": { + "bsonType": "array", + "items": {"bsonType": "string"}, + }, + "reactions": {"bsonType": "array", "items": {"bsonType": "string"}}, + "replied_User": {"bsonType": "string"}, + "reference_Message": {"bsonType": "int"}, + "datetime": { + "bsonType": "string", + }, + "channelId": { + "bsonType": "string", + }, + }, + } + } + + def get_first_date(self): + """ + Get's the date of the first document in the collection + For determining the analysis date ranges + This is RawInfo specific method + """ + if self.database[self.collection_name].count_documents({}) > 0: + record = self.database[self.collection_name].find_one( + {}, sort=[("createdDate", pymongo.ASCENDING)] + ) + + first_date = record["createdDate"] + + # ( + # self.database[self.collection_name] + # .find() + # .sort([("createdDate", pymongo.ASCENDING)]) + # .limit(1)[0]["createdDate"] + # ) + # date_obj = datetime.strptime(first_date, "%Y-%m-%d %H:%M:%S") + + return first_date + # do something with the first document + else: + # handle the case where no documents are returned by the query + print(f"{self.guild_msg} No documents found in the collection") + return None + + def get_day_entries(self, day, msg=""): + """ + Gets the list of entries for the stated day + This is RawInfo specific method + + `msg` 
parameter is for additional info to be logged + """ + guild_msg = f"GUILDID: {self.database.name}:{msg}" + + start_day = day.replace(hour=0, minute=0, second=0) + end_day = start_day + timedelta(days=1) + + logg_msg = f"{guild_msg} Fetching documents |" + logg_msg += f" {self.collection_name}: {start_day} -> {end_day}" + logging.info(logg_msg) + + # date_str = day.strftime("%Y-%m-%d") + + # entries = self.database[self.collection_name].find( + # {"datetime": {"$regex": "^" + date_str}} + # ) + entries = self.database[self.collection_name].find( + {"createdDate": {"$gte": start_day, "$lte": end_day}} + ) + return list(entries) diff --git a/discord_analyzer/models/RnDaoModel.py b/discord_analyzer/models/RnDaoModel.py new file mode 100644 index 0000000..898e30c --- /dev/null +++ b/discord_analyzer/models/RnDaoModel.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + + +import logging + +from discord_analyzer.models.BaseModel import BaseModel + + +class RnDaoModel(BaseModel): + def __init__(self, database=None): + if database is None: + logging.exception("Database does not exist.") + raise Exception("Database should not be None") + super().__init__(collection_name="RnDAO", database=database) diff --git a/discord_analyzer/models/__init__.py b/discord_analyzer/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/discord_analyzer/rn_analyzer.py b/discord_analyzer/rn_analyzer.py new file mode 100644 index 0000000..e857c11 --- /dev/null +++ b/discord_analyzer/rn_analyzer.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 +import logging +import os +import sys + +from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps +from discord_analyzer.analyzer.analyzer_memberactivities import Member_activities +from discord_analyzer.analyzer.base_analyzer import Base_analyzer +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from discord_analyzer.models.GuildsRnDaoModel import GuildsRnDaoModel +from discord_analyzer.models.HeatMapModel import HeatMapModel +from discord_analyzer.models.RawInfoModel import RawInfoModel +from dotenv import load_dotenv + + +class RnDaoAnalyzer(Base_analyzer): + """ + RnDaoAnalyzer + class that handles database connection and data analysis + """ + + def __init__(self, testing=False): + """ + Class initiation function + """ + """ Testing, prevents from data upload""" + self.testing = testing + logging.basicConfig() + logging.getLogger().setLevel(logging.INFO) + + def setup_neo4j_metrics(self) -> None: + """ + setup the neo4j analytics wrapper + """ + + self.neo4j_analytics = Neo4JAnalytics(neo4j_ops=self.DB_connections.neo4j_ops) + + def run_once(self, guildId): + """Run analysis once (Wrapper)""" + + guilds_c = GuildsRnDaoModel( + self.DB_connections.mongoOps.mongo_db_access.db_mongo_client["RnDAO"] + ) + + guilds = guilds_c.get_connected_guilds(guildId) + + logging.info(f"Creating heatmaps for {guilds}") + + # each guild data in a nested dictionary format + guilds_data = {} + + for guild in guilds: + logging.info(f"Doing Analytics for {guild}") + + heatmaps_analysis = Heatmaps(self.DB_connections, self.testing) + heatmaps_data = heatmaps_analysis.analysis_heatmap(guild) + + # storing heatmaps since memberactivities use them + analytics_data = {} + analytics_data[f"{guild}"] = { + "heatmaps": heatmaps_data, + "memberactivities": ( + None, + None, + ), + } + self.DB_connections.store_analytics_data( + analytics_data=analytics_data, + remove_memberactivities=False, + remove_heatmaps=False, + ) + + memberactivities_analysis = Member_activities( 
+                self.DB_connections, logging=logging
+            )
+            (
+                member_activities_data,
+                member_acitivities_networkx_data,
+            ) = memberactivities_analysis.analysis_member_activity(
+                guild, self.connection_str
+            )
+
+            # storing the whole data into a dictionary
+            guilds_data[f"{guild}"] = {
+                "heatmaps": None,
+                "memberactivities": (
+                    member_activities_data,
+                    member_acitivities_networkx_data,
+                ),
+            }
+
+            self.DB_connections.store_analytics_data(
+                analytics_data=guilds_data,
+                remove_heatmaps=False,
+                remove_memberactivities=False,
+            )
+
+            self.neo4j_analytics.compute_metrics(guildId=guildId, from_start=False)
+
+            self._update_isin_progress(guildId=guild)
+
+    def get_guilds(self):
+        """Returns the list of all guilds"""
+        client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client
+        rawinfo_c = RawInfoModel(client)
+
+        logging.info(f"Listed guilds {rawinfo_c.database.list_collection_names()}")
+
+    def recompute_analytics_on_guilds(self, guildId_list):
+        """
+        recompute the analytics for the guilds available in the RnDAO table;
+        guilds in guildId_list that are not available in RnDAO are not recomputed
+
+        Parameters:
+        --------------
+        guildId_list : list of str
+            list of `guildId`s
+            Input can be `None`, meaning recompute for all guilds
+
+        Returns:
+        ---------
+        `None`
+        """
+        client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client
+
+        # check which guilds are available in the RnDAO table
+        guilds_c = GuildsRnDaoModel(client["RnDAO"])
+        guilds = guilds_c.get_connected_guilds(guildId_list)
+
+        logging.info(f"Recomputing analytics for {guilds}")
+
+        for guildId in guilds:
+            self.recompute_analytics(guildId)
+
+            self._update_isin_progress(guildId=guildId)
+
+        return None
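+    # A minimal usage sketch for the two recompute entrypoints above, assuming the
+    # same credential setup as the __main__ block at the end of this file; the
+    # guildId below is a placeholder, not a value taken from this change.
+    #
+    #   analyzer = RnDaoAnalyzer()
+    #   analyzer.set_mongo_database_info(
+    #       mongo_db_host=host,
+    #       mongo_db_password=password,
+    #       mongo_db_user=user,
+    #       mongo_db_port=port,
+    #   )
+    #   analyzer.set_neo4j_database_info(neo4j_creds=neo4j_creds)
+    #   analyzer.database_connect()
+    #   analyzer.setup_neo4j_metrics()
+    #   analyzer.recompute_analytics_on_guilds(["1234567890"])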
+    def recompute_analytics(self, guildId):
+        """
+        recompute the memberactivities (and heatmaps if needed)
+        for a new selection of channels
+
+        - First, it updates the channel selection in RnDAO
+
+        - Second, the memberactivities collection
+          of the input guildId is emptied
+
+        - Third, the analytics are run again on the
+          new channel selection (analytics are inserted into memberactivities)
+
+        Parameters:
+        -------------
+        guildId : str
+            the guildId whose collection data is recomputed
+
+        Returns:
+        ---------
+        bool : `True` when the recompute job has finished
+        """
+
+        client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client
+
+        guild_c = GuildsRnDaoModel(client["RnDAO"])
+        selectedChannels = guild_c.get_guild_channels(guildId=guildId)
+
+        if selectedChannels != []:
+            # get the `channel_id`s
+            channel_id_list = list(
+                map(lambda channel_info: channel_info["channelId"], selectedChannels)
+            )
+        else:
+            channel_id_list = []
+
+        # check if all the channels were available in heatmaps
+        is_available = self.DB_connections.mongoOps.check_heatmaps(
+            guildId=guildId,
+            selectedChannels=channel_id_list,
+            heatmap_model=HeatMapModel,
+        )
+
+        # initialize variable
+        heatmaps_data = None
+        heatmaps_analysis = Heatmaps(self.DB_connections, self.testing)
+        heatmap_isempty = heatmaps_analysis.is_empty(guildId)
+
+        # if not available we should remove the heatmaps data
+        # and run the heatmaps analytics too
+        # TODO: condition update
+        is_available = False
+        if not is_available or heatmap_isempty:
+            logging.info(f"Analyzing the Heatmaps data for guild: {guildId}!")
+            heatmaps_data = heatmaps_analysis.analysis_heatmap(
+                guildId=guildId, from_start=True
+            )
+
+        # storing heatmaps since memberactivities use them
+        analytics_data = {}
+        analytics_data[f"{guildId}"] = {
+            "heatmaps": heatmaps_data,
+            "memberactivities": (
+                None,
+                None,
+            ),
+        }
+        self.DB_connections.store_analytics_data(
+            analytics_data=analytics_data,
+            remove_memberactivities=False,
+            remove_heatmaps=not is_available,
+        )
+
+        # run the member_activity analysis
+        logging.info(f"Analyzing the MemberActivities data for guild: {guildId}!")
+        memberactivity_analysis = Member_activities(
+            self.DB_connections, logging=logging
+        )
+        (
+            member_activities_data,
+            member_acitivities_networkx_data,
+        ) = memberactivity_analysis.analysis_member_activity(
+            guildId, self.connection_str, from_start=True
+        )
+
+        # storing the whole data into a dictionary
+        analytics_data = {}
+        analytics_data[f"{guildId}"] = {
+            "heatmaps": None,
+            "memberactivities": (
+                member_activities_data,
+                member_acitivities_networkx_data,
+            ),
+        }
+
+        self.DB_connections.store_analytics_data(
+            analytics_data=analytics_data,
+            remove_memberactivities=True,
+            remove_heatmaps=False,
+        )
+
+        self.neo4j_analytics.compute_metrics(guildId=guildId, from_start=True)
+
+        self._update_isin_progress(guildId=guildId)
+
+        # returning a value when the job has finished
+        return True
+
+    def _update_isin_progress(self, guildId):
+        """
+        update the isInProgress field of the RnDAO collection
+
+        Parameters:
+        ------------
+        guildId : str
+            the guildId to update its document
+        """
+        client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client
+
+        client["RnDAO"]["guilds"].update_one(
+            {"guildId": guildId}, {"$set": {"isInProgress": False}}
+        )
+
+
+# get guildId from command, if not given return None
+# python ./analyzer.py guildId
+
+
+def getParamsFromCmd():
+    """
+    get the guildId and recompute analysis arguments from cmd
+    the second argument should be guildId,
+    and the third one should be recompute_analysis
+    (if the third argument is not given, recompute_analysis will be False)
+
+    Returns:
+    ----------
+    guildId : str
+        the guildId to analyze
+    recompute_analysis : bool
+        whether to recompute the analysis or just run once
+
+    """
+    args = sys.argv
+    guildId = None
+    recompute_analysis = False
+    if len(args) == 2:
+        guildId = args[1]
+    elif len(args) == 3:
+        guildId = args[1]
+        recompute_analysis = True
+    return guildId, recompute_analysis
+
+
+if __name__ == "__main__":
+    load_dotenv()
+
+    # logging.basicConfig()
+    # logging.getLogger().setLevel(logging.INFO)
+    analyzer = RnDaoAnalyzer()
+
+    user = os.getenv("MONGODB_USER", "")
+    password = os.getenv("MONGODB_PASS", "")
+    host = os.getenv("MONGODB_HOST", "")
+    port = os.getenv("MONGODB_PORT", "")
+
+    neo4j_creds = {}
+    neo4j_creds["db_name"] = os.getenv("NEO4J_DB", "")
+    neo4j_creds["protocol"] = os.getenv("NEO4J_PROTOCOL", "")
+    neo4j_creds["host"] = os.getenv("NEO4J_HOST", "")
+    neo4j_creds["port"] = os.getenv("NEO4J_PORT", "")
+    neo4j_creds["password"] = os.getenv("NEO4J_PASSWORD", "")
+    neo4j_creds["user"] = os.getenv("NEO4J_USER", "")
+
+    neo4j_user = os.getenv("NEO4J_USER", "")
+    neo4j_password = os.getenv("NEO4J_PASSWORD", "")
+
+    analyzer.set_mongo_database_info(
+        mongo_db_host=host,
+        mongo_db_password=password,
+        mongo_db_user=user,
+        mongo_db_port=port,
+    )
+
+    analyzer.set_neo4j_database_info(neo4j_creds=neo4j_creds)
+
+    guildId, recompute_analysis = getParamsFromCmd()
+    analyzer.database_connect()
+    analyzer.setup_neo4j_metrics()
+
+    if not recompute_analysis:
+        analyzer.run_once(guildId)
+    else:
+        analyzer.recompute_analytics_on_guilds(guildId)
diff --git a/discord_analyzer/schemas/__init__.py b/discord_analyzer/schemas/__init__.py
new file mode 100644
index 0000000..e5a0d9b
--- /dev/null
+++ b/discord_analyzer/schemas/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3 diff --git a/discord_analyzer/schemas/accounts.py b/discord_analyzer/schemas/accounts.py new file mode 100644 index 0000000..5e2c416 --- /dev/null +++ b/discord_analyzer/schemas/accounts.py @@ -0,0 +1,13 @@ +class AccountCounts: + """ + Class for storing number of interactions per account + """ + + # define constructor + def __init__(self, account, counts): + self.account = account # account name + self.counts = counts # number of interactions + + # convert as dict + def asdict(self): + return ({"account": self.account, "count": self.counts},) diff --git a/discord_utils.py b/discord_utils.py new file mode 100644 index 0000000..9b3721e --- /dev/null +++ b/discord_utils.py @@ -0,0 +1,114 @@ +import logging +from typing import Any + +from analyzer_init import AnalyzerInit +from tc_messageBroker.rabbit_mq.saga.saga_base import get_saga +from utils.get_rabbitmq import prepare_rabbit_mq +from utils.transactions_ordering import sort_transactions + + +def analyzer_recompute(sagaId: str, rabbit_creds: dict[str, Any]): + analyzer_init = AnalyzerInit() + analyzer, mongo_creds = analyzer_init.get_analyzer() + + saga = get_saga_instance( + sagaId=sagaId, + connection=mongo_creds["connection_str"], + saga_db=mongo_creds["db_name"], + saga_collection=mongo_creds["collection_name"], + ) + if saga is None: + logging.warn( + f"Warn: Saga not found!, stopping the recompute for sagaId: {sagaId}" + ) + else: + guildId = saga.data["guildId"] + + def recompute_wrapper(**kwargs): + analyzer.recompute_analytics(guildId=guildId) + + def publish_wrapper(**kwargs): + pass + + saga.next( + publish_method=publish_wrapper, + call_function=recompute_wrapper, + mongo_creds=mongo_creds, + ) + + return rabbit_creds, sagaId, mongo_creds + + +def analyzer_run_once(sagaId: str, rabbit_creds: dict[str, Any]): + analyzer_init = AnalyzerInit() + analyzer, mongo_creds = analyzer_init.get_analyzer() + + saga = get_saga_instance( + sagaId=sagaId, + connection=mongo_creds["connection_str"], + saga_db=mongo_creds["db_name"], + saga_collection=mongo_creds["collection_name"], + ) + if saga is None: + logging.warn(f"Saga not found!, stopping the run_once for sagaId: {sagaId}") + else: + guildId = saga.data["guildId"] + + def run_once_wrapper(**kwargs): + analyzer.run_once(guildId=guildId) + + def publish_wrapper(**kwargs): + pass + + saga.next( + publish_method=publish_wrapper, + call_function=run_once_wrapper, + mongo_creds=mongo_creds, + ) + return rabbit_creds, sagaId, mongo_creds + + +def get_saga_instance(sagaId: str, connection: str, saga_db: str, saga_collection: str): + saga = get_saga( + sagaId=sagaId, + connection_url=connection, + db_name=saga_db, + collection=saga_collection, + ) + return saga + + +def publish_on_success(connection, result, *args, **kwargs): + # we must get these three things + try: + rabbit_creds = args[0][0] + sagaId = args[0][1] + mongo_creds = args[0][2] + logging.info(f"SAGAID: {sagaId}: ON_SUCCESS callback! 
") + + saga = get_saga_instance( + sagaId=sagaId, + connection=mongo_creds["connection_str"], + saga_db=mongo_creds["db_name"], + saga_collection=mongo_creds["collection_name"], + ) + rabbitmq = prepare_rabbit_mq(rabbit_creds) + + transactions = saga.choreography.transactions + + (transactions_ordered, tx_not_started_count) = sort_transactions(transactions) + + if tx_not_started_count != 0: + guildId = saga.data["guildId"] + tx = transactions_ordered[0] + + logging.info(f"GUILDID: {guildId}: Publishing for {tx.queue}") + + rabbitmq.connect(tx.queue) + rabbitmq.publish( + queue_name=tx.queue, + event=tx.event, + content={"uuid": sagaId, "data": saga.data}, + ) + except Exception as exp: + logging.info(f"Exception occured in job on_success callback: {exp}") diff --git a/docker-compose.example.yml b/docker-compose.example.yml index 0eccf2e..db8a3de 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -6,6 +6,7 @@ services: context: . target: prod dockerfile: Dockerfile + command: python3 start_rabbit_mq.py worker: build: context: . diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 0f4af40..f796239 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -31,40 +31,44 @@ services: - SENTRY_ENV=local volumes: - ./coverage:/project/coverage - networks: - - python_service_network depends_on: - - redis - - mongo - - neo4j - - rabbitmq - redis: - image: "redis:7.0.12-alpine" - networks: - - python_service_network + mongo: + condition: service_healthy + neo4j: + condition: service_healthy + rabbitmq: + condition: service_healthy mongo: image: "mongo:6.0.8" environment: - MONGO_INITDB_ROOT_USERNAME=root - MONGO_INITDB_ROOT_PASSWORD=pass - networks: - - python_service_network - rabbitmq: - image: "rabbitmq:3-management-alpine" - environment: - - RABBITMQ_DEFAULT_USER=root - - RABBITMQ_DEFAULT_PASS=pass - networks: - - python_service_network + healthcheck: + test: echo 'db.stats().ok' | mongosh localhost:27017/test --quiet + interval: 60s + timeout: 10s + retries: 2 + start_period: 40s neo4j: image: "neo4j:5.9.0" environment: - NEO4J_AUTH=neo4j/password - NEO4J_PLUGINS=["apoc", "graph-data-science"] - NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.* - networks: - - python_service_network - -networks: - python_service_network: - driver: bridge + healthcheck: + test: ["CMD" ,"wget", "http://localhost:7474"] + interval: 1m30s + timeout: 10s + retries: 2 + start_period: 40s + rabbitmq: + image: "rabbitmq:3-management-alpine" + environment: + - RABBITMQ_DEFAULT_USER=root + - RABBITMQ_DEFAULT_PASS=pass + healthcheck: + test: rabbitmq-diagnostics -q ping + interval: 30s + timeout: 30s + retries: 2 + start_period: 40s diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 0000000..b31af7c --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +python3 -m coverage run --omit=tests/* -m pytest tests +python3 -m coverage lcov -o coverage/lcov.info \ No newline at end of file diff --git a/mongodb_connection.py b/mongodb_connection.py deleted file mode 100644 index b14768e..0000000 --- a/mongodb_connection.py +++ /dev/null @@ -1,15 +0,0 @@ -import os - -from dotenv import load_dotenv -from pymongo import MongoClient - -load_dotenv() - -host = os.getenv("DB_HOST") -port = os.getenv("DB_PORT") -user = os.getenv("DB_USER") -password = os.getenv("DB_PASSWORD") -db_name = os.getenv("DB_NAME") - -mongodb_connection = f"mongodb://{user}:{password}@{host}:{port}/{db_name}" -client = 
MongoClient(host=mongodb_connection) diff --git a/neo4j_connection.py b/neo4j_connection.py deleted file mode 100644 index af67016..0000000 --- a/neo4j_connection.py +++ /dev/null @@ -1,30 +0,0 @@ -import os - -from dotenv import load_dotenv -from neo4j import GraphDatabase - -load_dotenv() - -user = os.getenv("NEO4J_USER") -protocol = os.getenv("NEO4J_PROTOCOL") -password = os.getenv("NEO4J_PASSWORD") -host = os.getenv("NEO4J_HOST") -port = os.getenv("NEO4J_PORT") -db_name = os.getenv("NEO4J_DB") - -neo4j_url = f"{protocol}://{host}:{port}" -neo4j_auth = (user, password) - -driver = GraphDatabase.driver(neo4j_url, auth=neo4j_auth) -with GraphDatabase.driver(neo4j_url, auth=neo4j_auth) as driver: - driver.verify_connectivity() - - -def read(cypher, database="neo4j"): - records, summary, keys = driver.execute_query(cypher, database_=database) - return records, summary, keys - - -def write(cypher, database="neo4j"): - records, summary, keys = driver.execute_query(cypher, database_=database) - return summary diff --git a/rabbitmq_connection.py b/rabbitmq_connection.py deleted file mode 100644 index 5089ed1..0000000 --- a/rabbitmq_connection.py +++ /dev/null @@ -1,16 +0,0 @@ -import os - -from dotenv import load_dotenv -from tc_messageBroker import RabbitMQ - -load_dotenv() - -if __name__ == "__main__": - broker_url = os.getenv("RABBIT_HOST") - port = os.getenv("RABBIT_PORT") - username = os.getenv("RABBIT_USER") - password = os.getenv("RABBIT_PASSWORD") - - rabbit_mq = RabbitMQ( - broker_url=broker_url, port=port, username=username, password=password - ) diff --git a/redis_worker.py b/redis_worker.py new file mode 100644 index 0000000..01fae69 --- /dev/null +++ b/redis_worker.py @@ -0,0 +1,34 @@ +import logging + +import redis +from rq import Worker +from utils.daolytics_uitls import get_redis_credentials + + +def worker_exception_handler(job, exc_type, exc_value, traceback): + logging.error(" ========= RQ Exception =========") + logging.error(f"JOB: {job}") + logging.error(f"exc_type: {exc_type}") + logging.error(f"exc_value: {exc_value}") + logging.error(f"traceback: {traceback}") + + +if __name__ == "__main__": + redis_creds = get_redis_credentials() + + logging.basicConfig() + logging.getLogger().setLevel(logging.INFO) + + host = redis_creds["host"] + port = redis_creds["port"] + password = redis_creds["pass"] + + r = redis.Redis(host=host, port=port, password=password) + worker = Worker( + queues=["default"], connection=r, exception_handlers=worker_exception_handler + ) + try: + worker.work(with_scheduler=True, max_jobs=1) + except KeyboardInterrupt: + worker.clean_registries() + worker.stop_scheduler() diff --git a/requirements.txt b/requirements.txt index 5f41820..1929f8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,26 @@ -async-timeout==4.0.2 attrs==22.2.0 -cffi==1.15.1 -click==8.1.6 -coverage==7.2.5 -cryptography==41.0.2 dnspython==2.2.1 exceptiongroup==1.1.0 iniconfig==2.0.0 -jsonschema==4.17.3 -neo4j==5.10.0 -networkx==3.1 numpy==1.24.1 packaging==23.0 -pandas==2.0.3 -pika==1.3.2 pluggy==1.0.0 -pycparser==2.21 pymongo==4.3.3 -pyrsistent==0.19.3 pytest==7.2.0 -pytest-cov==4.0.0 python-dateutil==2.8.2 -python-dotenv==0.21.1 pytz==2022.7.1 -redis==4.6.0 -rq==1.15.1 -ruff==0.0.265 +python-dotenv==0.21.1 six==1.16.0 -tc-messageBroker==1.4.0 tomli==2.0.1 -types-pyOpenSSL==23.2.0.2 -tzdata==2023.3 +networkx==3.1 +requests==2.29.0 +pytest-cov==4.0.0 +coverage==7.2.5 +python-dateutil==2.8.2 +tqdm +tc-messageBroker==1.4.0 +sentry-sdk +rq +redis 
+tc-core-analyzer-lib==1.0.1 +tc-neo4j-lib==1.0.0 diff --git a/start_rabbit_mq.py b/start_rabbit_mq.py new file mode 100644 index 0000000..4fb8ecb --- /dev/null +++ b/start_rabbit_mq.py @@ -0,0 +1,95 @@ +""" +start the project using rabbitMQ +""" +import functools +import logging +from typing import Any + +from discord_utils import analyzer_recompute, analyzer_run_once, publish_on_success +from redis import Redis +from rq import Queue as RQ_Queue +from tc_messageBroker.message_broker import RabbitMQ +from tc_messageBroker.rabbit_mq.event import Event +from tc_messageBroker.rabbit_mq.queue import Queue +from utils.daolytics_uitls import ( + get_rabbit_mq_credentials, + get_redis_credentials, + get_sentryio_service_creds, +) +from utils.sentryio_service import set_up_sentryio + + +def analyzer(): + rabbit_mq_creds = get_rabbit_mq_credentials() + sentry_creds = get_sentryio_service_creds() + + # sentryio service + set_up_sentryio(sentry_creds["dsn"], sentry_creds["env"]) + redis_creds = get_redis_credentials() + + rabbit_mq = RabbitMQ( + broker_url=rabbit_mq_creds["broker_url"], + port=rabbit_mq_creds["port"], + username=rabbit_mq_creds["username"], + password=rabbit_mq_creds["password"], + ) + + redis = Redis( + host=redis_creds["host"], + port=redis_creds["port"], + password=redis_creds["pass"], + ) + + # 24 hours equal to 86400 seconds + rq_queue = RQ_Queue(connection=redis, default_timeout=86400) + + analyzer_recompute = functools.partial( + recompute_wrapper, redis_queue=rq_queue, rabbit_mq_creds=rabbit_mq_creds + ) + analyzer_run_once = functools.partial( + run_once_wrapper, redis_queue=rq_queue, rabbit_mq_creds=rabbit_mq_creds + ) + + rabbit_mq.connect(Queue.DISCORD_ANALYZER, heartbeat=60) + + rabbit_mq.on_event(Event.DISCORD_ANALYZER.RUN, analyzer_recompute) + rabbit_mq.on_event(Event.DISCORD_ANALYZER.RUN_ONCE, analyzer_run_once) + + if rabbit_mq.channel is None: + logging.info("Error: was not connected to RabbitMQ broker!") + else: + logging.info("Started Consuming!") + rabbit_mq.channel.start_consuming() + + +def recompute_wrapper( + body: dict[str, Any], redis_queue: RQ_Queue, rabbit_mq_creds: dict[str, Any] +): + sagaId = body["content"]["uuid"] + logging.info(f"SAGAID:{sagaId} recompute job Adding to queue") + + redis_queue.enqueue( + analyzer_recompute, + sagaId=sagaId, + rabbit_creds=rabbit_mq_creds, + on_success=publish_on_success, + ) + + +def run_once_wrapper( + body: dict[str, Any], redis_queue: RQ_Queue, rabbit_mq_creds: dict[str, Any] +): + sagaId = body["content"]["uuid"] + logging.info(f"SAGAID:{sagaId} run_once job Adding to queue") + redis_queue.enqueue( + analyzer_run_once, + sagaId=sagaId, + rabbit_creds=rabbit_mq_creds, + on_success=publish_on_success, + ) + + +if __name__ == "__main__": + logging.basicConfig() + logging.getLogger().setLevel(logging.INFO) + analyzer() diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/test_activity_hourly_lone_message.py b/tests/integration/test_activity_hourly_lone_message.py new file mode 100644 index 0000000..cfb125e --- /dev/null +++ b/tests/integration/test_activity_hourly_lone_message.py @@ -0,0 +1,64 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.activity_hourly import activity_hourly +from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps + + +def test_lone_messages(): + # data preparation + day = datetime(2023, 1, 1) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 
16, 18, 19, 20, 21] + DAY_COUNT = 2 + + acc_names = [] + for i in range(10): + acc_names.append(f"87648702709958252{i}") + + prepared_list = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = f"10207071292141118{i}" + prepared_data = { + "mess_type": 0, + "author": acc, + "user_mentions": [], + "reactions": [], + "replied_user": None, + "datetime": data_date, + "channel": chId, + "threadId": None, + } + + prepared_list.append(prepared_data) + channelIds.add(chId) + dates.add(data_date.strftime("%Y-%m-%d")) + + (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) + + analyzer_heatmaps = Heatmaps("DB_connection", testing=False) + results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) + + assert len(results) == (len(acc_names) - 1) * DAY_COUNT + for document in results: + assert document["account_name"] in acc_names + assert document["date"] in dates + assert document["account_name"] in acc_names + assert document["channelId"] in channelIds + assert document["reacted_per_acc"] == [] + assert document["mentioner_per_acc"] == [] + assert document["replied_per_acc"] == [] + assert sum(document["thr_messages"]) == 0 + assert sum(document["mentioner"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert sum(document["mentioned"]) == 0 + assert sum(document["reacter"]) == 0 + + # the only document we have + assert sum(document["lone_messages"]) == len(hours_to_include) diff --git a/tests/integration/test_activity_hourly_mentions.py b/tests/integration/test_activity_hourly_mentions.py new file mode 100644 index 0000000..ac7f1ea --- /dev/null +++ b/tests/integration/test_activity_hourly_mentions.py @@ -0,0 +1,87 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.activity_hourly import activity_hourly +from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps + + +def test_mentioned_messages(): + # data preparation + day = datetime(2023, 1, 1) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + DAY_COUNT = 2 + + acc_names = [] + for i in range(10): + acc_names.append(f"87648702709958252{i}") + + prepared_list = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = f"10207071292141118{i}" + prepared_data = { + "mess_type": 0, + "author": acc, + "user_mentions": ["876487027099582520", "876487027099582521"], + "reactions": [], + "replied_user": None, + "datetime": data_date, + "channel": chId, + "threadId": None, + } + + prepared_list.append(prepared_data) + channelIds.add(chId) + dates.add(data_date.strftime("%Y-%m-%d")) + + accs_mentioned = ["876487027099582520", "876487027099582521"] + + (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) + + analyzer_heatmaps = Heatmaps("DB_connection", testing=False) + results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) + + assert len(results) == (len(acc_names) - 1) * DAY_COUNT + for document in results: + assert document["account_name"] in acc_names + assert document["date"] in dates + assert document["channelId"] in channelIds + assert document["reacted_per_acc"] == [] + assert sum(document["thr_messages"]) == 0 + assert sum(document["reacter"]) == 0 + assert sum(document["replied"]) == 
0 + assert sum(document["replier"]) == 0 + assert document["replied_per_acc"] == [] + assert sum(document["lone_messages"]) == len(hours_to_include) + + if document["account_name"] == "876487027099582520": + assert document["mentioner_per_acc"] == [ + ({"account": "876487027099582521", "count": (len(acc_names) - 2)},) + ] + assert sum(document["mentioner"]) == len(hours_to_include) + assert sum(document["mentioned"]) == len(hours_to_include) * ( + len(acc_names) - 2 + ) + + elif document["account_name"] == "876487027099582521": + assert document["mentioner_per_acc"] == [ + ({"account": "876487027099582520", "count": (len(acc_names) - 2)},) + ] + assert sum(document["mentioner"]) == len(hours_to_include) + assert sum(document["mentioned"]) == len(hours_to_include) * ( + len(acc_names) - 2 + ) + else: + assert document["mentioner_per_acc"] == [ + ({"account": "876487027099582520", "count": 9},), + ({"account": "876487027099582521", "count": 9},), + ] + assert sum(document["mentioner"]) == len(hours_to_include) * len( + accs_mentioned + ) + assert sum(document["mentioned"]) == 0 diff --git a/tests/integration/test_activity_hourly_reactions.py b/tests/integration/test_activity_hourly_reactions.py new file mode 100644 index 0000000..6bec4cb --- /dev/null +++ b/tests/integration/test_activity_hourly_reactions.py @@ -0,0 +1,88 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.activity_hourly import activity_hourly +from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps + + +def test_reacted_messages(): + # data preparation + day = datetime(2023, 1, 1) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + DAY_COUNT = 3 + + acc_names = [] + for i in range(10): + acc_names.append(f"87648702709958252{i}") + + prepared_list = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = f"10207071292141118{i}" + prepared_data = { + "mess_type": 0, + "author": acc, + "user_mentions": [], + "reactions": [ + "876487027099582520,876487027099582521,👍", + "876487027099582522,heatface", + ], + "replied_user": None, + "datetime": data_date, + "channel": chId, + "threadId": None, + } + + prepared_list.append(prepared_data) + channelIds.add(chId) + dates.add(data_date.strftime("%Y-%m-%d")) + + reacted_accs = set( + ["876487027099582520", "876487027099582521", "876487027099582522"] + ) + + (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) + + analyzer_heatmaps = Heatmaps("DB_connection", testing=False) + results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) + + # print(results) + + assert len(results) == (len(acc_names) - 1) * DAY_COUNT + for document in results: + assert document["account_name"] in acc_names + assert document["date"] in dates + assert document["account_name"] in acc_names + assert document["channelId"] in channelIds + assert sum(document["thr_messages"]) == 0 + assert sum(document["mentioner"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert sum(document["mentioned"]) == 0 + assert document["mentioner_per_acc"] == [] + assert document["replied_per_acc"] == [] + assert sum(document["lone_messages"]) == len(hours_to_include) + + if document["account_name"] not in reacted_accs: + assert document["reacted_per_acc"] == [ + ({"account": "876487027099582520", "count": len(acc_names) - 2},), + ({"account": 
"876487027099582521", "count": len(acc_names) - 2},), + ({"account": "876487027099582522", "count": len(acc_names) - 2},), + ] + + # the only document we have + # 3 is the emoji count + assert sum(document["reacter"]) == 0 + assert sum(document["reacted"]) == len(hours_to_include) * len(reacted_accs) + else: + assert sum(document["reacter"]) == len(hours_to_include) * ( + len(acc_names) - 2 + ) + assert sum(document["reacted"]) == len(hours_to_include) * ( + len(reacted_accs) - 1 + ) diff --git a/tests/integration/test_activity_hourly_replier.py b/tests/integration/test_activity_hourly_replier.py new file mode 100644 index 0000000..e8223e8 --- /dev/null +++ b/tests/integration/test_activity_hourly_replier.py @@ -0,0 +1,84 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.activity_hourly import activity_hourly +from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps + + +def test_reply_messages(): + # data preparation + day = datetime(2023, 1, 1) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + DAY_COUNT = 3 + + acc_names = [] + for i in range(10): + acc_names.append(f"87648702709958252{i}") + + prepared_list = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = f"10207071292141118{i}" + prepared_data = { + "mess_type": 19, + "author": acc, + "user_mentions": [], + "reactions": [], + "replied_user": "876487027099582520", + "datetime": data_date, + "channel": chId, + "threadId": None, + } + + prepared_list.append(prepared_data) + channelIds.add(chId) + dates.add(data_date.strftime("%Y-%m-%d")) + + (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) + + analyzer_heatmaps = Heatmaps("DB_connection", testing=False) + results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) + # print(results) + assert len(results) == (len(acc_names) - 1) * DAY_COUNT + for document in results: + assert document["account_name"] in acc_names + assert document["date"] in dates + assert document["account_name"] in acc_names + assert document["channelId"] in channelIds + assert document["reacted_per_acc"] == [] + assert document["mentioner_per_acc"] == [] + assert sum(document["lone_messages"]) == 0 + assert sum(document["thr_messages"]) == 0 + assert sum(document["mentioner"]) == 0 + assert sum(document["mentioned"]) == 0 + assert sum(document["reacter"]) == 0 + + # if it is the account that everyone replied to + if document["account_name"] == "876487027099582520": + # the only document we have + assert document["replied_per_acc"] == [ + # `len(acc_names) - 2` is because + # len is returning one more and we are replying one account less + ({"account": "876487027099582520", "count": len(acc_names) - 2},), + ({"account": "876487027099582521", "count": len(acc_names) - 2},), + ({"account": "876487027099582522", "count": len(acc_names) - 2},), + ({"account": "876487027099582523", "count": len(acc_names) - 2},), + ({"account": "876487027099582524", "count": len(acc_names) - 2},), + ({"account": "876487027099582525", "count": len(acc_names) - 2},), + ({"account": "876487027099582526", "count": len(acc_names) - 2},), + ({"account": "876487027099582527", "count": len(acc_names) - 2},), + ({"account": "876487027099582528", "count": len(acc_names) - 2},), + ({"account": "876487027099582529", "count": len(acc_names) - 2},), + ] + assert sum(document["replier"]) == 
len(hours_to_include) + assert sum(document["replied"]) == len(hours_to_include) * ( + len(acc_names) - 1 + ) + # other accounts + else: + assert sum(document["replier"]) == len(hours_to_include) diff --git a/tests/integration/test_activity_hourly_thread_msg.py b/tests/integration/test_activity_hourly_thread_msg.py new file mode 100644 index 0000000..a282bb0 --- /dev/null +++ b/tests/integration/test_activity_hourly_thread_msg.py @@ -0,0 +1,62 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.activity_hourly import activity_hourly +from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps + + +def test_thread_messages(): + # data preparation + day = datetime(2023, 1, 1) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + DAY_COUNT = 2 + + acc_names = [] + for i in range(10): + acc_names.append(f"87648702709958252{i}") + + prepared_list = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = f"10207071292141118{i}" + prepared_data = { + "mess_type": 0, + "author": acc, + "user_mentions": [], + "reactions": [], + "replied_user": None, + "datetime": data_date, + "channel": chId, + "threadId": f"109635841296880850{i}", + } + + prepared_list.append(prepared_data) + channelIds.add(chId) + dates.add(data_date.strftime("%Y-%m-%d")) + + (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) + + analyzer_heatmaps = Heatmaps("DB_connection", testing=False) + results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) + + assert len(results) == (len(acc_names) - 1) * DAY_COUNT + for document in results: + assert document["account_name"] in acc_names + assert document["date"] in dates + assert document["channelId"] in channelIds + assert document["reacted_per_acc"] == [] + assert document["mentioner_per_acc"] == [] + assert document["replied_per_acc"] == [] + assert sum(document["thr_messages"]) == len(hours_to_include) + assert sum(document["mentioner"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert sum(document["mentioned"]) == 0 + assert sum(document["reacter"]) == 0 + + assert sum(document["lone_messages"]) == 0 diff --git a/tests/integration/test_all_active_period.py b/tests/integration/test_all_active_period.py new file mode 100644 index 0000000..0f400eb --- /dev/null +++ b/tests/integration/test_all_active_period.py @@ -0,0 +1,169 @@ +from datetime import datetime, timedelta + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_two_weeks_period_active_members(): + """ + test all_active members for the two weeks period in the new schema + """ + guildId = "1234567" + db_access = launch_db_access(guildId) + + acc_id = [ + "user0", + "user1", + "user2", + "user3", + ] + + # A guild connected at 35 days ago + connected_days_before = 35 + setup_db_guild( + db_access, guildId, discordId_list=acc_id, days_ago_period=connected_days_before + ) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # A message from user0 to user1 on day 0 of past two weeks + sample = { + "type": 19, + "author": acc_id[0], + "content": "test_message_0", + "user_mentions": [], + "role_mentions": [], + 
"reactions": [], + "replied_user": acc_id[1], + "createdDate": (datetime.now() - timedelta(days=14)), + "messageId": "111881432193433601", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + + rawinfo_samples.append(sample) + + # A message from user1 to user0 on day 0 of past two weeks + sample = { + "type": 19, + "author": acc_id[1], + "content": "test_message_1", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": acc_id[0], + "createdDate": (datetime.now() - timedelta(days=14)), + "messageId": "111881432193433602", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + + rawinfo_samples.append(sample) + + # A message from user2 to user3 on day 3 of past two weeks + sample = { + "type": 19, + "author": acc_id[2], + "content": "test_message_1", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": acc_id[3], + "createdDate": (datetime.now() - timedelta(days=(14 - 3))), + "messageId": "111881432193433603", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + + rawinfo_samples.append(sample) + + # A message from user3 to user2 on day 3 of past two weeks + sample = { + "type": 19, + "author": acc_id[3], + "content": "test_message_1", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": acc_id[2], + "createdDate": (datetime.now() - timedelta(days=(14 - 3))), + "messageId": "111881432193433604", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", + {}, + feature_projection={"_id": 0, "date": 1, "all_active": 1}, + ) + memberactivities = list(memberactivities_cursor) + + print(f"memberactivities: {memberactivities}") + + date_now = datetime.now() + + for activities in memberactivities: + date = datetime.fromisoformat(activities["date"]).date() + print("date: ", date) + # 14 days minues 7 + if date == (date_now - timedelta(days=14)).date(): + print("time delta days: 14") + assert set(activities["all_active"]) == set(["user0", "user1"]) + elif date == (date_now - timedelta(days=13)).date(): + print("time delta days: 13") + assert set(activities["all_active"]) == set(["user0", "user1"]) + elif date == (date_now - timedelta(days=12)).date(): + print("time delta days: 12") + assert set(activities["all_active"]) == set(["user0", "user1"]) + elif date == (date_now - timedelta(days=11)).date(): + print("time delta days: 11") + assert set(activities["all_active"]) == set( + ["user0", "user1", "user2", "user3"] + ) + elif date == (date_now - timedelta(days=10)).date(): + print("time delta days: 10") + assert set(activities["all_active"]) == set( + ["user0", "user1", "user2", "user3"] + ) + elif date == (date_now - timedelta(days=9)).date(): + print("time delta days: 9") + assert set(activities["all_active"]) == set( + ["user0", "user1", "user2", "user3"] + ) + elif date == (date_now - timedelta(days=8)).date(): + print("time delta days: 8") + assert set(activities["all_active"]) == set( + ["user0", "user1", "user2", "user3"] + ) + elif date == (date_now - timedelta(days=7)).date(): + print("time delta days: 7") + assert 
set(activities["all_active"]) == set(["user2", "user3"]) + elif date == (date_now - timedelta(days=6)).date(): + print("time delta days: 6") + assert set(activities["all_active"]) == set(["user2", "user3"]) + elif date == (date_now - timedelta(days=5)).date(): + print("time delta days: 5") + assert set(activities["all_active"]) == set(["user2", "user3"]) + else: + print("time delta days: else") + assert set(activities["all_active"]) == set() diff --git a/tests/integration/test_all_joined_day_members.py b/tests/integration/test_all_joined_day_members.py new file mode 100644 index 0000000..5fe7927 --- /dev/null +++ b/tests/integration/test_all_joined_day_members.py @@ -0,0 +1,113 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_all_joined_day_members(): + """ + testing the all_joined_day + """ + guildId = "1234" + db_access = launch_db_access(guildId) + today = datetime.now() + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + # users joining 15 days ago + # and 13 days ago + acc_join_dates = [ + today - timedelta(days=15), + today - timedelta(days=13), + ] + + setup_db_guild( + db_access, + guildId, + discordId_list=acc_id, + dates=acc_join_dates, + days_ago_period=30, + ) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + rawinfo_samples = [] + + # generating random rawinfo data + for i in range(150): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + cursor = db_access.db_mongo_client[guildId]["memberactivities"].find([]) + + memberactivities = list(cursor) + + for document in memberactivities: + date_str = document["date"].split("T")[0] + date = datetime.strptime(date_str, "%Y-%m-%d") + + # checking the types + assert isinstance(document["all_joined_day"], list) + assert isinstance(document["all_joined"], list) + + joined_day = set(document["all_joined_day"]) + joined = set(document["all_joined"]) + + if (today - date).days == 15: + assert joined_day == {"973993299281076285"} + assert joined == {"973993299281076285"} + elif (today - date).days == 14: + assert joined_day == set() + assert joined == {"973993299281076285"} + elif (today - date).days == 13: + assert joined_day == {"973993299281076286"} + assert joined == {"973993299281076285", "973993299281076286"} + elif (today - date).days == 12: + assert joined_day == set() + assert joined == {"973993299281076286", "973993299281076285"} + elif (today - date).days == 11: + assert joined_day == set() + assert joined == {"973993299281076286", "973993299281076285"} + elif (today - date).days == 10: + assert joined_day == set() + assert joined == {"973993299281076286", "973993299281076285"} + elif (today - date).days == 9: + assert joined_day == set() + assert joined == {"973993299281076286", 
"973993299281076285"} + elif (today - date).days == 8: + assert joined_day == set() + assert joined == {"973993299281076286", "973993299281076285"} + elif (today - date).days == 7: + assert joined_day == set() + assert joined == {"973993299281076286"} + elif (today - date).days == 6: + assert joined_day == set() + assert joined == {"973993299281076286"} + else: + assert joined_day == set() + assert joined == set() diff --git a/tests/integration/test_analyzer_init.py b/tests/integration/test_analyzer_init.py new file mode 100644 index 0000000..ce09b1f --- /dev/null +++ b/tests/integration/test_analyzer_init.py @@ -0,0 +1,93 @@ +from datetime import datetime, timedelta + +from analyzer_init import AnalyzerInit +from pymongo import MongoClient +from utils.daolytics_uitls import get_mongo_credentials + + +def test_analyzer_init(): + analyzer = AnalyzerInit() + + guildId = "1234" + days_ago_period = 30 + mongo_creds = get_mongo_credentials() + user = mongo_creds["user"] + password = mongo_creds["password"] + host = mongo_creds["host"] + port = mongo_creds["port"] + + url = f"mongodb://{user}:{password}@{host}:{port}" + + mongo_client: MongoClient = MongoClient(url) + + mongo_client["RnDAO"]["guilds"].delete_one({"guildId": guildId}) + mongo_client.drop_database(guildId) + + mongo_client["RnDAO"]["guilds"].insert_one( + { + "guildId": guildId, + "user": "876487027099582524", + "name": "Sample Guild", + "connectedAt": (datetime.now() - timedelta(days=10)), + "isInProgress": True, + "isDisconnected": False, + "icon": "afd0d06fd12b2905c53708ca742e6c66", + "window": [7, 1], + "action": [1, 1, 1, 4, 3, 5, 5, 4, 3, 3, 2, 2, 1], + "selectedChannels": [ + { + "channelId": "1020707129214111827", + "channelName": "general", + }, + ], + "period": (datetime.now() - timedelta(days=days_ago_period)), + } + ) + + mongo_client[guildId]["guildmembers"].insert_one( + { + "discordId": "user1", + "username": "sample_user1", + "roles": ["1012430565959553145"], + "joinedAt": datetime.now() - timedelta(days=5), + "avatar": "3ddd6e429f75d6a711d0a58ba3060694", + "isBot": False, + "discriminator": "0", + } + ) + mongo_client[guildId].create_collection("heatmaps") + mongo_client[guildId].create_collection("memberactivities") + + # generating random rawinfo data + # 24 hours + # 90 days + rawinfo_samples = [] + for i in range(24 * days_ago_period): + sample = { + "type": 19, + "author": "user1", + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": "user2", + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + tc_discord_analyzer, mongo_creds = analyzer.get_analyzer() + + tc_discord_analyzer.recompute_analytics(guildId) + + heatmaps_data = mongo_client[guildId]["heatmaps"].find_one({}) + assert heatmaps_data is not None + + memberactivities_data = mongo_client[guildId]["memberactivities"].find_one({}) + assert memberactivities_data is not None diff --git a/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py b/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py new file mode 100644 index 0000000..3475de8 --- /dev/null +++ b/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py @@ -0,0 +1,103 @@ +# test analyzing memberactivities +from datetime 
import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_one_year_period_recompute_available_analytics(): + """ + test the whole analyzer pipeline for a guild with a 1 year period + and use recompute method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=360) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 353 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=354), count=353 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 359 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=360), count=359 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 360 days + for i in range(24 * 360): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 354 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 353 days ago rather than 354 + document_start_date = yesterday - timedelta(days=353) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 360 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 360 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + date_ago = yesterday - timedelta(359) + assert heatmaps_data[-1]["date"] == date_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py new file mode 
100644 index 0000000..5b57020 --- /dev/null +++ b/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py @@ -0,0 +1,84 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_one_year_period_recompute_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a 6 month period + and use recompute method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=360) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 360 days + for i in range(24 * 360): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # 354 days, analytics saving is the end day + assert len(memberactivities_data) == (354) + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 353 days ago rather than 354 + document_start_date = yesterday - timedelta(days=353) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 90 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 360 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + year_ago = yesterday - timedelta(359) + assert heatmaps_data[-1]["date"] == year_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py b/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py new file mode 100644 index 0000000..ec7de27 --- /dev/null +++ b/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py @@ -0,0 +1,104 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def 
test_analyzer_one_year_period_run_once_available_analytics(): + """ + test the whole analyzer pipeline for a guild with a 1 year period + and use run_once method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=360) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 353 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=354), count=353 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 359 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=360), count=359 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 360 days + for i in range(24 * 360): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # 354 days, analytics saving is the end day + assert len(memberactivities_data) == (354) + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 353 days ago rather than 354 + document_start_date = yesterday - timedelta(days=353) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 359 + 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + year_ago = yesterday - timedelta(359) + assert heatmaps_data[-1]["date"] == year_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py new file mode 100644 index 0000000..e3b8de1 --- /dev/null +++ b/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py @@ -0,0 +1,84 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, 
setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_one_year_period_run_once_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a 1 year period + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=360) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 360 days + for i in range(24 * 360): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # 354 days, analytics saving is the end day + assert len(memberactivities_data) == (354) + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 353 days ago rather than 354 + document_start_date = yesterday - timedelta(days=353) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 360 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 360 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + year_ago = yesterday - timedelta(359) + assert heatmaps_data[-1]["date"] == year_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py b/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py new file mode 100644 index 0000000..5bb228c --- /dev/null +++ b/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py @@ -0,0 +1,103 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_three_month_period_recompute_available_analytics(): + """ + test the whole analyzer pipeline for a guild with a 3 month period + and use recompute method with some analytics available + """ + # first create the collections + guildId = "1234" + db_access = 
"973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=90) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 83 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=84), count=83 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 89 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=90), count=89 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 90 days + for i in range(24 * 90): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 84 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 83 days ago rather than 84 + document_start_date = yesterday - timedelta(days=83) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 90 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 90 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(89) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py new file mode 100644 index 0000000..427c971 --- /dev/null +++ b/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py @@ -0,0 +1,83 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_three_month_period_recompute_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a 3 month period + and use recompute method with no analytics data available + """ + # first create the collections + guildId = "1234" + db_access = 
launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=90) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 90 days + for i in range(24 * 90): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 84 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 83 days ago rather than 84 + document_start_date = yesterday - timedelta(days=83) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 90 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 90 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(89) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py b/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py new file mode 100644 index 0000000..3383018 --- /dev/null +++ b/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py @@ -0,0 +1,104 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_three_month_period_run_once_available_analytics(): + """ + test the whole analyzer pipeline for a guild with a 3 month period + and use run_once method with some analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=90) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 83 documents + 
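+ # note: create_empty_memberactivities_data(start, count) is assumed to generate + # `count` placeholder daily documents beginning at `start`; pre-filling 83 of the + # 84 expected documents leaves only yesterday for the analyzer to compute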
memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=84), count=83 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 89 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=90), count=89 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours, 90 days + # 24 * 90 = 2160 + for i in range(24 * 90): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 84 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 83 days ago rather than 84 + document_start_date = yesterday - timedelta(days=83) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 89 days of 1 document plus the last day as 2 documents + # as we have 2 accounts + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 89 + 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(89) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py new file mode 100644 index 0000000..053dfbe --- /dev/null +++ b/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py @@ -0,0 +1,83 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_three_month_period_run_once_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a 3 month period + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=90) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + 
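+ # each sample below mimics a minimal raw Discord message document ("type": 19 is + # the Discord reply message type); fields the analyzer does not need are left empty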
rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours, 90 days + # 24 * 90 + for i in range(24 * 90): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 84 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 83 days ago rather than 84 + document_start_date = yesterday - timedelta(days=83) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 90 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 90 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(89) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py b/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py new file mode 100644 index 0000000..cf12e33 --- /dev/null +++ b/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py @@ -0,0 +1,105 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_six_month_period_recompute_available_analytics(): + """ + test the whole analyzer pipeline for a guild with 6 month period + and use recompute method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=180) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 173 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=174), count=173 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 179 documents + # just yesterday is left to be analyzed + heatmaps_data 
= create_empty_heatmaps_data( + datetime.now() - timedelta(days=180), count=179 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 180 days + for i in range(24 * 180): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # 180 days, analytics saving is the end day + # so the 7 days start wouldn't be counted + assert len(memberactivities_data) == (174) + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 173 days ago rather than 174 + document_start_date = yesterday - timedelta(days=173) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 180 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 180 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(179) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py new file mode 100644 index 0000000..47657b8 --- /dev/null +++ b/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py @@ -0,0 +1,85 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_six_month_period_recompute_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a 6 month period + and use recompute method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=180) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 180 days + for i in range(24 * 180): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": 
np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # 180 days, analytics saving is the end day + # so the 7 days start wouldn't be counted + assert len(memberactivities_data) == (174) + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 173 days ago rather than 174 + document_start_date = yesterday - timedelta(days=173) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 180 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 180 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(179) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py b/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py new file mode 100644 index 0000000..377dedd --- /dev/null +++ b/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py @@ -0,0 +1,104 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_six_month_period_run_once_available_analytics(): + """ + test the whole analyzer pipeline for a guild with 6 month period + and use run_once method with some analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=180) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 173 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=174), count=173 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 179 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=180), count=179 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo 
samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 180 days + for i in range(24 * 180): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 174 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 173 days ago rather than 174 + document_start_date = yesterday - timedelta(days=173) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 179 days of 1 document plus the last day as 2 documents + # as we have 2 accounts + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 179 + 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(179) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py new file mode 100644 index 0000000..5e4e726 --- /dev/null +++ b/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py @@ -0,0 +1,85 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_six_month_period_run_once_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a 6 month period + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=180) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 180 days + for i in range(24 * 180): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + 
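+ # one synthetic message per hour over the whole 180 day period, with the author + # and replied_user picked at random from the two test accounts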
rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # 180 days, analytics saving is the end day + # so the 7 days start wouldn't be counted + assert len(memberactivities_data) == (174) + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 173 days ago rather than 174 + document_start_date = yesterday - timedelta(days=173) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 180 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 180 * 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(179) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_month_recompute_available_analytics.py b/tests/integration/test_analyzer_period_month_recompute_available_analytics.py new file mode 100644 index 0000000..689fdad --- /dev/null +++ b/tests/integration/test_analyzer_period_month_recompute_available_analytics.py @@ -0,0 +1,102 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_month_period_recompute_available_analytics(): + """ + test the whole analyzer pipeline for a guild with 1 month period + and use recompute method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=30) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 23 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=24), count=23 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 29 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=30), count=29 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 * 30 = 720 + for i in range(720): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + 
"reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 24 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 23 days ago rather than 24 + document_start_date = yesterday - timedelta(days=23) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 30 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 60 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(29) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py new file mode 100644 index 0000000..a6ccc31 --- /dev/null +++ b/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py @@ -0,0 +1,82 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_month_period_recompute_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with 1 month period + and use recompute method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=30) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 * 30 = 720 + for i in range(720): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + 
+ yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 24 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 23 days ago rather than 24 + document_start_date = yesterday - timedelta(days=23) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 30 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 60 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(29) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_month_run_once_available_analytics.py b/tests/integration/test_analyzer_period_month_run_once_available_analytics.py new file mode 100644 index 0000000..a6403ab --- /dev/null +++ b/tests/integration/test_analyzer_period_month_run_once_available_analytics.py @@ -0,0 +1,103 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_month_period_run_once_available_analytics(): + """ + test the whole analyzer pipeline for a guild with 1 month period + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=30) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 23 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=24), count=23 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 29 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=30), count=29 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 * 30 = 720 + for i in range(720): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + 
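+ # run_once is expected to analyze only the days without stored analytics (here + # just yesterday), unlike recompute_analytics which reprocesses the whole period; + # the document counts asserted below rely on that assumption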
memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 24 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # so we would use 23 days ago rather than 24 + document_start_date = yesterday - timedelta(days=23) + assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 29 days of 1 document plus the last day as 2 documents + # as we have 2 accounts + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 29 + 2 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(29) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py new file mode 100644 index 0000000..e0ade20 --- /dev/null +++ b/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py @@ -0,0 +1,82 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_month_period_run_once_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with 1 month period + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=30) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 * 30 = 720 + for i in range(720): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + assert len(memberactivities_data) == 24 + assert memberactivities_data[0]["date"] == yesterday.isoformat() + # yesterday is `-1` day and so + # we would use 23 days ago rather than 24 + document_start_date = yesterday - timedelta(days=23) + assert memberactivities_data[-1]["date"] == 
(document_start_date).isoformat() + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + # 30 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 60 + # checking first and last document + assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + month_ago = yesterday - timedelta(29) + assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_week_recompute_available_analytics.py b/tests/integration/test_analyzer_period_week_recompute_available_analytics.py new file mode 100644 index 0000000..ab29b80 --- /dev/null +++ b/tests/integration/test_analyzer_period_week_recompute_available_analytics.py @@ -0,0 +1,114 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_week_period_recompute_available_analytics(): + """ + We're assuming our period was 7 days and + analytics was done for 1 day and we're continuing the analytics today + and use recompute method with some analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=8) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=2), count=1 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=7), count=1 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hour * 7 days + for i in range(168): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find("memberactivities", {}) + memberactivities_data = list(memberactivities_cursor) + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + print("memberactivities_data: ", memberactivities_data) + + memberactivities_expected_dates = [ + yesterday.isoformat(), + (yesterday - timedelta(days=1)).isoformat(), + ] + + # two documents in memberactivities + assert len(memberactivities_data) == 2 + for document in 
memberactivities_data: + assert document["date"] in memberactivities_expected_dates + + heatmaps_cursor = db_access.query_db_find( + "heatmaps", {}, feature_projection=None, sorting=("date", -1) + ) + heatmaps_data = list(heatmaps_cursor) + + print("heatmaps_data: ", heatmaps_data) + + heatmaps_expected_dates = [ + yesterday.strftime("%Y-%m-%d"), + (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=7)).strftime("%Y-%m-%d"), + ] + # 7 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 14 + # every document must fall on one of the expected dates + + for document in heatmaps_data: + assert document["date"] in heatmaps_expected_dates diff --git a/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py new file mode 100644 index 0000000..157d042 --- /dev/null +++ b/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py @@ -0,0 +1,77 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_week_period_recompute_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a week period + and use recompute method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=7) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hour * 7 days + for i in range(168): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_cursor = db_access.db_mongo_client[guildId][ + "memberactivities" + ].find({}) + memberactivities_data = list(memberactivities_cursor) + + date = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + # just one document in memberactivities + assert len(memberactivities_data) == 1 + assert memberactivities_data[0]["date"] == date.isoformat() + + heatmaps_cursor = db_access.db_mongo_client[guildId]["heatmaps"].find({}) + heatmaps_data = list(heatmaps_cursor) + + # 7 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 14 + # last document must be for yesterday + assert 
heatmaps_data[-1]["date"] == date.strftime("%Y-%m-%d") diff --git a/tests/integration/test_analyzer_period_week_run_once_available_analytics.py b/tests/integration/test_analyzer_period_week_run_once_available_analytics.py new file mode 100644 index 0000000..29e1346 --- /dev/null +++ b/tests/integration/test_analyzer_period_week_run_once_available_analytics.py @@ -0,0 +1,124 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_week_period_run_once_available_analytics(): + """ + We're assuming our period was 7 days and + analytics was done for 1 day and we're continuing the analytics today + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=8) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=2), count=1 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=7), count=1 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + for i in range(150): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + print("memberactivities_data: ", memberactivities_data) + + memberactivities_expected_dates = [ + yesterday.isoformat(), + (yesterday - timedelta(days=1)).isoformat(), + ] + + # two documents in memberactivities + assert len(memberactivities_data) == 2 + data = zip(memberactivities_expected_dates, memberactivities_data) + for date, document in data: + print("memberactivities_data Looping: ", date) + assert document["date"] == date + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + print("heatmaps_data: ", heatmaps_data) + + heatmaps_expected_dates = [ + yesterday.strftime("%Y-%m-%d"), + yesterday.strftime("%Y-%m-%d"), + (yesterday - 
timedelta(days=1)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=7)).strftime("%Y-%m-%d"), + ] + # 6 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + # plus 1 document we added manually + assert len(heatmaps_data) == 13 + # last document must be for yesterday + data = zip(heatmaps_expected_dates, heatmaps_data) + for date, document in data: + print("heatmaps_data Looping: ", date) + assert document["date"] == date diff --git a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py new file mode 100644 index 0000000..4ca0941 --- /dev/null +++ b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py @@ -0,0 +1,107 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_week_period_run_once_empty_analytics(): + """ + test the whole analyzer pipeline for a guild with a week period + and use run_once method with empty analytics available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=7) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + for i in range(150): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + print("memberactivities_data: ", memberactivities_data) + + memberactivities_expected_dates = [ + yesterday.isoformat(), + # (yesterday - timedelta(days=1)).isoformat() + ] + + # one document in memberactivities + assert len(memberactivities_data) == 1 + data = zip(memberactivities_expected_dates, memberactivities_data) + for date, document in data: + 
print("memberactivities_data Looping: ", date) + assert document["date"] == date + + heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) + heatmaps_data = list(heatmaps_cursor) + + print("heatmaps_data: ", heatmaps_data) + + heatmaps_expected_dates = [ + yesterday.strftime("%Y-%m-%d"), + yesterday.strftime("%Y-%m-%d"), + (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), + (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), + # (yesterday - timedelta(days=7)).strftime("%Y-%m-%d"), + ] + # 6 days, multiplied with 2 + # (accounts are: "973993299281076285", "973993299281076286") + assert len(heatmaps_data) == 12 + # last document must be for yesterday + data = zip(heatmaps_expected_dates, heatmaps_data) + for date, document in data: + print("heatmaps_data Looping: ", date) + assert document["date"] == date diff --git a/tests/integration/test_decentralization_score.py b/tests/integration/test_decentralization_score.py new file mode 100644 index 0000000..80a1c93 --- /dev/null +++ b/tests/integration/test_decentralization_score.py @@ -0,0 +1,72 @@ +# the nodes of the graph are partially connected +from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality + +from .utils.neo4j_conn import neo4j_setup + + +def test_decentralization_score(): + """ + 5 nodes partially connected + using two dates: 166 and 167 + + To see more info for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?moveToWidget=3458764558210553321&cot=14 + """ + guildId = "1234" + neo4j_ops = neo4j_setup() + + centrality = Centerality(neo4j_ops) + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH 
{{date: {today}, weight: 3}}]->(b) + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + network_decentrality = centrality.compute_network_decentrality( + guildId=guildId, from_start=True, save=True + ) + + # because python is not good with equality comparison of float values + assert network_decentrality[yesterday] - 133.33 < 0.1 + assert network_decentrality[today] - 66.66 < 0.1 diff --git a/tests/integration/test_degree_centrality_multiple_guilds.py b/tests/integration/test_degree_centrality_multiple_guilds.py new file mode 100644 index 0000000..432cee9 --- /dev/null +++ b/tests/integration/test_degree_centrality_multiple_guilds.py @@ -0,0 +1,98 @@ +# we have nodes of a community is connected to another one +# meaning we have nodes available in more than one community +from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality + +from .utils.neo4j_conn import neo4j_setup + + +def test_multiple_guilds(): + """ + 5 nodes connected to guild 1234 + 2 nodes conected to guild 1235 + using two dates: 166 and 167 + + We do not have to have the result of guild 1234 in guild 1235 and vice versa + To see more info for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 + """ + guildId = "1234" + neo4j_ops = neo4j_setup() + + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + + guildId = "1234" + guildId2 = "1235" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (f2:DiscordAccount) + -[:IS_MEMBER]->(guild2:Guild {{guildId: '{guildId2}'}}) + CREATE (g2:DiscordAccount) -[:IS_MEMBER]->(guild2) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + SET f2.userId = "1005" + SET g2.userId = "1006" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + MERGE (f2) -[r13:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(g2) + MERGE (g2) -[r14:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(f2) + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = 
'{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + SET r13.guildId = '{guildId2}' + SET r14.guildId = '{guildId2}' + """ + ) + centrality = Centerality(neo4j_ops) + degree_centrality = centrality.compute_degree_centerality( + guildId=guildId2, + direction="undirected", + normalize=True, + weighted=False, + preserve_parallel=True, + from_start=True, + ) + print("degree_centrality: ", degree_centrality) + + assert today not in degree_centrality + assert "1000" not in degree_centrality[yesterday] + assert "1001" not in degree_centrality[yesterday] + assert "1002" not in degree_centrality[yesterday] + assert "1003" not in degree_centrality[yesterday] + assert "1004" not in degree_centrality[yesterday] + + assert "1005" in degree_centrality[yesterday] + assert "1006" in degree_centrality[yesterday] diff --git a/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py b/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py new file mode 100644 index 0000000..2de197d --- /dev/null +++ b/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py @@ -0,0 +1,94 @@ +# we have nodes of a community is connected to another one +# meaning we have nodes available in more than one community +from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality + +from .utils.neo4j_conn import neo4j_setup + + +def test_multiple_guilds_preserve_parallel(): + """ + 5 nodes connected to guild 1234 + 2 nodes conected to guild 1235 + using two dates: 166 and 167 + + We do not have to have the result of guild 1234 in guild 1235 and vice versa + To see more info for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 + """ + guildId = "1234" + neo4j_ops = neo4j_setup() + + centrality = Centerality(neo4j_ops) + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + + guildId = "1234" + guildId2 = "1235" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (f2:DiscordAccount) + -[:IS_MEMBER]->(guild2:Guild {{guildId: '{guildId2}'}}) + CREATE (g2:DiscordAccount) -[:IS_MEMBER]->(guild2) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + SET f2.userId = "1005" + SET g2.userId = "1006" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 
2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + MERGE (f2) -[r13:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(g2) + MERGE (g2) -[r14:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(f2) + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + SET r13.guildId = '{guildId2}' + SET r14.guildId = '{guildId2}' + """ + ) + degree_centrality = centrality.compute_degree_centerality( + guildId=guildId2, + direction="undirected", + normalize=False, + weighted=False, + preserve_parallel=True, + from_start=True, + ) + print("degree_centrality: ", degree_centrality) + + assert "1005" in degree_centrality[yesterday] + assert "1006" in degree_centrality[yesterday] + + assert degree_centrality[yesterday]["1005"] == 2 + assert degree_centrality[yesterday]["1006"] == 2 diff --git a/tests/integration/test_degree_centrality_parallel_preservation.py b/tests/integration/test_degree_centrality_parallel_preservation.py new file mode 100644 index 0000000..af93399 --- /dev/null +++ b/tests/integration/test_degree_centrality_parallel_preservation.py @@ -0,0 +1,91 @@ +# the nodes of the graph are partially connected +from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality + +from .utils.neo4j_conn import neo4j_setup + + +def test_partially_connected_coeffs(): + """ + 5 nodes partially connected + using two dates: 166 and 167 + + To see more info for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 + """ + guildId = "1234" + neo4j_ops = neo4j_setup() + + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + 
SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + centrality = Centerality(neo4j_ops) + degree_centrality = centrality.compute_degree_centerality( + guildId=guildId, + direction="undirected", + normalize=True, + weighted=False, + preserve_parallel=False, + from_start=True, + ) + + print(degree_centrality) + assert degree_centrality[yesterday]["1000"] == 2 / 3 + assert degree_centrality[today]["1000"] == 1 / 2 + + assert degree_centrality[yesterday]["1001"] == 1 + assert degree_centrality[today]["1001"] == 1 + + assert degree_centrality[yesterday]["1002"] == 2 / 3 + assert degree_centrality[today]["1002"] == 3 / 4 + + assert degree_centrality[yesterday]["1003"] == 1 + assert degree_centrality[today]["1003"] == 1 / 2 + + assert "1004" not in degree_centrality[yesterday] + assert degree_centrality[today]["1004"] == 1 / 4 diff --git a/tests/integration/test_exclude_bots.py b/tests/integration/test_exclude_bots.py new file mode 100644 index 0000000..874a8d2 --- /dev/null +++ b/tests/integration/test_exclude_bots.py @@ -0,0 +1,90 @@ +from datetime import datetime, timedelta + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_excluding_bots_heatmaps(): + """ + test if we're excluding bots from analyzer pipeline + """ + guildId = "1234567" + db_access = launch_db_access(guildId) + + acc_id = [ + "user0", + "user1", + "user2", + "user3", + "bot0", + "bot1", + "bot2", + ] + acc_isbots = [False, False, False, False, True, True, True] + + # A guild connected at 35 days ago + connected_days_before = 35 + setup_db_guild( + db_access, + guildId, + discordId_list=acc_id, + discordId_isbot=acc_isbots, + days_ago_period=connected_days_before, + ) + window_start_date = datetime.now() - timedelta(days=connected_days_before) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # 24 hours + # 30 days + # 24 * 30 + for i in range(720): + sample = { + "type": 19, + "author": acc_id[i % len(acc_id)], + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + db_access.db_mongo_client[guildId] + + pipeline = [ + # Filter documents based on date + {"$match": {"date": {"$gte": window_start_date.strftime("%Y-%m-%d")}}}, + {"$group": {"_id": "$account_name"}}, + { + "$group": { + "_id": None, + "uniqueAccounts": {"$push": "$_id"}, + } + }, + ] + result = list(db_access.db_mongo_client[guildId]["heatmaps"].aggregate(pipeline)) + + print(result[0]["uniqueAccounts"]) + print(f"np.array(acc_id)[acc_isbots]: {np.array(acc_id)[acc_isbots]}") + + # checking if the bots are not included in heatmaps + for account_name in result[0]["uniqueAccounts"]: + assert account_name not in np.array(acc_id)[acc_isbots] diff --git 
a/tests/integration/test_fragmentation_score.py b/tests/integration/test_fragmentation_score.py new file mode 100644 index 0000000..a81b135 --- /dev/null +++ b/tests/integration/test_fragmentation_score.py @@ -0,0 +1,86 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import neo4j_setup + + +def test_avg_clustering_coeff(): + """ + test scaling of the avgClusteringCoefficient (a.k.a fragmentation score) + """ + neo4j_ops = neo4j_setup() + + neo4j_analytics = Neo4JAnalytics(neo4j_ops) + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + past_window_date = ( + datetime.fromtimestamp(yesterday) - timedelta(days=1) + ).timestamp() + + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + neo4j_analytics.compute_local_clustering_coefficient( + guildId=guildId, from_start=False + ) + fragmentation_score = neo4j_analytics.compute_fragmentation_score( + guildId=guildId, + past_window_date=past_window_date, + scale_fragmentation_score=200, + ) + + for score in fragmentation_score: + if score["date"] == yesterday: + assert score["fragmentation_score"] - 166.6666 < 0.1 + elif score["date"] == today: + assert score["fragmentation_score"] - 120 < 0.1 + else: + print(score["date"]) + # is shouldn't ever get here since + # we do compute for just two periods + assert False is True diff --git a/tests/integration/test_fragmentation_score_exclude_past.py b/tests/integration/test_fragmentation_score_exclude_past.py new file mode 100644 index 0000000..312328e --- /dev/null +++ b/tests/integration/test_fragmentation_score_exclude_past.py @@ -0,0 +1,97 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import 
neo4j_setup + + +def test_avg_clustering_exclude_past(): + """ + test scaling of the avgClusteringCoefficient (a.k.a fragmentation score) + """ + neo4j_ops = neo4j_setup() + + neo4j_analytics = Neo4JAnalytics(neo4j_ops) + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + past_three_days = ( + datetime.fromtimestamp(yesterday) - timedelta(days=2) + ).timestamp() + past_window_date = ( + datetime.fromtimestamp(yesterday) - timedelta(days=1) + ).timestamp() + + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + MERGE (a) -[r13:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(d) + MERGE (d) -[r14:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(b) + MERGE (b) -[r15:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(e) + MERGE (e) -[r16:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(c) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + SET r13.guildId = '{guildId}' + SET r14.guildId = '{guildId}' + SET r15.guildId = '{guildId}' + SET r16.guildId = '{guildId}' + """ + ) + + neo4j_analytics.compute_local_clustering_coefficient( + guildId=guildId, from_start=False + ) + fragmentation_score = neo4j_analytics.compute_fragmentation_score( + guildId=guildId, + past_window_date=past_window_date, + scale_fragmentation_score=200, + ) + + for score in fragmentation_score: + if score["date"] == yesterday: + assert score["fragmentation_score"] - 166.6666 < 0.1 + elif score["date"] == today: + assert score["fragmentation_score"] - 120 < 0.1 + else: + print(score["date"]) + # is shouldn't ever get here since + # we do compute for just two periods + assert False is True diff --git a/tests/integration/test_fragmentation_score_from_start.py b/tests/integration/test_fragmentation_score_from_start.py new file mode 100644 index 0000000..700f385 --- /dev/null +++ b/tests/integration/test_fragmentation_score_from_start.py @@ 
-0,0 +1,85 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import neo4j_setup + + +def test_avg_clustering_coeff_from_start(): + """ + test scaling of the avgClusteringCoefficient (a.k.a fragmentation score) + """ + neo4j_ops = neo4j_setup() + + neo4j_analytics = Neo4JAnalytics(neo4j_ops) + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + past_window_date = ( + datetime.fromtimestamp(yesterday) - timedelta(days=1) + ).timestamp() + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + neo4j_analytics.compute_local_clustering_coefficient( + guildId=guildId, from_start=True + ) + fragmentation_score = neo4j_analytics.compute_fragmentation_score( + guildId=guildId, + past_window_date=past_window_date, + scale_fragmentation_score=200, + ) + + for score in fragmentation_score: + if score["date"] == yesterday: + assert score["fragmentation_score"] - 166.666 < 0.1 + elif score["date"] == today: + assert score["fragmentation_score"] - 120 < 0.1 + else: + print(score["date"]) + # is shuoldn't ever get here since + # we do compute for just two periods + assert False is True diff --git a/tests/integration/test_fragmentation_score_rescaling.py b/tests/integration/test_fragmentation_score_rescaling.py new file mode 100644 index 0000000..40c8975 --- /dev/null +++ b/tests/integration/test_fragmentation_score_rescaling.py @@ -0,0 +1,78 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import neo4j_setup + + +def test_avg_clustering_coeff_scaling(): + """ + test scaling of the avgClusteringCoefficient (a.k.a fragmentation score) + """ + neo4j_ops = neo4j_setup() + + neo4j_analytics = Neo4JAnalytics(neo4j_ops) 
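+    # scaling check: with scale_fragmentation_score=100 every score asserted below is expected to stay within [0, 100]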
+ # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + past_window_date = ( + datetime.fromtimestamp(yesterday) - timedelta(days=1) + ).timestamp() + + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + neo4j_analytics.compute_local_clustering_coefficient( + guildId=guildId, from_start=True + ) + fragmentation_score = neo4j_analytics.compute_fragmentation_score( + guildId=guildId, + past_window_date=past_window_date, + scale_fragmentation_score=100, + ) + + for score in fragmentation_score: + assert 0 <= score["fragmentation_score"] <= 100 diff --git a/tests/integration/test_generated_graph_period_1_year_run_once.py b/tests/integration/test_generated_graph_period_1_year_run_once.py new file mode 100644 index 0000000..5c17584 --- /dev/null +++ b/tests/integration/test_generated_graph_period_1_year_run_once.py @@ -0,0 +1,107 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_one_year_period_run_once_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 1 year period + and use recompute method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=360) + 
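+    # the guild above is registered with a 360-day (one year) analysis period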
+    db_access.db_mongo_client[guildId].create_collection("heatmaps")
+    db_access.db_mongo_client[guildId].create_collection("memberactivities")
+
+    # filling memberactivities with some data
+    # filling heatmaps with some data
+    # filling up to 5 days ago with 350 documents
+    memberactivity_data = create_empty_memberactivities_data(
+        datetime.now() - timedelta(days=354), count=350
+    )
+    db_access.db_mongo_client[guildId]["memberactivities"].insert_many(
+        memberactivity_data
+    )
+
+    # filling heatmaps with some data
+    # filling up to 5 days ago with 356 documents
+    # the last 4 days are left to be analyzed
+    heatmaps_data = create_empty_heatmaps_data(
+        datetime.now() - timedelta(days=360), count=356
+    )
+    db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data)
+
+    # generating rawinfo samples
+    rawinfo_samples = []
+
+    # generating random rawinfo data
+    # 24 hours
+    # 360 days
+    for i in range(24 * 360):
+        sample = {
+            "type": 19,
+            "author": np.random.choice(acc_id),
+            "content": f"test{i}",
+            "user_mentions": [],
+            "role_mentions": [],
+            "reactions": [],
+            "replied_user": np.random.choice(acc_id),
+            "createdDate": (datetime.now() - timedelta(hours=i)),
+            "messageId": f"11188143219343360{i}",
+            "channelId": "1020707129214111827",
+            "channelName": "general",
+            "threadId": None,
+            "threadName": None,
+        }
+        rawinfo_samples.append(sample)
+
+    db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples)
+
+    analyzer = setup_analyzer()
+    analyzer.run_once(guildId=guildId)
+
+    results = neo4j_ops.gds.run_cypher(
+        f"""
+        MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g)
+        RETURN DISTINCT r.date as dates
+        ORDER BY dates DESC
+        """
+    )
+    dates = results.values.squeeze()
+
+    print("dates[:2]: ", dates[:2])
+    print("dates[-2:]: ", dates[-2:])
+
+    # our analysis started from 4 days ago
+    start_analytics_date = datetime.now().replace(
+        hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
+    ) - timedelta(days=4)
+    end_analytics_date = datetime.now().replace(
+        hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
+    ) - timedelta(days=1)
+
+    assert dates[-1] == start_analytics_date.timestamp() * 1000
+    assert dates[0] == end_analytics_date.timestamp() * 1000
diff --git a/tests/integration/test_generated_graph_period_1year.py b/tests/integration/test_generated_graph_period_1year.py
new file mode 100644
index 0000000..50e63b7
--- /dev/null
+++ b/tests/integration/test_generated_graph_period_1year.py
@@ -0,0 +1,105 @@
+from datetime import datetime, timedelta, timezone
+
+import numpy as np
+
+from .utils.analyzer_setup import launch_db_access, setup_analyzer
+from .utils.mock_heatmaps import create_empty_heatmaps_data
+from .utils.mock_memberactivities import create_empty_memberactivities_data
+from .utils.neo4j_conn import neo4j_setup
+from .utils.remove_and_setup_guild import setup_db_guild
+
+
+def test_networkgraph_one_year_period_recompute_available_analytics():
+    """
+    test the network graph for the whole analyzer pipeline
+    of a guild with a 1 year period
+    and use recompute method with some analytics data available
+    """
+    # first create the collections
+    guildId = "1234"
+    db_access = launch_db_access(guildId)
+    neo4j_ops = neo4j_setup()
+
+    neo4j_ops.gds.run_cypher(
+        """
+        MATCH (n) DETACH DELETE (n)
+        """
+    )
+
+    acc_id = [
+        "973993299281076285",
+        "973993299281076286",
+    ]
+
+    setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=360)
+
+    db_access.db_mongo_client[guildId].create_collection("heatmaps")
+
db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 353 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=354), count=353 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 359 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=360), count=359 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 360 days + for i in range(24 * 360): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print(dates) + + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=354) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_generated_graph_period_35_days.py b/tests/integration/test_generated_graph_period_35_days.py new file mode 100644 index 0000000..602ee38 --- /dev/null +++ b/tests/integration/test_generated_graph_period_35_days.py @@ -0,0 +1,105 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_35_days_period_recompute_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 35 days period + and use recompute method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=35) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some 
data + # filling up to 2 days ago with 28 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=28), count=27 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 34 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=35), count=34 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 35 days + for i in range(24 * 35): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print(dates) + + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=29) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_generated_graph_period_35_days_run_once.py b/tests/integration/test_generated_graph_period_35_days_run_once.py new file mode 100644 index 0000000..d996bd6 --- /dev/null +++ b/tests/integration/test_generated_graph_period_35_days_run_once.py @@ -0,0 +1,106 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_35_days_period_run_once_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 35 days period + and use run_once method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=35) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 4 days ago with 24 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - 
timedelta(days=28), count=24 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 31 documents + # 4 days ago are left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=35), count=31 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 35 days + for i in range(24 * 35): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print(dates) + + # we do run the analytics for 4 days ago + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=4) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_generated_graph_period_3_months.py b/tests/integration/test_generated_graph_period_3_months.py new file mode 100644 index 0000000..8d08b4a --- /dev/null +++ b/tests/integration/test_generated_graph_period_3_months.py @@ -0,0 +1,105 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_three_months_period_recompute_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 3 months period + and use recompute method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=90) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 83 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=84), count=83 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + 
memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 89 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=90), count=89 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 90 days + for i in range(24 * 90): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print(dates) + + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=84) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_generated_graph_period_3_months_run_once.py b/tests/integration/test_generated_graph_period_3_months_run_once.py new file mode 100644 index 0000000..ad86f9a --- /dev/null +++ b/tests/integration/test_generated_graph_period_3_months_run_once.py @@ -0,0 +1,106 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_three_months_period_run_once_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 3 months period + and use run_once method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=90) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # 4 days ago with 83 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=84), count=80 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 89 documents + # 4 days ago is left to be 
analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=90), count=86 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 90 days + for i in range(24 * 90): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print("dates[:2]: ", dates[:2]) + print("dates[-2:]: ", dates[-2:]) + + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=4) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_generated_graph_period_6_months.py b/tests/integration/test_generated_graph_period_6_months.py new file mode 100644 index 0000000..a320ae0 --- /dev/null +++ b/tests/integration/test_generated_graph_period_6_months.py @@ -0,0 +1,105 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_six_months_period_recompute_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 6 months period + and use recompute method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=180) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 173 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=174), count=173 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 179 documents + # just yesterday is left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=180), count=179 
+ ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 180 days + for i in range(24 * 180): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print(dates) + + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=174) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_generated_graph_period_6_months_run_once.py b/tests/integration/test_generated_graph_period_6_months_run_once.py new file mode 100644 index 0000000..1352146 --- /dev/null +++ b/tests/integration/test_generated_graph_period_6_months_run_once.py @@ -0,0 +1,107 @@ +from datetime import datetime, timedelta, timezone + +import numpy as np + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.neo4j_conn import neo4j_setup +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_networkgraph_six_months_period_run_once_available_analytics(): + """ + test the network graph for the whole analyzer pipeline + of a guild with a 6 months period + and use recompute method with some analytics data available + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + neo4j_ops = neo4j_setup() + + neo4j_ops.gds.run_cypher( + """ + MATCH (n) DETACH DELETE (n) + """ + ) + + acc_id = [ + "973993299281076285", + "973993299281076286", + ] + + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=180) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + # filling heatmaps with some data + # filling up to 2 days ago with 170 documents + memberactivity_data = create_empty_memberactivities_data( + datetime.now() - timedelta(days=174), count=170 + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + # filling up to 2 days ago with 179 documents + # 4 days ago are left to be analyzed + heatmaps_data = create_empty_heatmaps_data( + datetime.now() - timedelta(days=180), count=176 + ) + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + # generating rawinfo samples + 
rawinfo_samples = [] + + # generating random rawinfo data + # 24 hours + # 180 days + for i in range(24 * 180): + sample = { + "type": 19, + "author": np.random.choice(acc_id), + "content": f"test{i}", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": np.random.choice(acc_id), + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + RETURN DISTINCT r.date as dates + ORDER BY dates DESC + """ + ) + dates = results.values.squeeze() + + print("dates[:2]: ", dates[:2]) + print("dates[-2:]: ", dates[-2:]) + + # we do analyzed from 4 days ago + start_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=4) + end_analytics_date = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc + ) - timedelta(days=1) + + assert dates[-1] == start_analytics_date.timestamp() * 1000 + assert dates[0] == end_analytics_date.timestamp() * 1000 diff --git a/tests/integration/test_get_past_7_days_heatmaps.py b/tests/integration/test_get_past_7_days_heatmaps.py new file mode 100644 index 0000000..4fe5d2c --- /dev/null +++ b/tests/integration/test_get_past_7_days_heatmaps.py @@ -0,0 +1,82 @@ +from datetime import datetime, timedelta + +import numpy as np +from discord_analyzer.analysis.utils.member_activity_utils import get_users_past_window + +from .utils.analyzer_setup import launch_db_access + + +def test_get_past_7_days_heatmap_users_available_users(): + """ + test if we're getting the right heatmap users + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + start_date = datetime(2023, 1, 1) + + db_access.db_mongo_client[guildId].drop_collection("heatmaps") + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + + heatmaps_data = [] + acc_names = [] + for i in range(250): + date = start_date + timedelta(days=i) + account = f"9739932992810762{i}" + document = { + "date": date.strftime("%Y-%m-%d"), + "channelId": "1020707129214111827", + "thr_messages": list(np.zeros(24)), + "lone_messages": list(np.zeros(24)), + "replier": list(np.zeros(24)), + "replied": list(np.zeros(24)), + "mentioner": list(np.zeros(24)), + "mentioned": list(np.zeros(24)), + "reacter": list(np.zeros(24)), + "reacted": list(np.zeros(24)), + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [], + "account_name": account, + } + + heatmaps_data.append(document) + acc_names.append(account) + + db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + start_date = datetime(2023, 1, 1) + timedelta(days=243) + + user_names = get_users_past_window( + start_date.strftime("%Y-%m-%d"), db_access.db_mongo_client[guildId]["heatmaps"] + ) + + print(set(user_names)) + print(set(acc_names[-6:])) + + assert set(user_names) == set(acc_names[-7:]) + + +def test_get_past_7_days_heatmap_users_no_users(): + """ + test if we're getting the right heatmap users + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + start_date = datetime(2023, 
1, 1) + + db_access.db_mongo_client[guildId].drop_collection("heatmaps") + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + + start_date = datetime(2023, 1, 1) + timedelta(days=243) + + user_names = get_users_past_window( + start_date.strftime("%Y-%m-%d"), db_access.db_mongo_client[guildId]["heatmaps"] + ) + + assert user_names == [] diff --git a/tests/integration/test_interacted_in_deletion.py b/tests/integration/test_interacted_in_deletion.py new file mode 100644 index 0000000..01a854d --- /dev/null +++ b/tests/integration/test_interacted_in_deletion.py @@ -0,0 +1,76 @@ +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import neo4j_setup + + +def test_interacted_in_deletion(): + """ + test whether we're deleting the INTERACTED_IN relations or not + """ + + neo4j_ops = neo4j_setup() + neo4j_analytics = Neo4JAnalytics(neo4j_ops) + + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + MERGE (a)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (b)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (c)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (d)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + neo4j_analytics._remove_analytics_interacted_in(guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (:DiscordAccount) -[r:INTERACTED_IN]->(:Guild {{guildId : '{guildId}'}}) + RETURN r + """ + ) + + assert results["r"].values.shape == (0,) diff --git a/tests/integration/test_lcc_all_connected.py b/tests/integration/test_lcc_all_connected.py new file mode 100644 index 0000000..f580c0a --- /dev/null +++ b/tests/integration/test_lcc_all_connected.py @@ -0,0 +1,80 @@ +# test out local clustering coefficient with all nodes connected +from 
discord_analyzer.analysis.neo4j_analysis.local_clustering_coefficient import ( + LocalClusteringCoeff, +) + +from .utils.neo4j_conn import neo4j_setup + + +def test_all_connected_coeffs(): + """ + 3 nodes all connected + using two dates: 166 and 167 + in date 166 the coeffs are 1.0 + and in date 167 the coeffs are 0.0 + + To see more info for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 + """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + MERGE (a) -[r:INTERACTED_WITH {{weight: 1, date: {yesterday}}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{weight: 2, date: {today}}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{weight: 3, date: {yesterday}}}]->(c) + MERGE (b) -[r4:INTERACTED_WITH {{weight: 2, date: {yesterday}}}]->(c) + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + """ + ) + lcc = LocalClusteringCoeff(gds=neo4j_ops.gds) + lcc.compute(guildId=guildId, from_start=True) + + # getting the results + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (:Guild {{guildId: '{guildId}'}}) + RETURN + a.userId as userId, + r.date as date, + r.localClusteringCoefficient as lcc + """ + ) + + user0_id = "1000" + expected_results_user0 = [ + [user0_id, yesterday, 1.0], + [user0_id, today, 0.0], + ] + assert expected_results_user0 in results[results.userId == user0_id].values + + user1_id = "1001" + expected_results_user1 = [ + [user1_id, yesterday, 1.0], + [user1_id, today, 0.0], + ] + assert expected_results_user1 in results[results.userId == user1_id].values + + user2_id = "1002" + expected_results_user2 = [ + [user2_id, yesterday, 1.0], + [user2_id, today, 0.0], + ] + assert expected_results_user2 in results[results.userId == user2_id].values diff --git a/tests/integration/test_lcc_partially_connected.py b/tests/integration/test_lcc_partially_connected.py new file mode 100644 index 0000000..6ed6f78 --- /dev/null +++ b/tests/integration/test_lcc_partially_connected.py @@ -0,0 +1,114 @@ +# the nodes of the graph are partially connected +from discord_analyzer.analysis.neo4j_analysis.local_clustering_coefficient import ( + LocalClusteringCoeff, +) + +from .utils.neo4j_conn import neo4j_setup + + +def test_partially_connected_coeffs(): + """ + 5 nodes partially connected + using two dates: 166 and 167 + + To see more info for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 + """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + 
SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + lcc = LocalClusteringCoeff(gds=neo4j_ops.gds) + lcc.compute(guildId=guildId) + + # getting the results + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (:Guild {{guildId: '{guildId}'}}) + RETURN + a.userId as userId, + r.date as date, + r.localClusteringCoefficient as lcc + """ + ) + print(results.values) + + user0_id = "1000" + expected_results_user0 = [ + [user0_id, yesterday, 1.0], + [user0_id, today, 1.0], + ] + assert expected_results_user0 in results[results.userId == user0_id].values + + user1_id = "1001" + expected_results_user1 = [ + [user1_id, yesterday, 2 / 3], + [user1_id, today, 1 / 3], + ] + assert expected_results_user1 in results[results.userId == user1_id].values + + user2_id = "1002" + expected_results_user2 = [ + [user2_id, yesterday, 1], + [user2_id, today, 2 / 3], + ] + assert expected_results_user2 in results[results.userId == user2_id].values + + user3_id = "1003" + expected_results_user3 = [ + [user3_id, yesterday, 2 / 3], + [user3_id, today, 1], + ] + assert expected_results_user3 in results[results.userId == user3_id].values + + user4_id = "1003" + expected_results_user4 = [ + [user4_id, yesterday, 2 / 3], + [user4_id, today, 1], + ] + assert expected_results_user4 in results[results.userId == user4_id].values diff --git a/tests/integration/test_member_activity_from_start_no_past_data.py b/tests/integration/test_member_activity_from_start_no_past_data.py new file mode 100644 index 0000000..441bad6 --- /dev/null +++ b/tests/integration/test_member_activity_from_start_no_past_data.py @@ -0,0 +1,90 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +from .utils.analyzer_setup import launch_db_access, setup_analyzer + + +def test_analyzer_member_activities_from_start_empty_memberactivities(): + """ + run the analyzer for a specific guild with from_start option equal to True + assuming the memberactivities collection is empty + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + db_access.db_mongo_client["RnDAO"]["guilds"].delete_one({"guildId": guildId}) + db_access.db_mongo_client.drop_database(guildId) + + db_access.db_mongo_client["RnDAO"]["guilds"].insert_one( + { + "guildId": guildId, + "user": 
"1223455", + "name": "Loud place", + "connectedAt": (datetime.now() - timedelta(days=10)), + "isInProgress": True, + "isDisconnected": False, + "icon": "afd0d06fd12b2905c53708ca742e6c66", + "window": [7, 1], + "action": [1, 1, 1, 4, 3, 5, 5, 4, 3, 3, 2, 2, 1], + "selectedChannels": [ + { + "channelId": "41414262", + "channelName": "general", + }, + ], + "period": (datetime.now() - timedelta(days=30)), + } + ) + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + db_access.db_mongo_client[guildId]["guildmembers"].insert_one( + { + "discordId": "3451791", + "username": "sample_user", + "roles": ["99909821"], + "joinedAt": (datetime.now() - timedelta(days=10)), + "avatar": "3ddd6e429f75d6a711d0a58ba3060694", + "isBot": False, + "discriminator": "0", + } + ) + + rawinfo_samples = [] + + for i in range(150): + sample = { + "type": 0, + "author": "3451791", + "content": "test10", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": None, + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"77776325{i}", + "channelId": "41414262", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_data = db_access.db_mongo_client[guildId][ + "memberactivities" + ].find_one({}) + heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + guild_document = db_access.db_mongo_client["RnDAO"]["guilds"].find_one( + {"guildId": guildId} + ) + + # testing whether any data is available + assert memberactivities_data is not None + assert heatmaps_data is not None + assert guild_document["isInProgress"] is False diff --git a/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py b/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py new file mode 100644 index 0000000..2b9f1dc --- /dev/null +++ b/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py @@ -0,0 +1,72 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_heatmaps import create_empty_heatmaps_data +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_member_activities_from_start_available_heatmaps(): + """ + run the analyzer for a specific guild with from_start option equal to True + assuming the memberactivities collection is empty + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + setup_db_guild(db_access, guildId, discordId_list=["973993299281076285"]) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # filling memberactivities with some data + memberactivity_data = create_empty_memberactivities_data( + datetime(year=2023, month=6, day=5) + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + # filling heatmaps with some data + heatmaps_data = create_empty_heatmaps_data(datetime(year=2023, month=6, day=5)) + 
db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + + rawinfo_samples = [] + + for i in range(150): + sample = { + "type": 0, + "author": "973993299281076285", + "content": "test10", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": None, + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_data = db_access.db_mongo_client[guildId][ + "memberactivities" + ].find_one({}) + heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + guild_document = db_access.db_mongo_client["RnDAO"]["guilds"].find_one( + {"guildId": guildId} + ) + + # testing whether any data is available + assert memberactivities_data is not None + assert heatmaps_data is not None + assert guild_document["isInProgress"] is False diff --git a/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py b/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py new file mode 100644 index 0000000..5de588d --- /dev/null +++ b/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py @@ -0,0 +1,66 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.mock_memberactivities import create_empty_memberactivities_data +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_member_activities_from_start_available_member_activity(): + """ + run the analyzer for a specific guild with from_start option equal to True + assuming the memberactivities collection is empty + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + setup_db_guild(db_access, guildId, discordId_list=["973993299281076285"]) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + memberactivity_data = create_empty_memberactivities_data( + datetime(year=2023, month=6, day=5) + ) + db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data + ) + + rawinfo_samples = [] + + for i in range(150): + sample = { + "type": 0, + "author": "973993299281076285", + "content": "test10", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": None, + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_data = db_access.db_mongo_client[guildId][ + "memberactivities" + ].find_one({}) + heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + guild_document = db_access.db_mongo_client["RnDAO"]["guilds"].find_one( + {"guildId": guildId} + ) + + # testing whether any data is available + assert memberactivities_data is not None + assert heatmaps_data is 
not None + assert guild_document["isInProgress"] is False diff --git a/tests/integration/test_member_activity_from_start_with_one_interval.py b/tests/integration/test_member_activity_from_start_with_one_interval.py new file mode 100644 index 0000000..e065b7c --- /dev/null +++ b/tests/integration/test_member_activity_from_start_with_one_interval.py @@ -0,0 +1,57 @@ +# test analyzing memberactivities +from datetime import datetime, timedelta + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_analyzer_from_start_one_interval(): + """ + run the analyzer from start and just for one interval + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + setup_db_guild(db_access, guildId, discordId_list=["973993299281076285"]) + + rawinfo_samples = [] + + for i in range(150): + sample = { + "type": 0, + "author": "973993299281076285", + "content": "test10", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": None, + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + analyzer = setup_analyzer() + analyzer.recompute_analytics(guildId=guildId) + + memberactivities_data = db_access.db_mongo_client[guildId][ + "memberactivities" + ].find_one({}) + heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + guild_document = db_access.db_mongo_client["RnDAO"]["guilds"].find_one( + {"guildId": guildId} + ) + + # testing whether any data is available + assert memberactivities_data is not None + assert heatmaps_data is not None + assert guild_document["isInProgress"] is False diff --git a/tests/integration/test_member_activity_utils.py b/tests/integration/test_member_activity_utils.py new file mode 100644 index 0000000..ce3c1f0 --- /dev/null +++ b/tests/integration/test_member_activity_utils.py @@ -0,0 +1,46 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analyzer.memberactivity_utils import MemberActivityUtils + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_utils_get_members(): + analyzer = setup_analyzer() + guildId = "1012430565959553145" + db_access = launch_db_access(guildId) + users = ["973993299281076285"] + + setup_db_guild(db_access, guildId, discordId_list=users) + + rawinfo_samples = [] + for i in range(150): + sample = { + "type": 0, + "author": "973993299281076285", + "content": "test10", + "user_mentions": [], + "role_mentions": [], + "reactions": [], + "replied_user": None, + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + memberactivities_utils = 
MemberActivityUtils(analyzer.DB_connections) + + database_users = memberactivities_utils.get_all_users(guildId=guildId) + + print(f"database_users: {database_users}") + assert database_users == users diff --git a/tests/integration/test_mentioned_active_members_from_message.py b/tests/integration/test_mentioned_active_members_from_message.py new file mode 100644 index 0000000..def6ba9 --- /dev/null +++ b/tests/integration/test_mentioned_active_members_from_message.py @@ -0,0 +1,63 @@ +from datetime import datetime, timedelta + +from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.remove_and_setup_guild import setup_db_guild + + +def test_mention_active_members_from_rawinfo(): + """ + test whether the people being mentioned are counted as active or not + they shouldn't be considered active since mentions received are not counted + the rawinfos collection is used + """ + # first create the collections + guildId = "1234" + db_access = launch_db_access(guildId) + + acc_id = [ + "user1", + "user2", + ] + setup_db_guild(db_access, guildId, discordId_list=acc_id, days_ago_period=7) + + db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[guildId].create_collection("memberactivities") + + # generating rawinfo samples + rawinfo_samples = [] + + # generating random rawinfo data + # all user1 mentioning user2 + for i in range(150): + sample = { + "type": 0, + "author": "user1", + "content": f"test{i}", + "user_mentions": ["user2"], + "role_mentions": [], + "reactions": [], + "replied_user": None, + "createdDate": (datetime.now() - timedelta(hours=i)), + "messageId": f"11188143219343360{i}", + "channelId": "1020707129214111827", + "channelName": "general", + "threadId": None, + "threadName": None, + } + rawinfo_samples.append(sample) + + db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + + analyzer = setup_analyzer() + analyzer.run_once(guildId=guildId) + + memberactivities_cursor = db_access.query_db_find( + "memberactivities", {}, sorting=("date", -1) + ) + memberactivities_data = list(memberactivities_cursor) + + print("memberactivities_data: ", memberactivities_data) + + # just user1 was mentioning others + # user2 didn't do anything + assert memberactivities_data[0]["all_active"] == ["user1"] diff --git a/tests/integration/test_neo4j_compute_metrics.py b/tests/integration/test_neo4j_compute_metrics.py new file mode 100644 index 0000000..9eb185e --- /dev/null +++ b/tests/integration/test_neo4j_compute_metrics.py @@ -0,0 +1,100 @@ +import numpy as np +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import neo4j_setup + + +def test_guild_results_available(): + """ + test with default behaviour + + test whether the averageClusteringCoefficient + and decentralization scores are available in guild node + and localClusteringCoefficient is available in DiscordAccount nodes + """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + 
SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + analytics = Neo4JAnalytics(neo4j_ops) + + analytics.compute_metrics(guildId=guildId, from_start=False) + + accounts_result = neo4j_ops.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (g:Guild {{guildId: '{guildId}'}}) + MATCH (g) -[r2:HAVE_METRICS]->(g) + RETURN + a.userId AS userId, + r.date AS date, + r.localClusteringCoefficient AS localClusteringCoefficient, + r.status AS status + """ + ) + + for _, row in accounts_result.iterrows(): + print(row) + assert row["userId"] is not None + assert row["date"] in [yesterday, today] + assert bool(np.isnan(row["localClusteringCoefficient"])) is False + + guild_results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}}) -[r:HAVE_METRICS]->(g) + RETURN + r.date as date, + g.guildId as guildId, + r.decentralizationScore as decentralizationScore + """ + ) + for _, row in guild_results.iterrows(): + print(row) + assert row["date"] in [yesterday, today] + assert row["guildId"] == guildId + assert bool(np.isnan(row["decentralizationScore"])) is False diff --git a/tests/integration/test_neo4j_compute_metrics_from_start.py b/tests/integration/test_neo4j_compute_metrics_from_start.py new file mode 100644 index 0000000..33d0d7d --- /dev/null +++ b/tests/integration/test_neo4j_compute_metrics_from_start.py @@ -0,0 +1,117 @@ +import numpy as np +from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics + +from .utils.neo4j_conn import neo4j_setup + + +def test_neo4j_compute_metrics_from_start(): + """ + test with the from_start=True behaviour + + test whether the averageClusteringCoefficient + and decentralization scores are available in guild node + and localClusteringCoefficient is available in DiscordAccount nodes + """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = 
"1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + analytics = Neo4JAnalytics(neo4j_ops) + + analytics.compute_metrics(guildId=guildId, from_start=True) + + accounts_result = neo4j_ops.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (g:Guild {{guildId: '{guildId}'}}) + RETURN + a.userId AS userId, + r.date AS date, + r.localClusteringCoefficient AS localClusteringCoefficient, + r.status AS status + """ + ) + + # we don't have 1004 interacting on yesterday (1689193800.0) + assert len(accounts_result.values) == 9 + + for _, row in accounts_result.iterrows(): + print(row) + lcc = row["localClusteringCoefficient"] + date = row["date"] + userId = row["userId"] + status = row["status"] + + assert userId is not None + + assert date in [yesterday, today] + assert bool(np.isnan(lcc)) is False + assert lcc is not None + + assert status is not None + + assert bool(np.isnan(status)) is False + + guild_results = neo4j_ops.gds.run_cypher( + f""" + MATCH (g:Guild {{guildId: '{guildId}'}}) -[r:HAVE_METRICS]->(g) + RETURN + r.date as date, + g.guildId as guildId, + r.decentralizationScore as decentralizationScore + """ + ) + + # for 2 dates + assert len(guild_results.values) == 2 + for _, row in guild_results.iterrows(): + print(row) + assert row["date"] in [yesterday, today] + assert row["guildId"] == guildId + assert row["decentralizationScore"] is not None + assert bool(np.isnan(row["decentralizationScore"])) is False diff --git a/tests/integration/test_neo4j_projection_utils_computed_dates.py b/tests/integration/test_neo4j_projection_utils_computed_dates.py new file mode 100644 index 0000000..ecbf3e3 --- /dev/null +++ b/tests/integration/test_neo4j_projection_utils_computed_dates.py @@ -0,0 +1,79 @@ +from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils + +from .utils.neo4j_conn import neo4j_setup + + +def test_neo4j_projection_utils_get_computed_dates(): + """ + testing the projection utils get_computed_dates + """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) 
-[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + MERGE (a)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (a)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) + MERGE (b)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (b)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) + MERGE (c)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (c)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) + MERGE (d)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (e)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) + + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + projection_utils = ProjectionUtils(neo4j_ops.gds, guildId=guildId) + computed_dates = projection_utils.get_computed_dates( + f""" + MATCH (:DiscordAccount)-[r:INTERACTED_IN]->(g:Guild {{guildId: '{guildId}'}}) + WHERE r.localClusteringCoefficient is NOT NULL + RETURN r.date as computed_dates + """ + ) + + print(computed_dates) + + assert computed_dates == {today} diff --git a/tests/integration/test_network_graph_creation.py b/tests/integration/test_network_graph_creation.py new file mode 100644 index 0000000..591c794 --- /dev/null +++ b/tests/integration/test_network_graph_creation.py @@ -0,0 +1,176 @@ +# test out local clustering coefficient with all nodes connected +from datetime import datetime, timedelta + +import networkx as nx +import numpy as np +from discord_analyzer.analysis.utils.activity import Activity + +from .utils.mock_graph import generate_mock_graph, store_mock_data_in_neo4j +from .utils.neo4j_conn import neo4j_setup + + +def test_network_graph_create(): + """ """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + guildId = "1234" + acc_names = np.array(["1000", "1001", "1002"]) + graph_dict = {} + + # saving the desired outputs + desired_outputs = [] + + # Generating 1st graph + np.random.seed(123) + int_matrix = {} + int_matrix[Activity.Reply] = np.array( + [ + [0, 1, 2], + [0, 0, 3], + [0, 4, 0], + ] + ) + + 
int_matrix[Activity.Mention] = np.array( + [ + [0, 1, 2], + [0, 0, 3], + [0, 4, 0], + ] + ) + + int_matrix[Activity.Reaction] = np.array( + [ + [0, 1, 2], + [0, 0, 3], + [0, 4, 0], + ] + ) + + graph = generate_mock_graph(int_matrix, acc_names) + + node_att = {} + for i, node in enumerate(list(graph)): + node_att[node] = acc_names[i] + + nx.set_node_attributes(graph, node_att, "acc_name") + + graph_date = datetime.now() + graph_date_timestamp = graph_date.replace( + hour=0, minute=0, second=0, microsecond=0 + ).timestamp() + graph_dict[graph_date] = graph + + desired_outputs.extend( + [ + ["1000", 1, graph_date_timestamp, "1001"], + ["1000", 2, graph_date_timestamp, "1002"], + ["1001", 3, graph_date_timestamp, "1002"], + ["1002", 4, graph_date_timestamp, "1001"], + ] + ) + + # Generating 2nd graph + int_matrix = {} + int_matrix[Activity.Reply] = np.array( + [ + [0, 0, 1], + [2, 0, 5], + [0, 0, 0], + ] + ) + + int_matrix[Activity.Mention] = np.array( + [ + [0, 0, 1], + [2, 0, 5], + [0, 0, 0], + ] + ) + + int_matrix[Activity.Reaction] = np.array( + [ + [0, 0, 1], + [2, 0, 5], + [0, 0, 0], + ] + ) + + graph = generate_mock_graph(int_matrix, acc_names) + + nx.set_node_attributes(graph, node_att, "acc_name") + + graph_date = datetime.now() + timedelta(days=-1) + graph_date_timestamp = graph_date.replace( + hour=0, minute=0, second=0, microsecond=0 + ).timestamp() + graph_dict[graph_date] = graph + + desired_outputs.extend( + [ + ["1000", 1, graph_date_timestamp, "1002"], + ["1001", 2, graph_date_timestamp, "1000"], + ["1001", 5, graph_date_timestamp, "1002"], + ] + ) + + # generating 3rd graph + int_matrix = {} + int_matrix[Activity.Reply] = np.array( + [ + [0, 0, 3], + [0, 0, 0], + [1, 0, 0], + ] + ) + int_matrix[Activity.Mention] = np.array( + [ + [0, 0, 3], + [0, 0, 0], + [1, 0, 0], + ] + ) + int_matrix[Activity.Reaction] = np.array( + [ + [0, 0, 3], + [0, 0, 0], + [1, 0, 0], + ] + ) + + graph = generate_mock_graph(int_matrix, acc_names) + nx.set_node_attributes(graph, node_att, "acc_name") + + graph_date = datetime.now() + timedelta(days=-8) + graph_date_timestamp = graph_date.replace( + hour=0, minute=0, second=0, microsecond=0 + ).timestamp() + graph_dict[graph_date] = graph + + desired_outputs.extend( + [ + ["1000", 3, graph_date_timestamp, "1002"], + ["1002", 1, graph_date_timestamp, "1000"], + ] + ) + + # DATABASE SAVING + + store_mock_data_in_neo4j(graph_dict=graph_dict, guildId=guildId) + + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) -[:IS_MEMBER] -> (g:Guild {{guildId: '{guildId}'}}) + MATCH (a)-[r:INTERACTED_WITH]-> (b:DiscordAccount) + RETURN + a.userId as fromUserId, + r.weight as weight, + r.date as date, + b.userId as toUserId + """ + ) + print(desired_outputs) + print(results) + assert desired_outputs in results.values diff --git a/tests/integration/test_node_stats.py b/tests/integration/test_node_stats.py new file mode 100644 index 0000000..430cf18 --- /dev/null +++ b/tests/integration/test_node_stats.py @@ -0,0 +1,113 @@ +# test out local clustering coefficient with all nodes connected +from discord_analyzer.analysis.neo4j_analysis.analyzer_node_stats import NodeStats + +from .utils.neo4j_conn import neo4j_setup + + +def test_node_stats(): + """ + 5 nodes partially connected + using two dates: 166 and 167 + + To see the graph for this test: + https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 + """ + neo4j_ops = neo4j_setup() + # deleting all data + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + + # timestamps + today = 
1689280200.0 + yesterday = 1689193800.0 + guildId = "1234" + + # creating some nodes with data + neo4j_ops.gds.run_cypher( + f""" + CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) + CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) + CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) + SET a.userId = "1000" + SET b.userId = "1001" + SET c.userId = "1002" + SET d.userId = "1003" + SET e.userId = "1004" + MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + + SET r.guildId = '{guildId}' + SET r2.guildId = '{guildId}' + SET r3.guildId = '{guildId}' + SET r4.guildId = '{guildId}' + SET r5.guildId = '{guildId}' + SET r6.guildId = '{guildId}' + SET r7.guildId = '{guildId}' + SET r8.guildId = '{guildId}' + SET r9.guildId = '{guildId}' + SET r10.guildId = '{guildId}' + SET r11.guildId = '{guildId}' + SET r12.guildId = '{guildId}' + """ + ) + + node_stats = NodeStats(neo4j_ops, threshold=2) + node_stats.compute_stats(guildId="1234", from_start=True) + + # getting the results + results = neo4j_ops.gds.run_cypher( + f""" + MATCH (a:DiscordAccount) + -[r:INTERACTED_IN] -> (g:Guild {{guildId: '{guildId}'}}) + RETURN a.userId as userId, r.date as date, r.status as status + """ + ) + + # we had 5 discord accounts and 2 dates for each + # just the "1004" user did not interact yesterday + # so 9 status results + assert len(results) == 9 + print(results) + + results_user0 = results[results["userId"] == "1000"] + expected_results = [ + ["1000", today, 2], + ["1000", yesterday, 0], + ] + assert expected_results in results_user0.values + + results_user1 = results[results["userId"] == "1001"] + expected_results = [ + ["1001", today, 1], + ["1001", yesterday, 1], + ] + assert expected_results in results_user1.values + + results_user2 = results[results["userId"] == "1002"] + expected_results = [ + ["1002", today, 0], + ["1002", yesterday, 0], + ] + assert expected_results in results_user2.values + + results_user3 = results[results["userId"] == "1003"] + expected_results = [ + ["1003", today, 2], + ["1004", yesterday, 1], + ] + assert expected_results in results_user3.values + + results_user4 = results[results["userId"] == "1004"] + expected_results = [["1004", today, 0]] + assert expected_results in results_user4.values diff --git a/tests/integration/test_service_connection.py b/tests/integration/test_service_connection.py new file mode 100644 index 0000000..5e42f64 --- /dev/null +++ b/tests/integration/test_service_connection.py @@ -0,0 +1,17 @@ +from tc_messageBroker.message_broker import RabbitMQ +from utils.daolytics_uitls import get_rabbit_mq_credentials + + +def test_rabbit_mq_connect(): + rabbit_creds = 
get_rabbit_mq_credentials() + rabbit_mq = RabbitMQ( + broker_url=rabbit_creds["broker_url"], + port=rabbit_creds["port"], + username=rabbit_creds["username"], + password=rabbit_creds["password"], + ) + + connect = rabbit_mq.connect("sample_queue") + + # when no rabbitmq instance is running it should be False + assert connect is not False diff --git a/tests/integration/utils/__init__.py b/tests/integration/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/utils/activity_params.py b/tests/integration/utils/activity_params.py new file mode 100644 index 0000000..76c662c --- /dev/null +++ b/tests/integration/utils/activity_params.py @@ -0,0 +1,32 @@ +def prepare_activity_params(): + INT_THR = 1 # minimum number of interactions to be active + UW_DEG_THR = 1 # minimum number of accounts interacted with to be active + PAUSED_T_THR = 1 # time period to remain paused + CON_T_THR = 4 # time period to be consistent active + CON_O_THR = 3 # time period to be consistent active + EDGE_STR_THR = 5 # minimum number of interactions for connected + UW_THR_DEG_THR = 5 # minimum number of accounts for connected + VITAL_T_THR = 4 # time period to assess for vital + VITAL_O_THR = 3 # times to be connected within VITAL_T_THR to be vital + STILL_T_THR = 2 # time period to assess for still active + STILL_O_THR = 2 # times to be active within STILL_T_THR to be still active + DROP_H_THR = 2 + DROP_I_THR = 1 + + act_param = [ + INT_THR, + UW_DEG_THR, + PAUSED_T_THR, + CON_T_THR, + CON_O_THR, + EDGE_STR_THR, + UW_THR_DEG_THR, + VITAL_T_THR, + VITAL_O_THR, + STILL_T_THR, + STILL_O_THR, + DROP_H_THR, + DROP_I_THR, + ] + + return act_param diff --git a/tests/integration/utils/analyzer_setup.py b/tests/integration/utils/analyzer_setup.py new file mode 100644 index 0000000..e48f473 --- /dev/null +++ b/tests/integration/utils/analyzer_setup.py @@ -0,0 +1,50 @@ +import os + +from discord_analyzer.DB_operations.mongodb_access import DB_access +from discord_analyzer.rn_analyzer import RnDaoAnalyzer +from dotenv import load_dotenv + + +def setup_analyzer() -> RnDaoAnalyzer: + load_dotenv() + analyzer = RnDaoAnalyzer() + + user = os.getenv("MONGODB_USER", "") + password = os.getenv("MONGODB_PASS", "") + host = os.getenv("MONGODB_HOST", "") + port = os.getenv("MONGODB_PORT", "") + + neo4j_creds = {} + neo4j_creds["db_name"] = os.getenv("NEO4J_DB", "") + neo4j_creds["protocol"] = os.getenv("NEO4J_PROTOCOL", "") + neo4j_creds["host"] = os.getenv("NEO4J_HOST", "") + neo4j_creds["port"] = os.getenv("NEO4J_PORT", "") + neo4j_creds["password"] = os.getenv("NEO4J_PASSWORD", "") + neo4j_creds["user"] = os.getenv("NEO4J_USER", "") + + analyzer.set_mongo_database_info( + mongo_db_host=host, + mongo_db_password=password, + mongo_db_user=user, + mongo_db_port=port, + ) + + analyzer.set_neo4j_database_info(neo4j_creds=neo4j_creds) + analyzer.database_connect() + analyzer.setup_neo4j_metrics() + + return analyzer + + +def launch_db_access(guildId: str): + load_dotenv() + user = os.getenv("MONGODB_USER") + password = os.getenv("MONGODB_PASS") + host = os.getenv("MONGODB_HOST") + port = os.getenv("MONGODB_PORT") + + connection_str = f"mongodb://{user}:{password}@{host}:{port}" + + db_access = DB_access(guildId, connection_str) + print("CONNECTED to MongoDB!") + return db_access diff --git a/tests/integration/utils/mock_graph.py b/tests/integration/utils/mock_graph.py new file mode 100644 index 0000000..d244e3a --- /dev/null +++ b/tests/integration/utils/mock_graph.py @@ -0,0 +1,108 @@ +import os + +from 
discord_analyzer import RnDaoAnalyzer +from dotenv import load_dotenv +from tc_core_analyzer_lib.assess_engagement import EngagementAssessment +from tc_core_analyzer_lib.utils.activity import DiscordActivity + +from .activity_params import prepare_activity_params + + +def generate_mock_graph(int_matrix, acc_names): + # preparing some parameters + act_param = prepare_activity_params() + + activity_dict = { + "all_joined": {}, + "all_joined_day": {"0": set()}, + "all_consistent": {}, + "all_vital": {}, + "all_active": {}, + "all_connected": {}, + "all_paused": {}, + "all_new_disengaged": {}, + "all_disengaged": {}, + "all_unpaused": {}, + "all_returned": {}, + "all_new_active": {}, + "all_still_active": {}, + "all_dropped": {}, + "all_disengaged_were_newly_active": {}, + "all_disengaged_were_consistently_active": {}, + "all_disengaged_were_vital": {}, + "all_lurker": {}, + "all_about_to_disengage": {}, + "all_disengaged_in_past": {}, + } + + WINDOW_D = 7 + + # window index + w_i = 0 + + act_param = prepare_activity_params() + + assess_engagment = EngagementAssessment( + activities=[ + DiscordActivity.Mention, + DiscordActivity.Reply, + DiscordActivity.Reaction, + ], + activities_ignore_0_axis=[DiscordActivity.Mention], + activities_ignore_1_axis=[], + ) + + (graph, *computed_activities) = assess_engagment.compute( + int_mat=int_matrix, + w_i=w_i, + acc_names=acc_names, + act_param=act_param, + WINDOW_D=WINDOW_D, + **activity_dict, + ) + + return graph + + +def store_mock_data_in_neo4j(graph_dict, guildId): + # CREDS + load_dotenv() + user = os.getenv("MONGODB_USER") + password = os.getenv("MONGODB_PASS") + host = os.getenv("MONGODB_HOST") + port = os.getenv("MONGODB_PORT") + + neo4j_creds = {} + neo4j_creds["db_name"] = os.getenv("NEO4J_DB") + neo4j_creds["protocol"] = os.getenv("NEO4J_PROTOCOL") + neo4j_creds["host"] = os.getenv("NEO4J_HOST") + neo4j_creds["port"] = os.getenv("NEO4J_PORT") + neo4j_creds["password"] = os.getenv("NEO4J_PASSWORD") + neo4j_creds["user"] = os.getenv("NEO4J_USER") + + analyzer = RnDaoAnalyzer() + + analyzer.set_mongo_database_info( + mongo_db_host=host, + mongo_db_password=password, + mongo_db_user=user, + mongo_db_port=port, + ) + analyzer.set_neo4j_database_info(neo4j_creds=neo4j_creds) + analyzer.database_connect() + + guilds_data = {} + + guilds_data[guildId] = { + "heatmaps": None, + "memberactivities": ( + None, + graph_dict, + ), + } + + analyzer.DB_connections.store_analytics_data( + analytics_data=guilds_data, + remove_heatmaps=False, + remove_memberactivities=False, + ) diff --git a/tests/integration/utils/mock_heatmaps.py b/tests/integration/utils/mock_heatmaps.py new file mode 100644 index 0000000..bc421f1 --- /dev/null +++ b/tests/integration/utils/mock_heatmaps.py @@ -0,0 +1,34 @@ +from datetime import datetime, timedelta +from typing import Any + +import numpy as np + + +def create_empty_heatmaps_data( + start_date: datetime, count: int = 10 +) -> list[dict[str, Any]]: + """ + create empty documents of heatmaps + """ + data: list[dict[str, Any]] = [] + for i in range(count): + date = start_date + timedelta(days=i) + document = { + "date": date.strftime("%Y-%m-%d"), + "channelId": "1020707129214111827", + "thr_messages": list(np.zeros(24)), + "lone_messages": list(np.zeros(24)), + "replier": list(np.zeros(24)), + "replied": list(np.zeros(24)), + "mentioner": list(np.zeros(24)), + "mentioned": list(np.zeros(24)), + "reacter": list(np.zeros(24)), + "reacted": list(np.zeros(24)), + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": 
[], + "account_name": "973993299281076285", + } + data.append(document) + + return data diff --git a/tests/integration/utils/mock_memberactivities.py b/tests/integration/utils/mock_memberactivities.py new file mode 100644 index 0000000..a1fa351 --- /dev/null +++ b/tests/integration/utils/mock_memberactivities.py @@ -0,0 +1,42 @@ +from datetime import datetime, timedelta +from typing import Any + + +def create_empty_memberactivities_data( + start_date: datetime, count: int = 10 +) -> list[dict[str, Any]]: + """ + create empty documents of memberactivities + """ + data: list[dict[str, Any]] = [] + + for i in range(count): + date = start_date + timedelta(days=i) + date = date.replace(hour=0, minute=0, second=0, microsecond=0) + document = { + "date": date.isoformat(), + "all_joined": [], + "all_joined_day": [], + "all_consistent": [], + "all_vital": [], + "all_active": [], + "all_connected": [], + "all_paused": [], + "all_new_disengaged": [], + "all_disengaged": [], + "all_unpaused": [], + "all_returned": [], + "all_new_active": [], + "all_still_active": [], + "all_dropped": [], + "all_disengaged_were_newly_active": [], + "all_disengaged_were_consistently_active": [], + "all_disengaged_were_vital": [], + "all_lurker": [], + "all_about_to_disengage": [], + "all_disengaged_in_past": [], + } + + data.append(document) + + return data diff --git a/tests/integration/utils/neo4j_conn.py b/tests/integration/utils/neo4j_conn.py new file mode 100644 index 0000000..8a534be --- /dev/null +++ b/tests/integration/utils/neo4j_conn.py @@ -0,0 +1,29 @@ +import os + +from dotenv import load_dotenv +from tc_neo4j_lib.neo4j_ops import Neo4jOps + + +def neo4j_setup() -> Neo4jOps: + load_dotenv() + + protocol = os.getenv("NEO4J_PROTOCOL") + host = os.getenv("NEO4J_HOST") + port = os.getenv("NEO4J_PORT") + db_name = os.getenv("NEO4J_DB") + + user = os.getenv("NEO4J_USER") + password = os.getenv("NEO4J_PASSWORD") + + neo4j_ops = Neo4jOps() + neo4j_ops.set_neo4j_db_info( + neo4j_db_name=db_name, + neo4j_protocol=protocol, + neo4j_user=user, + neo4j_password=password, + neo4j_host=host, + neo4j_port=port, + ) + neo4j_ops.neo4j_database_connect() + + return neo4j_ops diff --git a/tests/integration/utils/remove_and_setup_guild.py b/tests/integration/utils/remove_and_setup_guild.py new file mode 100644 index 0000000..bf4b697 --- /dev/null +++ b/tests/integration/utils/remove_and_setup_guild.py @@ -0,0 +1,76 @@ +from datetime import datetime, timedelta +from typing import Optional + +import numpy as np +from discord_analyzer.DB_operations.mongodb_access import DB_access + + +def setup_db_guild( + db_access: DB_access, + guildId: str = "1234", + discordId_list: list[str] = ["973993299281076285"], + discordId_isbot: list[bool] = [False], + dates: Optional[list[datetime]] = None, + days_ago_period: int = 30, +): + """ + Remove the guild from the RnDAO database and then insert it again, + also drop the guildId database and re-create it, + then create the guildmembers collection in it + + `discordId_isbot` indicates whether each user is a bot or not + """ + + db_access.db_mongo_client["RnDAO"]["guilds"].delete_one({"guildId": guildId}) + db_access.db_mongo_client.drop_database(guildId) + + db_access.db_mongo_client["RnDAO"]["guilds"].insert_one( + { + "guildId": guildId, + "user": "876487027099582524", + "name": "Sample Guild", + "connectedAt": (datetime.now() - timedelta(days=10)), + "isInProgress": True, + "isDisconnected": False, + "icon": "afd0d06fd12b2905c53708ca742e6c66", + "window": [7, 1], + "action": [1, 1, 1, 4, 3, 5, 5, 
4, 3, 3, 2, 2, 1], + "selectedChannels": [ + { + "channelId": "1020707129214111827", + "channelName": "general", + }, + ], + "period": (datetime.now() - timedelta(days=days_ago_period)), + } + ) + + if dates is None: + dates_using = np.repeat( + datetime.now() - timedelta(days=10), len(discordId_list) + ) + else: + dates_using = dates + + # just to create the data we're inserting one by one + # it's not the most efficient way + + # if the isBot parameters was not set + # set all the users to not to be a bot + if len(discordId_isbot) != len(discordId_list): + user_data = zip(discordId_list, [False] * len(discordId_list)) + else: + user_data = zip(discordId_list, discordId_isbot) + + for idx, (discordId, isbot) in enumerate(user_data): + db_access.db_mongo_client[guildId]["guildmembers"].insert_one( + { + "discordId": discordId, + "username": f"sample_user_{idx}", + "roles": ["1012430565959553145"], + "joinedAt": dates_using[idx], + "avatar": "3ddd6e429f75d6a711d0a58ba3060694", + "isBot": isbot, + "discriminator": "0", + } + ) diff --git a/tests/test_sample.py b/tests/test_sample.py deleted file mode 100644 index 5b4adb4..0000000 --- a/tests/test_sample.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_sample(): - assert 3 != 5 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_analytics_interaction_refine_dictionary.py b/tests/unit/test_analytics_interaction_refine_dictionary.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_converting_to_dict.py b/tests/unit/test_converting_to_dict.py new file mode 100644 index 0000000..4fd9a68 --- /dev/null +++ b/tests/unit/test_converting_to_dict.py @@ -0,0 +1,21 @@ +from discord_analyzer.analysis.utils.member_activity_utils import convert_to_dict + + +def test_empty(): + results = convert_to_dict(data=(), dict_keys=[]) + + assert results == {} + + +def test_single_data(): + results = convert_to_dict(data=["value1"], dict_keys=["var1"]) + + assert results == {"var1": "value1"} + + +def test_multiple_data(): + results = convert_to_dict( + data=["value1", "value2", "value3"], dict_keys=["var1", "var2", "var3"] + ) + + assert results == {"var1": "value1", "var2": "value2", "var3": "value3"} diff --git a/tests/unit/test_creds_loading.py b/tests/unit/test_creds_loading.py new file mode 100644 index 0000000..2819fb7 --- /dev/null +++ b/tests/unit/test_creds_loading.py @@ -0,0 +1,121 @@ +from utils.daolytics_uitls import ( + get_mongo_credentials, + get_neo4j_credentials, + get_rabbit_mq_credentials, + get_redis_credentials, + get_saga_db_location, + get_sentryio_service_creds, +) + + +def test_mongo_creds_keys(): + """ + test whether the keys of dictionaries is created or not + """ + mongo_creds = get_mongo_credentials() + + credential_keys = list(mongo_creds.keys()) + + assert "user" in credential_keys + assert "password" in credential_keys + assert "host" in credential_keys + assert "port" in credential_keys + + +def test_mongo_creds_values(): + mongo_creds = get_mongo_credentials() + + assert mongo_creds["user"] is not None + assert mongo_creds["password"] is not None + assert mongo_creds["host"] is not None + assert mongo_creds["port"] is not None + + +def test_rabbit_creds_keys(): + rabbit_creds = get_rabbit_mq_credentials() + + credential_keys = list(rabbit_creds.keys()) + + assert "broker_url" in credential_keys + assert "port" in credential_keys + assert "password" in credential_keys + assert "username" in credential_keys + + +def 
test_rabbit_creds_values(): + rabbit_creds = get_rabbit_mq_credentials() + + assert rabbit_creds["broker_url"] is not None + assert rabbit_creds["port"] is not None + assert rabbit_creds["password"] is not None + assert rabbit_creds["username"] is not None + + +def test_no4j_creds_keys(): + neo4j_creds = get_neo4j_credentials() + + credential_keys = list(neo4j_creds.keys()) + + assert "user" in credential_keys + assert "password" in credential_keys + assert "db_name" in credential_keys + assert "protocol" in credential_keys + assert "port" in credential_keys + assert "host" in credential_keys + + +def test_neo4j_creds_values(): + neo4j_creds = get_neo4j_credentials() + + assert neo4j_creds["user"] is not None + assert neo4j_creds["password"] is not None + assert neo4j_creds["protocol"] is not None + assert neo4j_creds["port"] is not None + assert neo4j_creds["db_name"] is not None + assert neo4j_creds["host"] is not None + + +def test_redis_creds_keys(): + redis_creds = get_redis_credentials() + + credential_keys = list(redis_creds.keys()) + + assert "pass" in credential_keys + assert "port" in credential_keys + assert "host" in credential_keys + + +def test_redis_creds_values(): + redis_creds = get_redis_credentials() + + assert redis_creds["pass"] is not None + assert redis_creds["port"] is not None + assert redis_creds["host"] is not None + + +def test_saga_location(): + saga_creds = get_saga_db_location() + + assert "db_name" in saga_creds.keys() + assert "collection_name" in saga_creds.keys() + + +def test_saga_location_values(): + saga_creds = get_saga_db_location() + + assert saga_creds["db_name"] is not None + assert saga_creds["collection_name"] is not None + + +def test_sentryio_creds(): + sentry_creds = get_sentryio_service_creds() + + assert "dsn" in sentry_creds + assert "env" in sentry_creds + + +def test_sentryio_creds_values(): + sentry_creds = get_sentryio_service_creds() + + assert sentry_creds["dsn"] is not None + assert sentry_creds["env"] is not None diff --git a/tests/unit/test_example.py b/tests/unit/test_example.py new file mode 100644 index 0000000..2137401 --- /dev/null +++ b/tests/unit/test_example.py @@ -0,0 +1,48 @@ +from discord_analyzer import RnDaoAnalyzer + + +def test_mongo_db_info_set(): + analyzer = RnDaoAnalyzer() + + port = 1234 + host = "http://www.google.com" + password = "sample_passxyz" + user = "sample_user" + + analyzer.set_mongo_database_info( + mongo_db_host=host, + mongo_db_password=password, + mongo_db_user=user, + mongo_db_port=port, + ) + assert analyzer.mongo_host == host + assert analyzer.mongo_pass == password + assert analyzer.mongo_user == user + assert analyzer.mongo_port == port + + +def test_neo4j_db_info_set(): + port = 1234 + db_name = "db" + protocol = "bolt" + user = "user" + host = "localhost" + password = "password" + neo4j_creds = { + "db_name": db_name, + "password": password, + "port": port, + "protocol": protocol, + "host": host, + "user": user, + } + + analyzer = RnDaoAnalyzer() + analyzer.set_neo4j_database_info(neo4j_creds=neo4j_creds) + + assert analyzer.neo4j_port == port + assert analyzer.neo4j_host == host + assert analyzer.neo4j_protocol == protocol + assert analyzer.neo4j_db_name == db_name + assert analyzer.neo4j_password == password + assert analyzer.neo4j_user == user diff --git a/tests/unit/test_filter_channel_name_id.py b/tests/unit/test_filter_channel_name_id.py new file mode 100644 index 0000000..27dbb48 --- /dev/null +++ b/tests/unit/test_filter_channel_name_id.py @@ -0,0 +1,107 @@ +from 
discord_analyzer.analysis.analytics_interactions_script import ( + filter_channel_name_id, +) + + +def test_filter_channel_name_empty_input(): + sample_input = [] + + output = filter_channel_name_id(sample_input) + + assert output == {} + + +def test_filter_channel_name_one_synthesized_input(): + sample_input = [ + { + "channelId": "123", + "channelName": "welcome-and-rules", + } + ] + + output = filter_channel_name_id(sample_input) + + assert output == {"123": "welcome-and-rules"} + + +def test_filter_channel_name_multiple_synthesized_input(): + sample_input = [ + { + "channelId": "123", + "channelName": "welcome-and-rules", + }, + { + "channelId": "1234", + "channelName": "welcome-and-rules2", + }, + { + "channelId": "12345", + "channelName": "welcome-and-rules3", + }, + ] + + output = filter_channel_name_id(sample_input) + + assert output == { + "123": "welcome-and-rules", + "1234": "welcome-and-rules2", + "12345": "welcome-and-rules3", + } + + +def test_filter_channel_name_one_real_input(): + sample_input = [ + { + "_id": {"$oid": "6436d6ab47ce0ae8b83f25fc"}, + "channelId": "993163081939165236", + "__v": 0, + "channelName": "welcome-and-rules", + "last_update": {"$date": "2023-05-10T01:00:05.379Z"}, + } + ] + + output = filter_channel_name_id(sample_input) + + assert output == {"993163081939165236": "welcome-and-rules"} + + +def test_filter_channel_name_multiple_real_input(): + sample_input = [ + { + "_id": {"$oid": "6436d6ab47ce0ae8b83f25fc"}, + "channelId": "993163081939165236", + "__v": 0, + "channelName": "welcome-and-rules", + "last_update": {"$date": "2023-05-10T01:00:05.379Z"}, + }, + { + "_id": {"$oid": "6436d6ab47ce0ae8b83f2600"}, + "channelId": "993163081939165237", + "__v": 0, + "channelName": "announcements", + "last_update": {"$date": "2023-05-10T01:00:05.382Z"}, + }, + { + "_id": {"$oid": "6436d6ab47ce0ae8b83f260a"}, + "channelId": "993163081939165238", + "__v": 0, + "channelName": "resources", + "last_update": {"$date": "2023-05-10T01:00:05.385Z"}, + }, + { + "_id": {"$oid": "6436d6ab47ce0ae8b83f2613"}, + "channelId": "993163081939165240", + "__v": 0, + "channelName": "general", + "last_update": {"$date": "2023-05-10T01:00:05.407Z"}, + }, + ] + + output = filter_channel_name_id(sample_input) + + assert output == { + "993163081939165236": "welcome-and-rules", + "993163081939165237": "announcements", + "993163081939165238": "resources", + "993163081939165240": "general", + } diff --git a/tests/unit/test_filter_channel_thread.py b/tests/unit/test_filter_channel_thread.py new file mode 100644 index 0000000..20993c9 --- /dev/null +++ b/tests/unit/test_filter_channel_thread.py @@ -0,0 +1,160 @@ +from discord_analyzer.analysis.analytics_interactions_script import ( + filter_channel_thread, +) + + +def test_filter_channel_thread_single_empty_input(): + sample_input = [] + + output = filter_channel_thread(sample_input) + + assert output == {} + + +def test_filter_channel_thread_multiple_empty_inputs(): + sample_input = [] + + output = filter_channel_thread( + sample_input, + ) + + assert output == {} + + +def test_filter_channel_thread_single_channel_single_message(): + sample_input = [ + { + "author": "ahmadyazdanii#7517", + "content": "test", + "createdDate": "2023-04-19 07:05:17", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": None, + "threadName": None, + } + ] + + output = filter_channel_thread( + sample_input, + ) + + sample_output = {"off-topic": {None: {"1:ahmadyazdanii#7517": "test"}}} + + assert output == sample_output + + +# flake8: 
noqa +def test_filter_channel_thread_multiple_channel_multiple_message_single_user_all_channels(): + sample_input = [ + { + "author": "ahmadyazdanii#7517", + "content": "test", + "createdDate": "2023-04-19 07:05:17", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": None, + "threadName": None, + }, + { + "author": "ahmadyazdanii#7517", + "content": "hi", + "createdDate": "2023-04-19 07:05:18", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": "1098202658390691930", + "threadName": "test", + }, + { + "author": "ahmadyazdanii#7517", + "content": "test2", + "createdDate": "2023-04-19 07:14:57", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": "1098202658390691930", + "threadName": "test", + }, + ] + + output = filter_channel_thread( + sample_input, + ) + + sample_output = { + "off-topic": { + None: {"1:ahmadyazdanii#7517": "test"}, + "test": { + "1:ahmadyazdanii#7517": "hi", + "2:ahmadyazdanii#7517": "test2", + }, + } + } + + assert output == sample_output + + +def test_filter_channel_thread_single_channel_multiple_message_multiple_user_all_channels(): # flake8: noqa + sample_input = [ + { + "author": "ahmadyazdanii#7517", + "content": "test", + "createdDate": "2023-03-10 07:05:17", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": None, + "threadName": None, + }, + { + "author": "Ene", + "content": "Hello", + "createdDate": "2023-03-11 07:05:17", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": "1098202658390691930", + "threadName": "test-thread", + }, + { + "author": "Amin", + "content": "Hi", + "createdDate": "2023-03-12 07:05:18", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": "1098202658390691930", + "threadName": "test-thread", + }, + { + "author": "Behzad", + "content": "Ola!", + "createdDate": "2023-04-07 07:14:57", + "channelId": "993163081939165240", + "channelName": "off-topic", + "threadId": "1098202658390691930", + "threadName": "test-thread", + }, + { + "author": "Nima", + "content": "Salam!", + "createdDate": "2023-04-12 07:14:57", + "channelId": "993163081939165222", + "channelName": "off-topic-2", + "threadId": "1098202658390691931", + "threadName": "test-thread2", + }, + ] + + output = filter_channel_thread( + sample_input, + ) + + sample_output = { + "off-topic": { + None: {"1:ahmadyazdanii#7517": "test"}, + "test-thread": { + "1:Ene": "Hello", + "2:Amin": "Hi", + "3:Behzad": "Ola!", + }, + }, + "off-topic-2": {"test-thread2": {"1:Nima": "Salam!"}}, + } + + assert output == sample_output diff --git a/tests/unit/test_generate_interaction_mtx.py b/tests/unit/test_generate_interaction_mtx.py new file mode 100644 index 0000000..bb663d9 --- /dev/null +++ b/tests/unit/test_generate_interaction_mtx.py @@ -0,0 +1,140 @@ +from discord_analyzer.analysis.utils.activity import Activity +from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( + generate_interaction_matrix, +) + + +def test_empty_inputs(): + per_acc_interactions = {} + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=[], + activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + ) + assert int_mtx.shape == (0, 0) + + +def test_single_account(): + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 
1}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [], + }, + ] + } + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=["968122690118512720"], + activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + ) + + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[4]]).all()) + assert is_match is True + + +def test_two_accounts(): + acc_names = ["968122690118512720", "968122690118512799"] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512799", "count": 1}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [], + }, + ] + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=acc_names, + activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + ) + + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[3, 2], [0, 0]]).all()) + assert is_match is True + + +def test_multiple_interactions(): + acc_names = [ + "968122690118512720", + "795295822534148096", + "968122690118512721", + "7952958225341480444", + "7952958225341480433", + ] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [ + [{"account": "7952958225341480444", "count": 5}], + [{"account": "7952958225341480433", "count": 2}], + ], + "replied_per_acc": [], + }, + ], + "968122690118512721": [ + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + }, + ], + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=acc_names, + activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + ) + + assert int_mtx.shape == (5, 5) + is_match = ( + int_mtx + == [ + [0.0, 11.0, 0.0, 5.0, 2.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 7.0, 0.0, 8.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + ).all() + assert bool(is_match) is True diff --git a/tests/unit/test_generate_interaction_mtx_mention.py b/tests/unit/test_generate_interaction_mtx_mention.py new file mode 100644 index 0000000..fd18db1 --- /dev/null +++ b/tests/unit/test_generate_interaction_mtx_mention.py @@ -0,0 +1,127 @@ +from discord_analyzer.analysis.utils.activity import Activity +from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( + generate_interaction_matrix, +) + + +def 
test_single_account_mention(): + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [], + }, + ] + } + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=["968122690118512720"], + activities=[Activity.Mention], + ) + + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[2]]).all()) + assert is_match is True + + +def test_two_accounts_mention(): + acc_names = ["968122690118512720", "968122690118512799"] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512799", "count": 3}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [], + }, + ] + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, acc_names=acc_names, activities=[Activity.Mention] + ) + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[1, 3], [0, 0]]).all()) + assert is_match is True + + +def test_multiple_interactions_mention(): + acc_names = [ + "968122690118512720", + "795295822534148096", + "968122690118512721", + "7952958225341480444", + "7952958225341480433", + ] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [ + [{"account": "7952958225341480444", "count": 7}], + [{"account": "7952958225341480433", "count": 1}], + ], + "mentioner_per_acc": [ + [{"account": "7952958225341480444", "count": 5}], + [{"account": "7952958225341480433", "count": 2}], + ], + "replied_per_acc": [], + }, + ], + "968122690118512721": [ + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + }, + ], + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, acc_names=acc_names, activities=[Activity.Mention] + ) + assert int_mtx.shape == (5, 5) + is_match = ( + int_mtx + == [ + [0.0, 2.0, 0.0, 5.0, 2.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 4.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + ).all() + assert bool(is_match) is True diff --git a/tests/unit/test_generate_interaction_mtx_reaction.py b/tests/unit/test_generate_interaction_mtx_reaction.py new file mode 100644 index 0000000..49b6c36 --- /dev/null 
+++ b/tests/unit/test_generate_interaction_mtx_reaction.py @@ -0,0 +1,128 @@ +from discord_analyzer.analysis.utils.activity import Activity +from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( + generate_interaction_matrix, +) + + +def test_single_account_reaction(): + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 7}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 4}]], + "replied_per_acc": [[{"account": "968122690118512720", "count": 3}]], + }, + ] + } + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=["968122690118512720"], + activities=[Activity.Reaction], + ) + + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[8]]).all()) + assert is_match is True + + +def test_two_accounts_reaction(): + acc_names = ["968122690118512720", "968122690118512799"] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512799", "count": 3}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [], + }, + ] + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, acc_names=acc_names, activities=[Activity.Reaction] + ) + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[2, 1], [0, 0]]).all()) + assert is_match is True + + +def test_multiple_interactions_reaction(): + acc_names = [ + "968122690118512720", + "795295822534148096", + "968122690118512721", + "7952958225341480444", + "7952958225341480433", + ] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [ + [{"account": "7952958225341480444", "count": 7}], + [{"account": "7952958225341480433", "count": 1}], + ], + "mentioner_per_acc": [ + [{"account": "7952958225341480444", "count": 5}], + [{"account": "7952958225341480433", "count": 2}], + ], + "replied_per_acc": [], + }, + ], + "968122690118512721": [ + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + }, + ], + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, acc_names=acc_names, activities=[Activity.Reaction] + ) + print(int_mtx) + assert int_mtx.shape == (5, 5) + is_match = ( + int_mtx + == [ + [0.0, 9.0, 0.0, 7.0, 1.0], + [0.0, 
0.0, 0.0, 0.0, 0.0], + [0.0, 3.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + ).all() + assert bool(is_match) is True diff --git a/tests/unit/test_generate_interaction_mtx_reply.py b/tests/unit/test_generate_interaction_mtx_reply.py new file mode 100644 index 0000000..e82ff8e --- /dev/null +++ b/tests/unit/test_generate_interaction_mtx_reply.py @@ -0,0 +1,131 @@ +from discord_analyzer.analysis.utils.activity import Activity +from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( + generate_interaction_matrix, +) + + +def test_single_account_reply(): + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 7}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 4}]], + "replied_per_acc": [[{"account": "968122690118512720", "count": 3}]], + }, + ] + } + int_mtx = generate_interaction_matrix( + per_acc_interactions, + acc_names=["968122690118512720"], + activities=[Activity.Reply], + ) + + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[3]]).all()) + assert is_match is True + + +def test_two_accounts_reply(): + acc_names = ["968122690118512720", "968122690118512799"] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], + "mentioner_per_acc": [[{"account": "968122690118512799", "count": 3}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "replied_per_acc": [[{"account": "968122690118512799", "count": 7}]], + }, + ] + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, acc_names=acc_names, activities=[Activity.Reply] + ) + # converting `numpy.bool_` to python `bool` + is_match = bool((int_mtx == [[0, 7], [0, 0]]).all()) + assert is_match is True + + +def test_multiple_interactions_reply(): + acc_names = [ + "968122690118512720", + "795295822534148096", + "968122690118512721", + "7952958225341480444", + "7952958225341480433", + ] + per_acc_interactions = { + "968122690118512720": [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 7}]], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [ + [{"account": "7952958225341480444", "count": 7}], + [{"account": "7952958225341480433", "count": 1}], + ], + "mentioner_per_acc": [ + [{"account": "7952958225341480444", "count": 5}], + [{"account": "7952958225341480433", "count": 2}], + ], + "replied_per_acc": [ + [{"account": "7952958225341480444", "count": 1}], + [{"account": "7952958225341480433", "count": 1}], + ], + }, + ], + "968122690118512721": [ + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], + 
"mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + }, + ], + } + + int_mtx = generate_interaction_matrix( + per_acc_interactions, acc_names=acc_names, activities=[Activity.Reply] + ) + print(int_mtx) + assert int_mtx.shape == (5, 5) + is_match = ( + int_mtx + == [ + [0.0, 0.0, 0.0, 8.0, 1.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 8.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0], + ] + ).all() + assert bool(is_match) is True diff --git a/tests/unit/test_member_activity_utils.py b/tests/unit/test_member_activity_utils.py new file mode 100644 index 0000000..3ce2fe4 --- /dev/null +++ b/tests/unit/test_member_activity_utils.py @@ -0,0 +1,127 @@ +from datetime import datetime, timedelta + +from discord_analyzer.analysis.utils.member_activity_history_utils import ( + MemberActivityPastUtils, +) + + +def test_zero_joined(): + db_access = None + + start_dt = datetime(2022, 1, 1) + end_dt = datetime(2023, 4, 15) + + all_joined_day = {} + joined_acc = [ + {"joinedAt": (start_dt + timedelta(days=5)), "discordId": "000000000"}, + {"joinedAt": (start_dt + timedelta(days=6)), "discordId": "000000001"}, + {"joinedAt": (start_dt + timedelta(days=8)), "discordId": "000000002"}, + ] + + member_activitiy_utils = MemberActivityPastUtils(db_access=db_access) + starting_key = 0 + + all_joined_day = member_activitiy_utils.update_all_joined_day( + start_dt=start_dt, + end_dt=end_dt, + all_joined_day=all_joined_day, + starting_key=starting_key, + joined_acc=joined_acc, + ) + + assert all_joined_day["0"] == set([]) + assert all_joined_day["1"] == set([]) + assert all_joined_day["2"] == set([]) + assert all_joined_day["3"] == set([]) + assert all_joined_day["4"] == set([]) + assert all_joined_day["5"] == set(["000000000"]) + assert all_joined_day["6"] == set(["000000001"]) + assert all_joined_day["7"] == set([]) + assert all_joined_day["8"] == set(["000000002"]) + for i in range(9, (end_dt - start_dt).days): + assert all_joined_day[str(i)] == set([]) + + # len would show 1 more + assert len(all_joined_day.keys()) - 1 == (end_dt - start_dt).days + starting_key + + +def test_single_joined(): + db_access = None + + start_dt = datetime(2022, 1, 1) + end_dt = datetime(2023, 4, 15) + + all_joined_day = { + "0": set(["000000000", "000000001"]), + } + + joined_acc = [ + {"joinedAt": (start_dt + timedelta(days=0)), "discordId": "000000002"}, + {"joinedAt": (start_dt + timedelta(days=1)), "discordId": "000000003"}, + {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000004"}, + ] + + member_activitiy_utils = MemberActivityPastUtils(db_access=db_access) + starting_key = 1 + + all_joined_day = member_activitiy_utils.update_all_joined_day( + start_dt=start_dt, + end_dt=end_dt, + all_joined_day=all_joined_day, + starting_key=starting_key, + joined_acc=joined_acc, + ) + + assert all_joined_day["0"] == set(["000000000", "000000001"]) + assert all_joined_day["1"] == set(["000000002"]) + assert all_joined_day["2"] == set(["000000003"]) + assert all_joined_day["3"] == set(["000000004"]) + for i in range(4, (end_dt - start_dt).days): + assert all_joined_day[str(i)] == set([]) + + # len would show 1 more + assert len(all_joined_day.keys()) - 1 == (end_dt - start_dt).days + starting_key + + +def test_multiple_joined(): + """Test multiple accounts joined in a day""" + db_access = 
None + + start_dt = datetime(2022, 1, 1) + end_dt = datetime(2023, 4, 15) + + all_joined_day = { + "0": set(["000000000", "000000001"]), + "1": set(["000000002", "000000003"]), + } + + joined_acc = [ + {"joinedAt": (start_dt + timedelta(days=0)), "discordId": "000000004"}, + {"joinedAt": (start_dt + timedelta(days=0)), "discordId": "000000005"}, + {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000006"}, + {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000007"}, + {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000008"}, + ] + + member_activitiy_utils = MemberActivityPastUtils(db_access=db_access) + starting_key = 2 + + all_joined_day = member_activitiy_utils.update_all_joined_day( + start_dt=start_dt, + end_dt=end_dt, + all_joined_day=all_joined_day, + starting_key=starting_key, + joined_acc=joined_acc, + ) + + assert all_joined_day["0"] == set(["000000000", "000000001"]) + assert all_joined_day["1"] == set(["000000002", "000000003"]) + assert all_joined_day["2"] == set(["000000004", "000000005"]) + assert all_joined_day["3"] == set([]) + assert all_joined_day["4"] == set(["000000006", "000000007", "000000008"]) + + for i in range(5, (end_dt - start_dt).days): + assert all_joined_day[str(i)] == set([]) + + # len would show 1 more + assert len(all_joined_day.keys()) - 1 == (end_dt - start_dt).days + starting_key diff --git a/tests/unit/test_parse_raction.py b/tests/unit/test_parse_raction.py new file mode 100644 index 0000000..8f65ec4 --- /dev/null +++ b/tests/unit/test_parse_raction.py @@ -0,0 +1,66 @@ +from discord_analyzer.analysis.activity_hourly import parse_reaction + + +def test_parse_raction_no_input(): + sample_input = [] + output = parse_reaction(sample_input) + + assert output == [] + + +def test_parse_reaction_partial_single_input(): + sample_input = ["user1,"] + + output = parse_reaction(sample_input) + + assert output == [["user1", ""]] + + +def test_parese_reaction_multiple_input_with_empty_reactions(): + sample_input = ["item1,item2|item3,,item4|item5,item6,item7|,"] + + output = parse_reaction(sample_input) + + assert output == [ + ["item1", "item2|item3", "", "item4|item5", "item6", "item7|", ""] + ] + + +def test_parese_reaction_multiple_input_with_space_reactions(): + sample_input = ["item1,item2|item3, ,item4|item5,item6,item7|, "] + + output = parse_reaction(sample_input) + + assert output == [ + ["item1", "item2|item3", " ", "item4|item5", "item6", "item7|", " "] + ] + + +def test_parse_raction_single_input(): + sample_input = ["emoji1"] + + output = parse_reaction(sample_input) + + assert len(output) == 1 + assert len(output[0]) == 1 + assert output == [["emoji1"]] + + +def test_parse_raction_multiple_input_with_singleComma(): + sample_input = ["mehrdad_mms#8600,😁", "mehrdad_mms#8600,🙌", "mehrdad_mms#8600,🤌"] + output = parse_reaction(sample_input) + + assert len(output) == 3 + assert output[0] == ["mehrdad_mms#8600", "😁"] + assert output[1] == ["mehrdad_mms#8600", "🙌"] + assert output[2] == ["mehrdad_mms#8600", "🤌"] + + +def test_parse_raction_multiple_input_with_multipleComma(): + sample_input = ["sepehr#3795,thegadget.eth#3374,👍", "sepehr#3795,❤️"] + + output = parse_reaction(sample_input) + + assert len(output) == 2 + assert output[0] == ["sepehr#3795", "thegadget.eth#3374", "👍"] + assert output[1] == ["sepehr#3795", "❤️"] diff --git a/tests/unit/test_per_account_interaction.py b/tests/unit/test_per_account_interaction.py new file mode 100644 index 0000000..b44b581 --- /dev/null +++ 
b/tests/unit/test_per_account_interaction.py @@ -0,0 +1,187 @@ +from discord_analyzer.analysis.analytics_interactions_script import ( + per_account_interactions, +) + + +def test_per_account_interaction_no_inputs(): + sample_input = [] + + results = per_account_interactions(sample_input) + + assert results["mentioner_accounts"] == {} + assert results["reacter_accounts"] == {} + assert results["replier_accounts"] == {} + assert results["all_interaction_accounts"] == {} + + +def test_per_account_interaction_empty_inputs(): + sample_input = [ + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [], + "reacter_accounts": [], + "replier_accounts": [], + }, + { + "account_name": "acc2", + "channelId": "321", + "mentioner_accounts": [], + "reacter_accounts": [], + "replier_accounts": [], + }, + { + "account_name": "acc2", + "channelId": "555", + "mentioner_accounts": [], + "reacter_accounts": [], + "replier_accounts": [], + }, + ] + + results = per_account_interactions(sample_input) + + assert results["mentioner_accounts"] == {} + assert results["reacter_accounts"] == {} + assert results["replier_accounts"] == {} + assert results["all_interaction_accounts"] == {} + + +def test_per_account_interaction_accounts(): + sample_input = [ + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], + "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 1}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], + "reacter_accounts": [[{"account": "Mehrdad", "count": 1}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 10}]], + "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "546", + "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], + "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "000", + "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], + "reacter_accounts": [[{"account": "Behzad", "count": 6}]], + "replier_accounts": [[{"account": "Behzad", "count": 7}]], + }, + ] + + # the accounts used above + mentioner_accounts_names = ["mramin22#1669", "Ene SS Rawa#0855"] + reacter_accounts_names = ["ahmadyazdanii#7517", "Mehrdad", "Behzad"] + replier_accounts_names = ["Behzad", "ahmadyazdanii#7517"] + + results = per_account_interactions(sample_input) + + # the whole results assersion + assert list(results.keys()) == [ + "replier_accounts", + "reacter_accounts", + "mentioner_accounts", + "all_interaction_accounts", + ] + + # mentioner_accounts assersions + action_type = "mentioner_accounts" + assert len(results[action_type].values()) == len(mentioner_accounts_names) + assert results[action_type]["0"]["account"] in mentioner_accounts_names + assert results[action_type]["1"]["account"] in mentioner_accounts_names + + # reacter_accounts assersions + action_type = "reacter_accounts" + assert len(results[action_type].values()) == len(reacter_accounts_names) + assert 
results[action_type]["0"]["account"] in reacter_accounts_names + assert results[action_type]["1"]["account"] in reacter_accounts_names + assert results[action_type]["2"]["account"] in reacter_accounts_names + + # replier_accounts assersions + action_type = "replier_accounts" + assert len(results[action_type].values()) == len(replier_accounts_names) + assert results[action_type]["0"]["account"] in replier_accounts_names + assert results[action_type]["1"]["account"] in replier_accounts_names + + +def test_per_account_interaction_numbers(): + sample_input = [ + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], + "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 1}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], + "reacter_accounts": [[{"account": "Mehrdad", "count": 1}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "1234", + "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 10}]], + "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "546", + "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], + "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], + "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + }, + { + "account_name": "acc1", + "channelId": "000", + "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], + "reacter_accounts": [[{"account": "Behzad", "count": 6}]], + "replier_accounts": [[{"account": "Behzad", "count": 7}]], + }, + ] + + account_sum_interaction = { + "Ene SS Rawa#0855": 12, + "ahmadyazdanii#7517": 25, + "Mehrdad": 1, + "mramin22#1669": 20, + "Behzad": 13, + } + + results = per_account_interactions(sample_input) + + # 5 users we had + assert len(results["all_interaction_accounts"].values()) == 5 + + # check each user interaction + for i in range(5): + account_res = list(results["all_interaction_accounts"].values()) + + acc_name = account_res[i]["account"] + acc_interaction_count = account_res[i]["count"] + assert acc_name in account_sum_interaction.keys() + assert account_sum_interaction[acc_name] == acc_interaction_count + + +if __name__ == "__main__": + test_per_account_interaction_accounts() diff --git a/tests/unit/test_prepare_results_per_acc.py b/tests/unit/test_prepare_results_per_acc.py new file mode 100644 index 0000000..c845828 --- /dev/null +++ b/tests/unit/test_prepare_results_per_acc.py @@ -0,0 +1,151 @@ +from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( + prepare_per_account, +) + + +def test_empty_db_results(): + db_results_sample = [] + + results = prepare_per_account(db_results=db_results_sample) + + assert results == {} + + +def test_single_document_db_results(): + db_results_sample = [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "replied_per_acc": [], + } + ] + + results = prepare_per_account(db_results=db_results_sample) + + assert list(results.keys()) == ["968122690118512720"] + assert results["968122690118512720"] == db_results_sample + 
+ +def test_multiple_document_single_acc_db_results(): + db_results_sample = [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "replied_per_acc": [], + }, + ] + + results = prepare_per_account(db_results=db_results_sample) + + assert list(results.keys()) == ["968122690118512720"] + assert results["968122690118512720"] == db_results_sample + + +def test_single_document_multiple_acc_db_results(): + db_results_sample = [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], + "replied_per_acc": [], + }, + ] + + results = prepare_per_account(db_results=db_results_sample) + + assert list(results.keys()) == ["968122690118512720", "968122690118512721"] + assert results["968122690118512720"] == [db_results_sample[0]] + assert results["968122690118512721"] == [db_results_sample[1]] + + +def test_multiple_document_multiple_acc_db_results(): + db_results_sample = [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [ + [{"account": "7952958225341480444", "count": 5}], + [{"account": "7952958225341480433", "count": 2}], + ], + "replied_per_acc": [], + }, + ] + + results = prepare_per_account(db_results=db_results_sample) + + assert list(results.keys()) == ["968122690118512720", "968122690118512721"] + assert results["968122690118512720"] == [ + { + "account_name": "968122690118512720", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], + "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512720", + "reacted_per_acc": [], + "mentioner_per_acc": [ + [{"account": "7952958225341480444", "count": 5}], + [{"account": "7952958225341480433", "count": 2}], + ], + "replied_per_acc": [], + }, + ] + assert results["968122690118512721"] == [ + { + "account_name": "968122690118512721", + "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], + "mentioner_per_acc": [[{"account": "795295822534148096", 
"count": 4}]], + "replied_per_acc": [], + }, + { + "account_name": "968122690118512721", + "reacted_per_acc": [], + "mentioner_per_acc": [], + "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + }, + ] diff --git a/tests/unit/test_sum_interactions_features.py b/tests/unit/test_sum_interactions_features.py new file mode 100644 index 0000000..4dbf531 --- /dev/null +++ b/tests/unit/test_sum_interactions_features.py @@ -0,0 +1,724 @@ +from discord_analyzer.analysis.analytics_interactions_script import ( + sum_interactions_features, +) + + +def test_sum_interactions_features_out_length(): + interactions = [ + "thr_messages", + "lone_messages", + "replier", + "replied", + "replied", + "mentioner", + "mentioned", + "reacter", + "reacted", + ] + sample_input = [] + output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) + + for action in interactions: + # 24 hours + assert len(output[action]) == 24 + + +def test_sum_interactions_features_empty_input(): + interactions = [ + "thr_messages", + "lone_messages", + "replier", + "replied", + "replied", + "mentioner", + "mentioned", + "reacter", + "reacted", + ] + sample_input = [] + output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) + + for action in interactions: + assert sum(output[action]) == 0 + + +def test_sum_interactions_features_single_input(): + interactions = [ + "thr_messages", + "lone_messages", + "replier", + "replied", + "mentioner", + "mentioned", + "reacter", + "reacted", + ] + sample_input = [ + { + "thr_messages": [ + 0, + 0, + 5, + 107, + 0, + 1, + 0, + 0, + 0, + 0, + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "lone_messages": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 80, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "replier": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 5, + 0, + ], + "replied": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ], + "mentioner": [ + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "mentioned": [ + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "reacter": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "reacted": [ + 50000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 100000, + 0, + 0, + 0, + 0, + 0, + 0, + ], + } + ] + + output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) + + assert sum(output["thr_messages"]) == 117 + assert sum(output["lone_messages"]) == 80 + assert sum(output["replier"]) == 5 + assert sum(output["replied"]) == 24 + assert sum(output["mentioner"]) == 2 + assert sum(output["mentioned"]) == 3 + assert sum(output["reacter"]) == 1 + assert sum(output["reacted"]) == 150000 + + +def test_sum_interactions_features_multiple_input(): + interactions = [ + "thr_messages", + "lone_messages", + "replier", + "replied", + "mentioner", + "mentioned", + "reacter", + "reacted", + ] + sample_input = [ + { + "thr_messages": [ + 0, + 0, + 5, + 107, + 0, + 1, + 0, + 0, + 0, + 0, + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "lone_messages": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, 
+ 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 80, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "replier": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 5, + 0, + ], + "replied": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ], + "mentioner": [ + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "mentioned": [ + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "reacter": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "reacted": [ + 50000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 100000, + 0, + 0, + 0, + 0, + 0, + 0, + ], + }, + { + "thr_messages": [ + 0, + 0, + 5, + 100, + 0, + 1, + 0, + 0, + 0, + 0, + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "lone_messages": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 80, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "replier": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 5, + 0, + ], + "replied": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ], + "mentioner": [ + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "mentioned": [ + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "reacter": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + "reacted": [ + 50000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 100000, + 0, + 0, + 0, + 0, + 0, + 0, + ], + }, + ] + + output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) + + assert sum(output["thr_messages"]) == 227 + assert sum(output["lone_messages"]) == 160 + assert sum(output["replier"]) == 10 + assert sum(output["replied"]) == 48 + assert sum(output["mentioner"]) == 4 + assert sum(output["mentioned"]) == 6 + assert sum(output["reacter"]) == 2 + assert sum(output["reacted"]) == 300000 diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/daolytics_uitls.py b/utils/daolytics_uitls.py new file mode 100644 index 0000000..63583d6 --- /dev/null +++ b/utils/daolytics_uitls.py @@ -0,0 +1,123 @@ +import os +from typing import Any + +from dotenv import load_dotenv + + +def get_rabbit_mq_credentials() -> dict[str, Any]: + """ + returns the rabbitMQ connection credentials + + Retuns: + ---------- + rabbit_mq_creds : dict[str, Any] + rabbitMQ credentials, + a dictionary representive of + `broker_url` : str + `port` : int + `username` : str + `password` : str + """ + load_dotenv() + + rabbit_mq_creds = {} + + rabbit_mq_creds["broker_url"] = os.getenv("RABBIT_HOST") + rabbit_mq_creds["port"] = os.getenv("RABBIT_PORT") + rabbit_mq_creds["password"] = os.getenv("RABBIT_PASSWORD") + rabbit_mq_creds["username"] = os.getenv("RABBIT_USER") + + return rabbit_mq_creds + + +def get_mongo_credentials(): + """ + load mongo db credentials from .env + + Returns: + 
--------- + mongo_creds : dict[str, Any] + mongodb credentials + a dictionary containing + `user` : str + `password` : str + `host` : str + `port` : int + """ + load_dotenv() + + mongo_creds = {} + + mongo_creds["user"] = os.getenv("MONGODB_USER") + mongo_creds["password"] = os.getenv("MONGODB_PASS") + mongo_creds["host"] = os.getenv("MONGODB_HOST") + mongo_creds["port"] = os.getenv("MONGODB_PORT") + + return mongo_creds + + +def get_neo4j_credentials(): + """ + load neo4j credentials from .env + + Returns: + --------- + neo4j_creds : dict[str, Any] + neo4j credentials + a dictionary containing + `db_name` : str + `protocol` : str + `host` : str + `port` : int + `user` : str + `password` : str + """ + + load_dotenv() + + neo4j_creds = {} + neo4j_creds["db_name"] = os.getenv("NEO4J_DB") + neo4j_creds["protocol"] = os.getenv("NEO4J_PROTOCOL") + neo4j_creds["host"] = os.getenv("NEO4J_HOST") + neo4j_creds["port"] = os.getenv("NEO4J_PORT") + neo4j_creds["password"] = os.getenv("NEO4J_PASSWORD") + neo4j_creds["user"] = os.getenv("NEO4J_USER") + + return neo4j_creds + + +def get_saga_db_location(): + """ + get the location of the saga collection in the database + """ + load_dotenv() + + saga_db = {} + + saga_db["db_name"] = os.getenv("SAGA_DB_NAME") + saga_db["collection_name"] = os.getenv("SAGA_DB_COLLECTION") + + return saga_db + + +def get_sentryio_service_creds(): + load_dotenv() + + sentry_creds = {} + sentry_creds["dsn"] = os.getenv("SENTRY_DSN") + sentry_creds["env"] = os.getenv("SENTRY_ENV") + + return sentry_creds + + +def get_redis_credentials(): + """ + get redis credentials + """ + load_dotenv() + + redis_creds = {} + + redis_creds["host"] = os.getenv("REDIS_HOST") + redis_creds["port"] = os.getenv("REDIS_PORT") + redis_creds["pass"] = os.getenv("REDIS_PASSWORD") + + return redis_creds diff --git a/utils/get_rabbitmq.py b/utils/get_rabbitmq.py new file mode 100644 index 0000000..71a5f22 --- /dev/null +++ b/utils/get_rabbitmq.py @@ -0,0 +1,14 @@ +from tc_messageBroker import RabbitMQ +from tc_messageBroker.rabbit_mq.queue import Queue + + +def prepare_rabbit_mq(rabbit_creds): + rabbitmq = RabbitMQ( + broker_url=rabbit_creds["broker_url"], + port=rabbit_creds["port"], + username=rabbit_creds["username"], + password=rabbit_creds["password"], + ) + rabbitmq.connect(queue_name=Queue.DISCORD_ANALYZER) + + return rabbitmq diff --git a/utils/sentryio_service.py b/utils/sentryio_service.py new file mode 100644 index 0000000..422c9ac --- /dev/null +++ b/utils/sentryio_service.py @@ -0,0 +1,12 @@ +import sentry_sdk + + +def set_up_sentryio(dsn, environment, sample_rate=1.0): + sentry_sdk.init( + dsn=dsn, + environment=environment, + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for performance monitoring. + # We recommend adjusting this value in production. 
+ traces_sample_rate=sample_rate, + ) diff --git a/utils/transactions_ordering.py b/utils/transactions_ordering.py new file mode 100644 index 0000000..d806736 --- /dev/null +++ b/utils/transactions_ordering.py @@ -0,0 +1,61 @@ +import numpy as np +from tc_messageBroker.rabbit_mq.status import Status + + +def sort_transactions(transactions: list): + """ + sort transactions by their status and order + the NOT_STARTED ones are placed at the front of the list + and are sorted by their `order` property + + Parameters: + ------------ + transactions : list[ITransaction] + the list of transactions to order + + Returns: + --------- + transactions_ordered : ndarray(ITransaction) + the transactions ordered by status + with the `NOT_STARTED` ones first + returned as a numpy array so the transaction + objects can be mutated in place + tx_not_started_count : int + the number of NOT_STARTED transactions + """ + tx_not_started = [] + tx_other = [] + + for tx in transactions: + if tx.status == Status.NOT_STARTED: + tx_not_started.append(tx) + else: + tx_other.append(tx) + + tx_not_started_count = len(tx_not_started) + tx_not_started_sorted = sort_transactions_orderly(tx_not_started) + + transactions_ordered = list(tx_not_started_sorted) + transactions_ordered.extend(tx_other) + + return np.array(transactions_ordered), tx_not_started_count + + +def sort_transactions_orderly(transactions: list): + """ + sort transactions by their `order` property + + Parameters: + ------------ + transactions : list[ITransaction] + the list of transactions to order + + Returns: + --------- + transactions_orderly_sorted : ndarray(ITransaction) + the transactions sorted by their `order` property + """ + orders = [tx.order for tx in transactions] + sorted_indices = np.argsort(orders) + + return np.array(transactions)[sorted_indices]
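Reviewer note: a minimal sketch of how the new `sort_transactions` helper is expected to be consumed, not part of the patch itself. The `FakeTransaction` dataclass below is a hypothetical stand-in for the `ITransaction` objects coming from `tc_messageBroker`; only the `order` and `status` attributes that the helper reads are modeled, and the "FINISHED" status is a placeholder meaning "anything other than NOT_STARTED".

from dataclasses import dataclass

from tc_messageBroker.rabbit_mq.status import Status

from utils.transactions_ordering import sort_transactions


@dataclass
class FakeTransaction:
    # hypothetical stand-in for ITransaction; only the attributes
    # read by sort_transactions (`order` and `status`) are modeled
    order: int
    status: object


if __name__ == "__main__":
    transactions = [
        FakeTransaction(order=2, status=Status.NOT_STARTED),
        FakeTransaction(order=0, status="FINISHED"),  # placeholder, not NOT_STARTED
        FakeTransaction(order=1, status=Status.NOT_STARTED),
    ]

    ordered, not_started_count = sort_transactions(transactions)

    # the NOT_STARTED transactions come first, sorted by `order`;
    # the remaining transactions follow at the end
    assert not_started_count == 2
    assert [tx.order for tx in ordered] == [1, 2, 0]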