[rocketchat] Patch for PR chaoss#882

Signed-off-by: Obaro Ikoh <[email protected]>
obaroikoh · Jun 17, 2020 · 4b2b76c · 4b2b76c
1 parent 5ed7c54
commit 4b2b76c
Show file tree

Hide file tree

Showing 4 changed files with 1,292 additions and 67 deletions.
diff --git a/grimoire_elk/enriched/rocketchat.py b/grimoire_elk/enriched/rocketchat.py
@@ -17,6 +17,7 @@
 #
 # Authors:
 #   Animesh Kumar<[email protected]>
+#   Obaro Ikoh <[email protected]>
 #
 
 import logging
@@ -42,7 +43,7 @@ def get_elastic_mappings(es_major):
         mapping = """
         {
             "properties": {
-                "text_analyzed": {
+                "msg_analyzed": {
                   "type": "text",
                   "fielddata": true,
                   "index": true
@@ -56,11 +57,6 @@ def get_elastic_mappings(es_major):
 class RocketChatEnrich(Enrich):
     mapping = Mapping
 
-    def __init__(self, db_sortinghat=None, db_projects_map=None, json_projects_map=None,
-                 db_user='', db_password='', db_host=''):
-        super().__init__(db_sortinghat, db_projects_map, json_projects_map,
-                         db_user, db_password, db_host)
-
     def get_field_author(self):
         return "u"
 
@@ -99,104 +95,119 @@ def get_rich_item(self, item):
 
         message = item['data']
 
-        eitem['text_analyzed'] = message['msg']
+        eitem['msg_analyzed'] = message['msg']
         eitem['msg'] = message['msg']
         eitem['rid'] = message['rid']
-        eitem['id'] = message['_id']
+        eitem['msg_id'] = message['_id']
         # parent exists in case message is a reply
-        eitem['parent'] = message.get('parent', None)
+        eitem['msg_parent'] = message.get('parent', None)
 
-        if 'u' in message and message['u']:
-            author = message['u']
+        author = message.get('u', None)
+        if author:
             eitem['user_id'] = author.get('_id', None)
             eitem['user_name'] = author.get('name', None)
             eitem['user_username'] = author.get('username', None)
 
-        if 'editedBy' in message and message['editedBy']:
+        eitem['is_edited'] = 0
+        editor = message.get('editedBy', None)
+        if editor:
             eitem['edited_at'] = str_to_datetime(message['editedAt']).isoformat()
-            editor = message['editedBy']
             eitem['edited_by_username'] = editor.get('username', None)
             eitem['edited_by_user_id'] = editor.get('_id', None)
-
-        if 'file' in message and message['file']:
-            eitem['file_id'] = message['file'].get('_id', None)
-            eitem['file_name'] = message['file'].get('name', None)
-            eitem['file_type'] = message['file'].get('type', None)
-
-        if 'replies' in message and message['replies']:
-            eitem['replies'] = message['replies']
-
-        if 'reactions' in message and message['reactions']:
-            eitem.update(self.__get_reactions(message))
-
-        if 'mentions' in message and message['mentions']:
-            eitem['mentions'] = self.__get_mentions(message['mentions'])
-
-        if 'channel_info' in message and message['channel_info']:
-            eitem.update(self.__get_channel_info(message['channel_info']))
-
-        if 'urls' in message and message['urls']:
-            eitem['message_urls'] = self.__get_urls(message['urls'])
+            eitem['is_edited'] = 1
+
+        file = message.get('file', None)
+        if file:
+            eitem['file_id'] = file.get('_id', None)
+            eitem['file_name'] = file.get('name', None)
+            eitem['file_type'] = file.get('type', None)
+
+        eitem['replies'] = len(message['replies']) if message.get('replies', None) else 0
+
+        eitem['total_reactions'] = 0
+        reactions = message.get('reactions', None)
+        if reactions:
+            reaction_types, total_reactions = self.__get_reactions(reactions)
+            eitem.update({'reactions': reaction_types})
+            eitem['total_reactions'] = total_reactions
+
+        eitem['total_mentions'] = 0
+        mentions = message.get('mentions', None)
+        if mentions:
+            eitem['mentions'] = self.__get_mentions(mentions)
+            eitem['total_mentions'] = len(mentions)
+
+        channel_info = message.get('channel_info', None)
+        if channel_info:
+            eitem.update(self.__get_channel_info(channel_info))
+
+        eitem['total_urls'] = 0
+        urls = message.get('urls', None)
+        if urls:
+            urls = [{'url': url['url']} for url in urls]
+            eitem['message_urls'] = urls
+            eitem['total_urls'] = len(urls)
 
         if self.sortinghat:
             eitem.update(self.get_item_sh(item))
 
         if self.prjs_map:
             eitem.update(self.get_item_project(eitem))
 
+        eitem.update(self.get_grimoire_fields(item["metadata__updated_on"], "message"))
+
         self.add_repository_labels(eitem)
         self.add_metadata_filter_raw(eitem)
         return eitem
 
-    def __get_reactions(self, item):
+    def __get_reactions(self, reactions):
         """Enrich reactions for the message"""
 
-        reactions = {}
-
-        item_reactions = item.get('reactions', {})
-        for reaction in item_reactions:
-            reactions['reaction_{}'.format(reaction)] = item_reactions[reaction]
-
-        return reactions
+        reaction_types = []
+        total_reactions = 0
+        for reaction_type in reactions:
+            reaction_data = reactions[reaction_type]
+            usernames = reaction_data.get('usernames', [])
+            names = reaction_data.get('names', [])
+            reaction_type = {
+                "type": reaction_type,
+                "username": usernames,
+                "names": names,
+                "count": len(usernames)
+            }
+            total_reactions += len(usernames)
+            reaction_types.append(reaction_type)
+
+        return reaction_types, total_reactions
 
     def __get_mentions(self, mentioned):
         """Enrich users mentioned in the message"""
 
         rich_mentions = []
 
         for usr in mentioned:
-            if '_id' in usr.keys():
-                rich_mentions.append({'username': usr['username'], 'id': usr['_id'],
-                                      'name': usr['name']})
+            rich_mention = {
+                'username': usr.get('username', None),
+                'id': usr.get('_id', None),
+                'name': usr.get('name', None)
+            }
+            rich_mentions.append(rich_mention)
 
         return rich_mentions
 
     def __get_channel_info(self, channel):
         """Enrich channel info of the message"""
 
-        rich_channel = {'channel_id': channel['_id'],
-                        'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(),
-                        'channel_num_messages': channel['msgs'],
-                        'channel_name': channel['name'],
-                        'channel_num_users': channel['usersCount'],
-                        }
+        rich_channel = {
+            'channel_id': channel['_id'],
+            'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(),
+            'channel_num_messages': channel['msgs'],
+            'channel_name': channel['name'],
+            'channel_num_users': channel['usersCount'],
+            'channel_topic': channel['topic'],
+        }
+        rich_channel['avatar'] = ''
         if 'lastMessage' in channel and channel['lastMessage']:
-            rich_channel['channel_last_message_id'] = channel['lastMessage']['_id']
-            rich_channel['channel_last_message'] = channel['lastMessage']['msg']
+            rich_channel['avatar'] = channel['lastMessage']['avatar']
 
         return rich_channel
-
-    def __get_urls(self, urls):
-        """Enrich urls mentioned in the message"""
-
-        rich_urls = []
-        for url in urls:
-            rich_url = {}
-            if 'meta' in url:
-                rich_url['url_metadata_description'] = url['meta'].get('description', None)
-                rich_url['url_metadata_page_title'] = url['meta'].get('pageTitle', None)
-            rich_url['url'] = url['url']
-
-            rich_urls.append(rich_url)
-
-        return rich_urls
diff --git a/schema/rocketchat.csv b/schema/rocketchat.csv
@@ -0,0 +1,64 @@
+name,type,aggregatable,description
+author_bot,boolean,true,"True if the given author is identified as a bot."
+author_domain,keyword,true,"Domain associated to the author in SortingHat profile."
+author_gender,keyword,true,"Author gender."
+author_gender_acc,keyword,true,"Accuracy to assess author gender."
+author_id,keyword,true,"Author Id from SortingHat."
+author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile."
+author_name,keyword,true,"Author name."
+author_org_name,keyword,true,"Author organization name from SortingHat profile."
+author_user_name,keyword,true,"Author username from Sortinghat profile."
+author_uuid,keyword,true,"Author UUID from SortingHat."
+avatar,keyword,true,"Avatar associated with User profile."
+channel_updated_at,date,true,"Date when the channel was updated, in ISO 8601 format."
+channel_id,keyword,true,"Channel Id of a Rocketchat channel."
+channel_num_users,long,true,"Number of members in a Rocketchat channel."
+channel_num_messages,long,true,"Number of messages in a Rocketchat channel."
+channel_name,keyword,true,"Channel Name."
+channel_topic,keyword,true,"Channel Topic."
+edited_at,date,true,"Date when the message was edited, in ISO 8601 format."
+edited_by_user_id,keyword,true,"Message editor's user id."
+edited_by_username,keyword,true,"Message editor's username."
+grimoire_creation_date,date,true,"Message creation date."
+file_id,keyword,true,"File id."
+file_name,keyword,true,"File name."
+file_type,keyword,true,"File type"
+is_rocketchat_message,long,true,"1 indicating the item is a message."
+is_edited,long,true,"1 if the message has been edited, 0 otherwise."
+metadata__enriched_on,date,true,"Date when the item was enriched."
+metadata__gelk_backend_name,keyword,true,"Name of the backend used to enrich information."
+metadata__gelk_version,keyword,true,"Version of the backend used to enrich information."
+metadata__timestamp,date,true,"Date when the item was stored in RAW index."
+metadata__updated_on,date,true,"Date when the item was updated on its original data source."
+metadata__filter_raw,keyword,true,"Raw filter"
+message_urls.url,keyword,true,"Url on a message."
+mentions.id,keyword,true,"Mentions id"
+mentions.name,keyword,true,"Mentions name"
+mentions.username,keyword,true,"Mentions username"
+msg,keyword,true,"Message text."
+msg_analyzed,text,true,"Message body in plain text."
+msg_id,keyword,true,"Message id."
+msg_parent,keyword,true,"Message parent."
+origin,keyword,true,"Original URL where the channel was retrieved from."
+reactions.names,keyword,true,"Names of users who used a reaction type on a message."
+reactions.type,keyword,true,"Reaction type on a message."
+reactions.username,keyword,true,"Usernames of users who used a reaction type on a message."
+replies,long,true,"Number of replies on a message."
+repository_labels,keyword,true,"Custom repository labels defined by the user."
+rid,keyword,true,"Channel id."
+tag,keyword,true,"Perceval tag."
+total_reactions,long,true,"Number of reactions in a message."
+total_urls,long,true,"Number of urls in a message."
+total_mentions,long,true,"Number of mentions in a message."
+user_username,keyword,true,"Rocketchat user."
+u_bot,long,true,"1 if the given user is identified as a bot."
+u_domain,keyword,true,"Domain associated to the user in SortingHat profile."
+u_gender,keyword,true,"User gender, based on her name (disabled by default)."
+u_gender_acc,long,true,"User gender accuracy (disabled by default)."
+u_id,keyword,true,"User Id from SortingHat."
+u_multi_org_names,keyword,true,"List of the user organizations from SortingHat profile."
+u_name,keyword,true,"User's name from SortingHat profile"
+u_org_name,keyword,true,"User's organization name from SortingHat profile."
+u_user_name,keyword,true,"User's username from SortingHat profile."
+u_uuid,keyword,true,"User's UUID from SortingHat profile."
+uuid,keyword,true,"Perceval UUID."