Skip to content

Commit

Permalink
[rocketchat] Patch for PR chaoss#882
Browse files Browse the repository at this point in the history
Signed-off-by: Obaro Ikoh <[email protected]>
  • Loading branch information
obaroikoh committed Jun 17, 2020
1 parent 5ed7c54 commit 4b2b76c
Show file tree
Hide file tree
Showing 4 changed files with 1,292 additions and 67 deletions.
145 changes: 78 additions & 67 deletions grimoire_elk/enriched/rocketchat.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#
# Authors:
# Animesh Kumar<[email protected]>
# Obaro Ikoh <[email protected]>
#

import logging
Expand All @@ -42,7 +43,7 @@ def get_elastic_mappings(es_major):
mapping = """
{
"properties": {
"text_analyzed": {
"msg_analyzed": {
"type": "text",
"fielddata": true,
"index": true
Expand All @@ -56,11 +57,6 @@ def get_elastic_mappings(es_major):
class RocketChatEnrich(Enrich):
mapping = Mapping

def __init__(self, db_sortinghat=None, db_projects_map=None, json_projects_map=None,
db_user='', db_password='', db_host=''):
super().__init__(db_sortinghat, db_projects_map, json_projects_map,
db_user, db_password, db_host)

def get_field_author(self):
return "u"

Expand Down Expand Up @@ -99,104 +95,119 @@ def get_rich_item(self, item):

message = item['data']

eitem['text_analyzed'] = message['msg']
eitem['msg_analyzed'] = message['msg']
eitem['msg'] = message['msg']
eitem['rid'] = message['rid']
eitem['id'] = message['_id']
eitem['msg_id'] = message['_id']
# parent exists in case message is a reply
eitem['parent'] = message.get('parent', None)
eitem['msg_parent'] = message.get('parent', None)

if 'u' in message and message['u']:
author = message['u']
author = message.get('u', None)
if author:
eitem['user_id'] = author.get('_id', None)
eitem['user_name'] = author.get('name', None)
eitem['user_username'] = author.get('username', None)

if 'editedBy' in message and message['editedBy']:
eitem['is_edited'] = 0
editor = message.get('editedBy', None)
if editor:
eitem['edited_at'] = str_to_datetime(message['editedAt']).isoformat()
editor = message['editedBy']
eitem['edited_by_username'] = editor.get('username', None)
eitem['edited_by_user_id'] = editor.get('_id', None)

if 'file' in message and message['file']:
eitem['file_id'] = message['file'].get('_id', None)
eitem['file_name'] = message['file'].get('name', None)
eitem['file_type'] = message['file'].get('type', None)

if 'replies' in message and message['replies']:
eitem['replies'] = message['replies']

if 'reactions' in message and message['reactions']:
eitem.update(self.__get_reactions(message))

if 'mentions' in message and message['mentions']:
eitem['mentions'] = self.__get_mentions(message['mentions'])

if 'channel_info' in message and message['channel_info']:
eitem.update(self.__get_channel_info(message['channel_info']))

if 'urls' in message and message['urls']:
eitem['message_urls'] = self.__get_urls(message['urls'])
eitem['is_edited'] = 1

file = message.get('file', None)
if file:
eitem['file_id'] = file.get('_id', None)
eitem['file_name'] = file.get('name', None)
eitem['file_type'] = file.get('type', None)

eitem['replies'] = len(message['replies']) if message.get('replies', None) else 0

eitem['total_reactions'] = 0
reactions = message.get('reactions', None)
if reactions:
reaction_types, total_reactions = self.__get_reactions(reactions)
eitem.update({'reactions': reaction_types})
eitem['total_reactions'] = total_reactions

eitem['total_mentions'] = 0
mentions = message.get('mentions', None)
if mentions:
eitem['mentions'] = self.__get_mentions(mentions)
eitem['total_mentions'] = len(mentions)

channel_info = message.get('channel_info', None)
if channel_info:
eitem.update(self.__get_channel_info(channel_info))

eitem['total_urls'] = 0
urls = message.get('urls', None)
if urls:
urls = [{'url': url['url']} for url in urls]
eitem['message_urls'] = urls
eitem['total_urls'] = len(urls)

if self.sortinghat:
eitem.update(self.get_item_sh(item))

if self.prjs_map:
eitem.update(self.get_item_project(eitem))

eitem.update(self.get_grimoire_fields(item["metadata__updated_on"], "message"))

self.add_repository_labels(eitem)
self.add_metadata_filter_raw(eitem)
return eitem

def __get_reactions(self, item):
def __get_reactions(self, reactions):
"""Enrich reactions for the message"""

reactions = {}

item_reactions = item.get('reactions', {})
for reaction in item_reactions:
reactions['reaction_{}'.format(reaction)] = item_reactions[reaction]

return reactions
reaction_types = []
total_reactions = 0
for reaction_type in reactions:
reaction_data = reactions[reaction_type]
usernames = reaction_data.get('usernames', [])
names = reaction_data.get('names', [])
reaction_type = {
"type": reaction_type,
"username": usernames,
"names": names,
"count": len(usernames)
}
total_reactions += len(usernames)
reaction_types.append(reaction_type)

return reaction_types, total_reactions

def __get_mentions(self, mentioned):
"""Enrich users mentioned in the message"""

rich_mentions = []

for usr in mentioned:
if '_id' in usr.keys():
rich_mentions.append({'username': usr['username'], 'id': usr['_id'],
'name': usr['name']})
rich_mention = {
'username': usr.get('username', None),
'id': usr.get('_id', None),
'name': usr.get('name', None)
}
rich_mentions.append(rich_mention)

return rich_mentions

def __get_channel_info(self, channel):
"""Enrich channel info of the message"""

rich_channel = {'channel_id': channel['_id'],
'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(),
'channel_num_messages': channel['msgs'],
'channel_name': channel['name'],
'channel_num_users': channel['usersCount'],
}
rich_channel = {
'channel_id': channel['_id'],
'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(),
'channel_num_messages': channel['msgs'],
'channel_name': channel['name'],
'channel_num_users': channel['usersCount'],
'channel_topic': channel['topic'],
}
rich_channel['avatar'] = ''
if 'lastMessage' in channel and channel['lastMessage']:
rich_channel['channel_last_message_id'] = channel['lastMessage']['_id']
rich_channel['channel_last_message'] = channel['lastMessage']['msg']
rich_channel['avatar'] = channel['lastMessage']['avatar']

return rich_channel

def __get_urls(self, urls):
"""Enrich urls mentioned in the message"""

rich_urls = []
for url in urls:
rich_url = {}
if 'meta' in url:
rich_url['url_metadata_description'] = url['meta'].get('description', None)
rich_url['url_metadata_page_title'] = url['meta'].get('pageTitle', None)
rich_url['url'] = url['url']

rich_urls.append(rich_url)

return rich_urls
64 changes: 64 additions & 0 deletions schema/rocketchat.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name,type,aggregatable,description
author_bot,boolean,true,"True if the given author is identified as a bot."
author_domain,keyword,true,"Domain associated to the author in SortingHat profile."
author_gender,keyword,true,"Author gender."
author_gender_acc,keyword,true,"Accuracy to assess author gender."
author_id,keyword,true,"Author Id from SortingHat."
author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile."
author_name,keyword,true,"Author name."
author_org_name,keyword,true,"Author organization name from SortingHat profile."
author_user_name,keyword,true,"Author username from SortingHat profile."
author_uuid,keyword,true,"Author UUID from SortingHat."
avatar,keyword,true,"Avatar associated with User profile."
channel_updated_at,date,true,"Date when the channel was last updated, in ISO 8601 format."
channel_id,keyword,true,"Channel Id of a Rocketchat channel."
channel_num_users,long,true,"Number of members in a Rocketchat channel."
channel_num_messages,long,true,"Number of messages in a Rocketchat channel."
channel_name,keyword,true,"Channel Name."
channel_topic,keyword,true,"Channel Topic."
edited_at,date,true,"Date when the message was edited, in ISO 8601 format."
edited_by_user_id,keyword,true,"Message editor's user id."
edited_by_username,keyword,true,"Message editor's username."
grimoire_creation_date,date,true,"Message creation date."
file_id,keyword,true,"File id."
file_name,keyword,true,"File name."
file_type,keyword,true,"File type."
is_rocketchat_message,long,true,"1 indicating the item is a message."
is_edited,long,true,"1 indicating the message has been edited, 0 otherwise."
metadata__enriched_on,date,true,"Date when the item was enriched."
metadata__gelk_backend_name,keyword,true,"Name of the backend used to enrich information."
metadata__gelk_version,keyword,true,"Version of the backend used to enrich information."
metadata__timestamp,date,true,"Date when the item was stored in RAW index."
metadata__updated_on,date,true,"Date when the item was updated on its original data source."
metadata__filter_raw,keyword,true,"Raw filter."
message_urls.url,keyword,true,"Url on a message."
mentions.id,keyword,true,"Id of a user mentioned in the message."
mentions.name,keyword,true,"Name of a user mentioned in the message."
mentions.username,keyword,true,"Username of a user mentioned in the message."
msg,keyword,true,"Message text."
msg_analyzed,text,true,"Message body in plain text."
msg_id,keyword,true,"Message id."
msg_parent,keyword,true,"Message parent."
origin,keyword,true,"Original URL where the channel was retrieved from."
reactions.names,keyword,true,"Names of users who used a reaction type on a message."
reactions.type,keyword,true,"Reaction type on a message."
reactions.username,keyword,true,"Usernames of users who used a reaction type on a message."
replies,long,true,"Number of replies on a message."
repository_labels,keyword,true,"Custom repository labels defined by the user."
rid,keyword,true,"Channel id."
tag,keyword,true,"Perceval tag."
total_reactions,long,true,"Number of reactions in a message."
total_urls,long,true,"Number of urls in a message."
total_mentions,long,true,"Number of mentions in a message."
user_username,keyword,true,"Rocketchat user."
u_bot,long,true,"1 if the given user is identified as a bot."
u_domain,keyword,true,"Domain associated to the user in SortingHat profile."
u_gender,keyword,true,"User gender, based on her name (disabled by default)."
u_gender_acc,long,true,"User gender accuracy (disabled by default)."
u_id,keyword,true,"User Id from SortingHat."
u_multi_org_names,keyword,true,"List of the user organizations from SortingHat profile."
u_name,keyword,true,"User's name from SortingHat profile."
u_org_name,keyword,true,"User's organization name from SortingHat profile."
u_user_name,keyword,true,"User's username from SortingHat profile."
u_uuid,keyword,true,"User's UUID from SortingHat profile."
uuid,keyword,true,"Perceval UUID."
Loading

0 comments on commit 4b2b76c

Please sign in to comment.