forked from chaoss/grimoirelab-elk
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[rocketchat] Patch for PR chaoss#882
Signed-off-by: Obaro Ikoh <[email protected]>
- Loading branch information
Showing
4 changed files
with
1,292 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
# | ||
# Authors: | ||
# Animesh Kumar<[email protected]> | ||
# Obaro Ikoh <[email protected]> | ||
# | ||
|
||
import logging | ||
|
@@ -42,7 +43,7 @@ def get_elastic_mappings(es_major): | |
mapping = """ | ||
{ | ||
"properties": { | ||
"text_analyzed": { | ||
"msg_analyzed": { | ||
"type": "text", | ||
"fielddata": true, | ||
"index": true | ||
|
@@ -56,11 +57,6 @@ def get_elastic_mappings(es_major): | |
class RocketChatEnrich(Enrich): | ||
mapping = Mapping | ||
|
||
def __init__(self, db_sortinghat=None, db_projects_map=None, json_projects_map=None, | ||
db_user='', db_password='', db_host=''): | ||
super().__init__(db_sortinghat, db_projects_map, json_projects_map, | ||
db_user, db_password, db_host) | ||
|
||
def get_field_author(self): | ||
return "u" | ||
|
||
|
@@ -99,104 +95,119 @@ def get_rich_item(self, item): | |
|
||
message = item['data'] | ||
|
||
eitem['text_analyzed'] = message['msg'] | ||
eitem['msg_analyzed'] = message['msg'] | ||
eitem['msg'] = message['msg'] | ||
eitem['rid'] = message['rid'] | ||
eitem['id'] = message['_id'] | ||
eitem['msg_id'] = message['_id'] | ||
# parent exists in case message is a reply | ||
eitem['parent'] = message.get('parent', None) | ||
eitem['msg_parent'] = message.get('parent', None) | ||
|
||
if 'u' in message and message['u']: | ||
author = message['u'] | ||
author = message.get('u', None) | ||
if author: | ||
eitem['user_id'] = author.get('_id', None) | ||
eitem['user_name'] = author.get('name', None) | ||
eitem['user_username'] = author.get('username', None) | ||
|
||
if 'editedBy' in message and message['editedBy']: | ||
eitem['is_edited'] = 0 | ||
editor = message.get('editedBy', None) | ||
if editor: | ||
eitem['edited_at'] = str_to_datetime(message['editedAt']).isoformat() | ||
editor = message['editedBy'] | ||
eitem['edited_by_username'] = editor.get('username', None) | ||
eitem['edited_by_user_id'] = editor.get('_id', None) | ||
|
||
if 'file' in message and message['file']: | ||
eitem['file_id'] = message['file'].get('_id', None) | ||
eitem['file_name'] = message['file'].get('name', None) | ||
eitem['file_type'] = message['file'].get('type', None) | ||
|
||
if 'replies' in message and message['replies']: | ||
eitem['replies'] = message['replies'] | ||
|
||
if 'reactions' in message and message['reactions']: | ||
eitem.update(self.__get_reactions(message)) | ||
|
||
if 'mentions' in message and message['mentions']: | ||
eitem['mentions'] = self.__get_mentions(message['mentions']) | ||
|
||
if 'channel_info' in message and message['channel_info']: | ||
eitem.update(self.__get_channel_info(message['channel_info'])) | ||
|
||
if 'urls' in message and message['urls']: | ||
eitem['message_urls'] = self.__get_urls(message['urls']) | ||
eitem['is_edited'] = 1 | ||
|
||
file = message.get('file', None) | ||
if file: | ||
eitem['file_id'] = file.get('_id', None) | ||
eitem['file_name'] = file.get('name', None) | ||
eitem['file_type'] = file.get('type', None) | ||
|
||
eitem['replies'] = len(message['replies']) if message.get('replies', None) else 0 | ||
|
||
eitem['total_reactions'] = 0 | ||
reactions = message.get('reactions', None) | ||
if reactions: | ||
reaction_types, total_reactions = self.__get_reactions(reactions) | ||
eitem.update({'reactions': reaction_types}) | ||
eitem['total_reactions'] = total_reactions | ||
|
||
eitem['total_mentions'] = 0 | ||
mentions = message.get('mentions', None) | ||
if mentions: | ||
eitem['mentions'] = self.__get_mentions(mentions) | ||
eitem['total_mentions'] = len(mentions) | ||
|
||
channel_info = message.get('channel_info', None) | ||
if channel_info: | ||
eitem.update(self.__get_channel_info(channel_info)) | ||
|
||
eitem['total_urls'] = 0 | ||
urls = message.get('urls', None) | ||
if urls: | ||
urls = [{'url': url['url']} for url in urls] | ||
eitem['message_urls'] = urls | ||
eitem['total_urls'] = len(urls) | ||
|
||
if self.sortinghat: | ||
eitem.update(self.get_item_sh(item)) | ||
|
||
if self.prjs_map: | ||
eitem.update(self.get_item_project(eitem)) | ||
|
||
eitem.update(self.get_grimoire_fields(item["metadata__updated_on"], "message")) | ||
|
||
self.add_repository_labels(eitem) | ||
self.add_metadata_filter_raw(eitem) | ||
return eitem | ||
|
||
def __get_reactions(self, item): | ||
def __get_reactions(self, reactions): | ||
"""Enrich reactions for the message""" | ||
|
||
reactions = {} | ||
|
||
item_reactions = item.get('reactions', {}) | ||
for reaction in item_reactions: | ||
reactions['reaction_{}'.format(reaction)] = item_reactions[reaction] | ||
|
||
return reactions | ||
reaction_types = [] | ||
total_reactions = 0 | ||
for reaction_type in reactions: | ||
reaction_data = reactions[reaction_type] | ||
usernames = reaction_data.get('usernames', []) | ||
names = reaction_data.get('names', []) | ||
reaction_type = { | ||
"type": reaction_type, | ||
"username": usernames, | ||
"names": names, | ||
"count": len(usernames) | ||
} | ||
total_reactions += len(usernames) | ||
reaction_types.append(reaction_type) | ||
|
||
return reaction_types, total_reactions | ||
|
||
def __get_mentions(self, mentioned): | ||
"""Enrich users mentioned in the message""" | ||
|
||
rich_mentions = [] | ||
|
||
for usr in mentioned: | ||
if '_id' in usr.keys(): | ||
rich_mentions.append({'username': usr['username'], 'id': usr['_id'], | ||
'name': usr['name']}) | ||
rich_mention = { | ||
'username': usr.get('username', None), | ||
'id': usr.get('_id', None), | ||
'name': usr.get('name', None) | ||
} | ||
rich_mentions.append(rich_mention) | ||
|
||
return rich_mentions | ||
|
||
def __get_channel_info(self, channel): | ||
"""Enrich channel info of the message""" | ||
|
||
rich_channel = {'channel_id': channel['_id'], | ||
'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(), | ||
'channel_num_messages': channel['msgs'], | ||
'channel_name': channel['name'], | ||
'channel_num_users': channel['usersCount'], | ||
} | ||
rich_channel = { | ||
'channel_id': channel['_id'], | ||
'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(), | ||
'channel_num_messages': channel['msgs'], | ||
'channel_name': channel['name'], | ||
'channel_num_users': channel['usersCount'], | ||
'channel_topic': channel['topic'], | ||
} | ||
rich_channel['avatar'] = '' | ||
if 'lastMessage' in channel and channel['lastMessage']: | ||
rich_channel['channel_last_message_id'] = channel['lastMessage']['_id'] | ||
rich_channel['channel_last_message'] = channel['lastMessage']['msg'] | ||
rich_channel['avatar'] = channel['lastMessage']['avatar'] | ||
|
||
return rich_channel | ||
|
||
def __get_urls(self, urls): | ||
"""Enrich urls mentioned in the message""" | ||
|
||
rich_urls = [] | ||
for url in urls: | ||
rich_url = {} | ||
if 'meta' in url: | ||
rich_url['url_metadata_description'] = url['meta'].get('description', None) | ||
rich_url['url_metadata_page_title'] = url['meta'].get('pageTitle', None) | ||
rich_url['url'] = url['url'] | ||
|
||
rich_urls.append(rich_url) | ||
|
||
return rich_urls |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
name,type,aggregatable,description | ||
author_bot,boolean,true,"True if the given author is identified as a bot." | ||
author_domain,keyword,true,"Domain associated to the author in SortingHat profile." | ||
author_gender,keyword,true,"Author gender." | ||
author_gender_acc,keyword,true,"Accuracy to assess author gender." | ||
author_id,keyword,true,"Author Id from SortingHat." | ||
author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile." | ||
author_name,keyword,true,"Author name." | ||
author_org_name,keyword,true,"Author organization name from SortingHat profile." | ||
author_user_name,keyword,true,"Author username from SortingHat profile." | ||
author_uuid,keyword,true,"Author UUID from SortingHat." | ||
avatar,keyword,true,"Avatar associated with User profile." | ||
channel_updated_at,date,true,"Date when the channel was last updated, in ISO 8601 format." | ||
channel_id,keyword,true,"Channel Id of a Rocketchat channel." | ||
channel_num_users,long,true,"Number of members in a Rocketchat channel." | ||
channel_num_messages,long,true,"Number of messages in a Rocketchat channel." | ||
channel_name,keyword,true,"Channel Name." | ||
channel_topic,keyword,true,"Channel Topic." | ||
edited_at,date,true,"Date when the message was last edited, in ISO 8601 format." | ||
edited_by_user_id,keyword,true,"Message editor's user id." | ||
edited_by_username,keyword,true,"Message editor's username." | ||
grimoire_creation_date,date,true,"Message creation date." | ||
file_id,keyword,true,"File id." | ||
file_name,keyword,true,"File name." | ||
file_type,keyword,true,"File type." | ||
is_rocketchat_message,long,true,"1 indicating the item is a message." | ||
is_edited,long,true,"1 indicating the message has been edited, 0 otherwise." | ||
metadata__enriched_on,date,true,"Date when the item was enriched." | ||
metadata__gelk_backend_name,keyword,true,"Name of the backend used to enrich information." | ||
metadata__gelk_version,keyword,true,"Version of the backend used to enrich information." | ||
metadata__timestamp,date,true,"Date when the item was stored in RAW index." | ||
metadata__updated_on,date,true,"Date when the item was updated on its original data source." | ||
metadata__filter_raw,keyword,true,"Raw filter." | ||
message_urls.url,keyword,true,"Url on a message." | ||
mentions.id,keyword,true,"Id of a user mentioned in the message." | ||
mentions.name,keyword,true,"Name of a user mentioned in the message." | ||
mentions.username,keyword,true,"Username of a user mentioned in the message." | ||
msg,keyword,true,"Message text." | ||
msg_analyzed,text,true,"Message body in plain text." | ||
msg_id,keyword,true,"Message id." | ||
msg_parent,keyword,true,"Message parent." | ||
origin,keyword,true,"Original URL where the channel was retrieved from." | ||
reactions.names,keyword,true,"Names of users who used a reaction type on a message." | ||
reactions.type,keyword,true,"Reaction type on a message." | ||
reactions.username,keyword,true,"Usernames of users who used a reaction type on a message." | ||
replies,long,true,"Number of replies on a message." | ||
repository_labels,keyword,true,"Custom repository labels defined by the user." | ||
rid,keyword,true,"Channel id." | ||
tag,keyword,true,"Perceval tag." | ||
total_reactions,long,true,"Number of reactions in a message." | ||
total_urls,long,true,"Number of urls in a message." | ||
total_mentions,long,true,"Number of mentions in a message." | ||
user_username,keyword,true,"Rocketchat user." | ||
u_bot,long,true,"1 if the given user is identified as a bot." | ||
u_domain,keyword,true,"Domain associated to the user in SortingHat profile." | ||
u_gender,keyword,true,"User gender, based on her name (disabled by default)." | ||
u_gender_acc,long,true,"User gender accuracy (disabled by default)." | ||
u_id,keyword,true,"User Id from SortingHat." | ||
u_multi_org_names,keyword,true,"List of the user organizations from SortingHat profile." | ||
u_name,keyword,true,"User's name from SortingHat profile." | ||
u_org_name,keyword,true,"User's organization name from SortingHat profile." | ||
u_user_name,keyword,true,"User's username from SortingHat profile." | ||
u_uuid,keyword,true,"User's UUID from SortingHat profile." | ||
uuid,keyword,true,"Perceval UUID." |
Oops, something went wrong.