Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rocketchat] Rocketchat patch for PR #882 #890

Merged
merged 2 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 213 additions & 0 deletions grimoire_elk/enriched/rocketchat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Animesh Kumar<[email protected]>
# Obaro Ikoh <[email protected]>
#
valeriocos marked this conversation as resolved.
Show resolved Hide resolved

import logging

from grimoirelab_toolkit.datetime import str_to_datetime

from .enrich import Enrich, metadata
from ..elastic_mapping import Mapping as BaseMapping

logger = logging.getLogger(__name__)


class Mapping(BaseMapping):
    """Elasticsearch mapping for enriched RocketChat items."""

    @staticmethod
    def get_elastic_mappings(es_major):
        """Build the Elasticsearch mapping for the enriched items.

        The mapping declares `msg_analyzed` as an indexed `text` field
        with fielddata enabled.

        :param es_major: major version of Elasticsearch, as string
            (not consulted by this implementation)
        :returns: dictionary with a single key, 'items', whose value is
            the mapping as a JSON string
        """

        # The literal is handed over verbatim, so its inner whitespace
        # is intentionally left untouched.
        items_mapping = """
{
"properties": {
"msg_analyzed": {
"type": "text",
"fielddata": true,
"index": true
}
}
} """

        return dict(items=items_mapping)


class RocketChatEnrich(Enrich):
    """Enricher that turns raw RocketChat messages into enriched items."""

    mapping = Mapping

    def get_field_author(self):
        """Return the key, inside `item['data']`, that holds the author."""
        return "u"

    def get_sh_identity(self, item, identity_field=None):
        """Build the identity of the author of a raw item.

        :param item: raw item; the author is read from
            `item['data'][self.get_field_author()]`
        :param identity_field: accepted for interface compatibility,
            not used by this implementation
        :returns: dictionary with the keys 'username', 'name' and
            'email'; every value is None when the item has no author,
            and 'email' is always None
        """
        identity = {
            'username': None,
            'name': None,
            'email': None
        }

        author_field = self.get_field_author()
        if author_field not in item['data']:
            return identity
        user = item['data'][author_field]

        identity['username'] = user.get('username', None)
        identity['name'] = user.get('name', None)

        return identity

    def get_identities(self, item):
        """ Return the identities from an item """

        yield self.get_sh_identity(item)

    def get_project_repository(self, eitem):
        """Return the repository string used to look up the project.

        The last '/' of the item origin is replaced by a single space.

        :param eitem: enriched item, must contain the key 'origin'
        :returns: string '<origin up to the last slash> <last segment>'
        :raises IndexError: if the origin contains no '/'
        """
        tokens = eitem['origin'].rsplit("/", 1)
        return tokens[0] + " " + tokens[1]

    @metadata
    def get_rich_item(self, item):
        """Create the enriched item for a raw RocketChat message.

        Besides the raw fields listed in `RAW_FIELDS_COPY`, the enriched
        item carries the message text and ids, and, when present in the
        message, author, edition, file, reactions, mentions, channel
        and URL information. The counters `is_edited`, `replies`,
        `total_reactions`, `total_mentions` and `total_urls` are always
        set and default to 0.

        :param item: raw item; the message is read from `item['data']`
        :returns: dictionary with the enriched item
        :raises KeyError: if the message lacks 'msg', 'rid' or '_id'
        """
        eitem = {}

        self.copy_raw_fields(self.RAW_FIELDS_COPY, item, eitem)

        message = item['data']

        eitem['msg_analyzed'] = message['msg']
        eitem['msg'] = message['msg']
        eitem['rid'] = message['rid']
        eitem['msg_id'] = message['_id']
        # parent exists in case message is a reply
        eitem['msg_parent'] = message.get('parent', None)

        author = message.get('u', None)
        if author:
            eitem['user_id'] = author.get('_id', None)
            eitem['user_name'] = author.get('name', None)
            eitem['user_username'] = author.get('username', None)

        eitem['is_edited'] = 0
        editor = message.get('editedBy', None)
        if editor:
            eitem['edited_at'] = str_to_datetime(message['editedAt']).isoformat()
            eitem['edited_by_username'] = editor.get('username', None)
            eitem['edited_by_user_id'] = editor.get('_id', None)
            eitem['is_edited'] = 1

        attached_file = message.get('file', None)
        if attached_file:
            eitem['file_id'] = attached_file.get('_id', None)
            eitem['file_name'] = attached_file.get('name', None)
            eitem['file_type'] = attached_file.get('type', None)

        # A missing, None or empty 'replies' entry counts as zero replies
        replies = message.get('replies', None)
        eitem['replies'] = len(replies) if replies else 0

        eitem['total_reactions'] = 0
        reactions = message.get('reactions', None)
        if reactions:
            reaction_types, total_reactions = self.__get_reactions(reactions)
            eitem['reactions'] = reaction_types
            eitem['total_reactions'] = total_reactions

        eitem['total_mentions'] = 0
        mentions = message.get('mentions', None)
        if mentions:
            eitem['mentions'] = self.__get_mentions(mentions)
            eitem['total_mentions'] = len(mentions)

        channel_info = message.get('channel_info', None)
        if channel_info:
            eitem.update(self.__get_channel_info(channel_info))

        eitem['total_urls'] = 0
        urls = message.get('urls', None)
        if urls:
            # Keep only the address of every URL attached to the message
            urls = [{'url': url['url']} for url in urls]
            eitem['message_urls'] = urls
            eitem['total_urls'] = len(urls)

        if self.sortinghat:
            eitem.update(self.get_item_sh(item))

        if self.prjs_map:
            eitem.update(self.get_item_project(eitem))

        eitem.update(self.get_grimoire_fields(item["metadata__updated_on"], "message"))

        self.add_repository_labels(eitem)
        self.add_metadata_filter_raw(eitem)
        return eitem

    def __get_reactions(self, reactions):
        """Enrich reactions for the message

        :param reactions: dictionary mapping each reaction type to a
            dictionary that may contain 'usernames' and 'names' lists
        :returns: tuple `(reaction_types, total_reactions)`, a list with
            one dictionary per reaction type ('type', 'username',
            'names', 'count') and the sum of all the counts
        """
        reaction_types = []
        total_reactions = 0
        for reaction_name, reaction_data in reactions.items():
            usernames = reaction_data.get('usernames', [])
            reaction_types.append({
                "type": reaction_name,
                "username": usernames,
                "names": reaction_data.get('names', []),
                "count": len(usernames)
            })
            total_reactions += len(usernames)

        return reaction_types, total_reactions

    def __get_mentions(self, mentioned):
        """Enrich users mentioned in the message

        :param mentioned: list of dictionaries, one per mentioned user
        :returns: list of dictionaries with the keys 'username', 'id'
            and 'name'; absent values are set to None
        """
        return [
            {
                'username': usr.get('username', None),
                'id': usr.get('_id', None),
                'name': usr.get('name', None)
            }
            for usr in mentioned
        ]

    def __get_channel_info(self, channel):
        """Enrich channel info of the message

        :param channel: dictionary with the channel data; '_id' and
            '_updatedAt' are mandatory
        :returns: dictionary with the channel fields of the enriched
            item; 'avatar' is an empty string when the channel has no
            last message or the last message carries no avatar
        :raises KeyError: if '_id' or '_updatedAt' is missing
        """
        rich_channel = {
            'channel_id': channel['_id'],
            'channel_updated_at': str_to_datetime(channel['_updatedAt']).isoformat(),
            'channel_num_messages': channel.get('msgs', None),
            'channel_name': channel.get('name', ''),
            'channel_num_users': channel.get('usersCount', 0),
            'channel_topic': channel.get('topic', ''),
        }

        # Fall back to '' when the last message has no 'avatar' key, so
        # a missing avatar does not abort the enrichment with a KeyError.
        last_message = channel.get('lastMessage', None)
        rich_channel['avatar'] = last_message.get('avatar', '') if last_message else ''

        return rich_channel
67 changes: 67 additions & 0 deletions grimoire_elk/raw/rocketchat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Animesh Kumar<[email protected]>
#

from .elastic import ElasticOcean
from ..elastic_mapping import Mapping as BaseMapping


class Mapping(BaseMapping):
    """Elasticsearch mapping for raw RocketChat items."""

    @staticmethod
    def get_elastic_mappings(es_major):
        """Build the Elasticsearch mapping for the raw items.

        The mapping is dynamic at the top level, while dynamic mapping
        is switched off for everything below the `data` field.

        :param es_major: major version of Elasticsearch, as string
            (not consulted by this implementation)
        :returns: dictionary with a single key, 'items', whose value is
            the mapping as a JSON string
        """

        # The literal is handed over verbatim, so its inner whitespace
        # is intentionally left untouched.
        items_mapping = '''
{
"dynamic":true,
"properties": {
"data": {
"dynamic":false,
"properties": {}
}
}
}
'''

        return dict(items=items_mapping)


class RocketChatOcean(ElasticOcean):
    """RocketChat Ocean feeder"""

    mapping = Mapping

    @classmethod
    def get_perceval_params_from_url(cls, url):
        """Derive the perceval params from the URL of the data source.

        The URL is split on single spaces: the first token is taken as
        the server and the second one as the channel. Any further
        token is ignored.

        :param url: string of the form '<server> <channel>'
        :returns: list `[server, channel]`
        :raises IndexError: if the URL has fewer than two tokens
        """
        parts = url.split(' ')
        return [parts[0], parts[1]]
4 changes: 4 additions & 0 deletions grimoire_elk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from perceval.backends.core.twitter import Twitter, TwitterCommand
from perceval.backends.puppet.puppetforge import PuppetForge, PuppetForgeCommand
from perceval.backends.core.redmine import Redmine, RedmineCommand
from perceval.backends.core.rocketchat import RocketChat, RocketChatCommand
from perceval.backends.core.rss import RSS, RSSCommand
from perceval.backends.core.slack import Slack, SlackCommand
from perceval.backends.core.stackexchange import StackExchange, StackExchangeCommand
Expand Down Expand Up @@ -114,6 +115,7 @@
from .enriched.puppetforge import PuppetForgeEnrich
from .enriched.redmine import RedmineEnrich
from .enriched.remo import ReMoEnrich
from .enriched.rocketchat import RocketChatEnrich
from .enriched.rss import RSSEnrich
from .enriched.slack import SlackEnrich
from .enriched.stackexchange import StackExchangeEnrich
Expand Down Expand Up @@ -156,6 +158,7 @@
from .raw.puppetforge import PuppetForgeOcean
from .raw.redmine import RedmineOcean
from .raw.remo import ReMoOcean
from .raw.rocketchat import RocketChatOcean
from .raw.rss import RSSOcean
from .raw.slack import SlackOcean
from .raw.stackexchange import StackExchangeOcean
Expand Down Expand Up @@ -257,6 +260,7 @@ def get_connectors():
"puppetforge": [PuppetForge, PuppetForgeOcean, PuppetForgeEnrich, PuppetForgeCommand],
"redmine": [Redmine, RedmineOcean, RedmineEnrich, RedmineCommand],
"remo": [ReMo, ReMoOcean, ReMoEnrich, ReMoCommand],
"rocketchat": [RocketChat, RocketChatOcean, RocketChatEnrich, RocketChatCommand],
"rss": [RSS, RSSOcean, RSSEnrich, RSSCommand],
"slack": [Slack, SlackOcean, SlackEnrich, SlackCommand],
"stackexchange": [StackExchange, StackExchangeOcean,
Expand Down
64 changes: 64 additions & 0 deletions schema/rocketchat.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name,type,aggregatable,description
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the CSV by adding the new attributes and removing the old ones, thanks

author_bot,boolean,true,"True if the given author is identified as a bot."
author_domain,keyword,true,"Domain associated to the author in SortingHat profile."
author_gender,keyword,true,"Author gender."
author_gender_acc,keyword,true,"Accuracy to assess author gender."
author_id,keyword,true,"Author Id from SortingHat."
author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile."
author_name,keyword,true,"Author name."
author_org_name,keyword,true,"Author organization name from SortingHat profile."
author_user_name,keyword,true,"Author username from Sortinghat profile."
author_uuid,keyword,true,"Author UUID from SortingHat."
avatar,keyword,true,"Avatar associated with User profile."
channel_updated_at,date,true,"Date when channel was updated in ISO 8601 format."
channel_id,keyword,true,"Channel Id of a Rocketchat channel."
channel_num_users,long,true,"Number of members in a Rocketchat channel."
channel_num_messages,long,true,"Number of messages in a Rocketchat channel."
channel_name,keyword,true,"Channel Name."
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
channel_topic,keyword,true,"Channel Topic."
edited_at,date,true,"Date when the message was edited in ISO 8601 format."
edited_by_user_id,keyword,true,"Message editor's user id."
edited_by_username,keyword,true,"Message editor's username."
grimoire_creation_date,date,true,"Message creation date."
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
file_id,keyword,true,"File id."
file_name,keyword,true,"File name."
file_type,keyword,true,"File type"
is_rocketchat_message,long,true,"1 indicating the item is a message."
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
is_edited,long,true,"1 indicating the message has been edited"
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
metadata__enriched_on,date,true,"Date when the item was enriched."
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
metadata__gelk_backend_name,keyword,true,"Name of the backend used to enrich information."
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
metadata__gelk_version,keyword,true,"Version of the backend used to enrich information."
metadata__timestamp,date,true,"Date when the item was stored in RAW index."
metadata__updated_on,date,true,"Date when the item was updated on its original data source."
metadata__filter_raw,keyword,true,"Raw filter"
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
message_urls.url,keyword,true,"Url on a message."
mentions.id,keyword,true,"Mentions id"
mentions.name,keyword,true,"Mentions name"
mentions.username,keyword,true,"Mentions username"
msg,keyword,true,"Message text."
msg_analyzed,text,true,"Message body in plain text."
msg_id,keyword,true,"Message id."
msg_parent,keyword,true,"Message parent."
origin,keyword,true,"Original URL where the channel was retrieved from."
reactions.names,keyword,true,"Names of users who used a reaction type on a message."
reactions.type,keyword,true,"reaction type on a message."
reactions.username,keyword,true,"Usernames of users who used a reaction type on a message."
replies,long,true,"Number of replies on a message."
repository_labels,keyword,true,"Custom repository labels defined by the user."
rid,keyword,true,"Channel id."
tag,keyword,true,"Perceval tag."
total_reactions,long,true,"Number of reactions in a message"
total_urls,long,true,"Number of urls in a message"
total_mentions,long,true,"Number of mentions in a message"
user_username,keyword,true,"Rocketchat user."
valeriocos marked this conversation as resolved.
Show resolved Hide resolved
u_bot,long,true,"1 if the given user is identified as a bot."
u_domain,keyword,true,"Domain associated to the user in SortingHat profile."
u_gender,keyword,true,"User gender, based on her name (disabled by default)."
u_gender_acc,long,true,"User gender accuracy (disabled by default)."
u_id,keyword,true,"User Id from SortingHat."
u_multi_org_names,keyword,true,"List of the user organizations from SortingHat profile."
u_name,keyword,true,"User's name from SortingHat profile"
u_org_name,keyword,true,"User's organization name from SortingHat profile."
u_user_name,keyword,true,"User's username from SortingHat profile."
u_uuid,keyword,true,"User's UUID from SortingHat profile."
uuid,keyword,true,"Perceval UUID."
Loading