Skip to content

Commit

Permalink
Merge pull request #12 from lyft/tfeng_update_user_model
Browse files Browse the repository at this point in the history
Add User node with different user attributes
  • Loading branch information
Tao Feng authored Mar 5, 2019
2 parents 776f884 + 8c5b51a commit 37c700d
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 26 deletions.
25 changes: 9 additions & 16 deletions databuilder/models/table_column_usage.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Iterable, Union, Dict, Any, Iterator # noqa: F401

from databuilder.models.neo4j_csv_serde import (
Neo4jCsvSerializable, NODE_KEY, NODE_LABEL, RELATION_START_KEY, RELATION_END_KEY,
Neo4jCsvSerializable, RELATION_START_KEY, RELATION_END_KEY,
RELATION_START_LABEL, RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE
)
from databuilder.models.table_metadata import TableMetadata
from databuilder.models.user import User
from databuilder.publisher.neo4j_csv_publisher import UNQUOTED_SUFFIX


Expand Down Expand Up @@ -42,10 +43,6 @@ class TableColumnUsage(Neo4jCsvSerializable):
A model represents user <--> column graph model
Currently it only support to serialize to table level
"""
USER_NODE_LABEL = 'User'
USER_NODE_KEY_FORMAT = '{email}'
USER_NODE_EMAIL = 'email'

TABLE_NODE_LABEL = TableMetadata.TABLE_NODE_LABEL
TABLE_NODE_KEY_FORMAT = TableMetadata.TABLE_KEY_FORMAT

Expand All @@ -64,7 +61,7 @@ def __init__(self,
raise NotImplementedError('Column is not supported yet {}'.format(col_readers))

self.col_readers = col_readers
self._node_iterator = self._create_next_node()
self._node_iterator = self._create_node_iterator()
self._rel_iter = self._create_rel_iterator()

def create_next_node(self):
Expand All @@ -75,16 +72,12 @@ def create_next_node(self):
except StopIteration:
return None

def _create_next_node(self):
def _create_node_iterator(self):
# type: () -> Iterator[Any]
for col_reader in self.col_readers:
if col_reader.column != '*':
raise NotImplementedError('Column is not supported yet {}'.format(col_reader))
yield {
NODE_LABEL: TableColumnUsage.USER_NODE_LABEL,
NODE_KEY: self._get_user_key(col_reader.user_email),
TableColumnUsage.USER_NODE_EMAIL: col_reader.user_email
}
if col_reader.column == '*':
# using yield for better memory efficiency
yield User(email=col_reader.user_email).create_nodes()[0]

def create_next_relation(self):
# type: () -> Union[Dict[str, Any], None]
Expand All @@ -99,7 +92,7 @@ def _create_rel_iterator(self):
for col_reader in self.col_readers:
yield {
RELATION_START_LABEL: TableMetadata.TABLE_NODE_LABEL,
RELATION_END_LABEL: TableColumnUsage.USER_NODE_LABEL,
RELATION_END_LABEL: User.USER_NODE_LABEL,
RELATION_START_KEY: self._get_table_key(col_reader),
RELATION_END_KEY: self._get_user_key(col_reader.user_email),
RELATION_TYPE: TableColumnUsage.TABLE_USER_RELATION_TYPE,
Expand All @@ -116,7 +109,7 @@ def _get_table_key(self, col_reader):

def _get_user_key(self, email):
# type: (str) -> str
return TableColumnUsage.USER_NODE_KEY_FORMAT.format(email=email)
return User.get_user_model_key(email=email)

def __repr__(self):
# type: () -> str
Expand Down
10 changes: 5 additions & 5 deletions databuilder/models/table_owner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
NODE_LABEL, RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE

from databuilder.models.table_column_usage import TableColumnUsage
from databuilder.models.user import User


class TableOwner(Neo4jCsvSerializable):
Expand Down Expand Up @@ -50,7 +50,7 @@ def create_next_relation(self):
def get_owner_model_key(self, owner # type: str
):
# type: (...) -> str
return TableColumnUsage.USER_NODE_KEY_FORMAT.format(email=owner)
return User.USER_NODE_KEY_FORMAT.format(email=owner)

def get_metadata_model_key(self):
# type: (...) -> str
Expand All @@ -70,8 +70,8 @@ def create_nodes(self):
if owner:
results.append({
NODE_KEY: self.get_owner_model_key(owner),
NODE_LABEL: TableColumnUsage.USER_NODE_LABEL,
TableColumnUsage.USER_NODE_EMAIL: owner
NODE_LABEL: User.USER_NODE_LABEL,
User.USER_NODE_EMAIL: owner
})
return results

Expand All @@ -85,7 +85,7 @@ def create_relation(self):
for owner in self.owners:
results.append({
RELATION_START_KEY: self.get_owner_model_key(owner),
RELATION_START_LABEL: TableColumnUsage.USER_NODE_LABEL,
RELATION_START_LABEL: User.USER_NODE_LABEL,
RELATION_END_KEY: self.get_metadata_model_key(),
RELATION_END_LABEL: 'Table',
RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE,
Expand Down
161 changes: 161 additions & 0 deletions databuilder/models/user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
from typing import Union, Dict, Any # noqa: F401

from databuilder.models.neo4j_csv_serde import Neo4jCsvSerializable, NODE_KEY, \
NODE_LABEL, RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE


class User(Neo4jCsvSerializable):
    """
    User model. Serializes one user into a single Neo4j node record and,
    when a manager email is provided, a single user -> manager relation record.
    """
    # Node label, key format and property names used in the serialized records.
    USER_NODE_LABEL = 'User'
    USER_NODE_KEY_FORMAT = '{email}'
    USER_NODE_EMAIL = 'email'
    USER_NODE_FIRST_NAME = 'first_name'
    USER_NODE_LAST_NAME = 'last_name'
    USER_NODE_FULL_NAME = 'full_name'
    USER_NODE_GITHUB_NAME = 'github_username'
    USER_NODE_TEAM = 'team_name'
    USER_NODE_EMPLOYEE_TYPE = 'employee_type'
    USER_NODE_MANAGER_EMAIL = 'manager_email'
    USER_NODE_SLACK_ID = 'slack_id'
    USER_NODE_IS_ACTIVE = 'is_active'
    USER_NODE_UPDATED_AT = 'updated_at'

    # Relation types between a user and the user's manager.
    USER_MANAGER_RELATION_TYPE = 'MANAGE_BY'
    MANAGER_USER_RELATION_TYPE = 'MANAGE'

    def __init__(self,
                 email,  # type: str
                 first_name='',  # type: str
                 last_name='',  # type: str
                 name='',  # type: str
                 github_username='',  # type: str
                 team_name='',  # type: str
                 employee_type='',  # type: str
                 manager_email='',  # type: str
                 slack_id='',  # type: str
                 is_active=True,  # type: bool
                 updated_at=0,  # type: int
                 ):
        # type: (...) -> None
        """
        This class models user node for Amundsen people.

        :param email: email of the user; also used to build the node key
        :param first_name:
        :param last_name:
        :param name: full name, serialized under the 'full_name' property
        :param github_username:
        :param team_name:
        :param employee_type:
        :param manager_email: when non-empty, a user -> manager relation is created
        :param slack_id:
        :param is_active:
        :param updated_at: everytime we update the node, we will push the timestamp.
                           then we will have a cron job to update the ex-employee nodes based on
                           the case if this timestamp hasn't been updated for two weeks.
        """
        self.first_name = first_name
        self.last_name = last_name
        self.name = name
        self.email = email
        self.github_username = github_username
        # todo: team will be a separate node once Amundsen People supports team
        self.team_name = team_name
        self.manager_email = manager_email
        self.employee_type = employee_type
        # this attr not available in team service, either update team service, update with FE
        self.slack_id = slack_id
        self.is_active = is_active
        self.updated_at = updated_at

        # Records are built eagerly; the iterators are drained by
        # create_next_node() / create_next_relation().
        self._node_iter = iter(self.create_nodes())
        self._rel_iter = iter(self.create_relation())

    def create_next_node(self):
        # type: (...) -> Union[Dict[str, Any], None]
        """
        :return: the next node record, or None once all nodes have been returned
        """
        try:
            return next(self._node_iter)
        except StopIteration:
            return None

    def create_next_relation(self):
        # type: () -> Union[Dict[str, Any], None]
        """
        :return: the next relation record, or None once all relations have been returned
        """
        try:
            return next(self._rel_iter)
        except StopIteration:
            return None

    @classmethod
    def get_user_model_key(cls,
                           email=None):
        # type: (...) -> str
        """
        Build the node key for a user.

        :param email: email of the user
        :return: the formatted key, or an empty string when email is empty or None
        """
        if not email:
            return ''
        return User.USER_NODE_KEY_FORMAT.format(email=email)

    def create_nodes(self):
        # type: () -> List[Dict[str, Any]]
        """
        Create a list of Neo4j node records

        :return: a one-element list holding the user node record
        """
        result_node = {
            NODE_KEY: User.get_user_model_key(email=self.email),
            NODE_LABEL: User.USER_NODE_LABEL,
            User.USER_NODE_EMAIL: self.email,
            User.USER_NODE_IS_ACTIVE: self.is_active,
        }

        # Optional properties are only serialized when they carry a truthy value.
        optional_attributes = (
            (User.USER_NODE_FIRST_NAME, self.first_name),
            (User.USER_NODE_LAST_NAME, self.last_name),
            (User.USER_NODE_FULL_NAME, self.name),
            (User.USER_NODE_GITHUB_NAME, self.github_username),
            (User.USER_NODE_TEAM, self.team_name),
            (User.USER_NODE_EMPLOYEE_TYPE, self.employee_type),
            (User.USER_NODE_SLACK_ID, self.slack_id),
            (User.USER_NODE_UPDATED_AT, self.updated_at),
        )
        for attr_name, attr_value in optional_attributes:
            if attr_value:
                result_node[attr_name] = attr_value

        return [result_node]

    def create_relation(self):
        # type: () -> List[Dict[str, Any]]
        """
        Create a list of Neo4j relation records

        :return: a one-element list with the user -> manager relation when
                 manager_email is set, otherwise an empty list
        """
        if self.manager_email:
            # only create the relation if the manager exists
            return [{
                RELATION_START_KEY: User.get_user_model_key(email=self.email),
                RELATION_START_LABEL: User.USER_NODE_LABEL,
                RELATION_END_KEY: self.get_user_model_key(email=self.manager_email),
                RELATION_END_LABEL: User.USER_NODE_LABEL,
                RELATION_TYPE: User.USER_MANAGER_RELATION_TYPE,
                RELATION_REVERSE_TYPE: User.MANAGER_USER_RELATION_TYPE
            }]
        return []

    def __repr__(self):
        # type: () -> str
        # The format string has eleven placeholders, so eleven values must be
        # supplied; updated_at fills the last one.
        return 'User({!r}, {!r}, {!r}, {!r}, {!r}, ' \
               '{!r}, {!r}, {!r}, {!r}, {!r}, {!r},)'.format(self.first_name,
                                                             self.last_name,
                                                             self.name,
                                                             self.email,
                                                             self.github_username,
                                                             self.team_name,
                                                             self.slack_id,
                                                             self.manager_email,
                                                             self.employee_type,
                                                             self.is_active,
                                                             self.updated_at)
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from setuptools import setup, find_packages


__version__ = '1.0.4'
__version__ = '1.0.5'


setup(
name='amundsen-databuilder',
Expand Down
10 changes: 8 additions & 2 deletions tests/unit/models/test_table_column_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,14 @@ def test_serialize(self):
actual.append(node_row)
node_row = table_col_usage.next_node()

expected = [{'email': '[email protected]', 'KEY': '[email protected]', 'LABEL': 'User'},
{'email': '[email protected]', 'KEY': '[email protected]', 'LABEL': 'User'}]
expected = [{'is_active': True,
'LABEL': 'User',
'KEY': '[email protected]',
'email': '[email protected]'},
{'is_active': True,
'LABEL': 'User',
'KEY': '[email protected]',
'email': '[email protected]'}]
self.assertEqual(expected, actual)

rel_row = table_col_usage.next_relation()
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/models/test_table_owner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import unittest
from databuilder.models.table_column_usage import TableColumnUsage
from databuilder.models.user import User
from databuilder.models.table_owner import TableOwner


Expand Down Expand Up @@ -46,7 +46,7 @@ def test_create_relation(self):

relation = {
RELATION_START_KEY: 'user1@1',
RELATION_START_LABEL: TableColumnUsage.USER_NODE_LABEL,
RELATION_START_LABEL: User.USER_NODE_LABEL,
RELATION_END_KEY: self.table_owner.get_metadata_model_key(),
RELATION_END_LABEL: 'Table',
RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE,
Expand Down
53 changes: 53 additions & 0 deletions tests/unit/models/test_user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import unittest

from databuilder.models.neo4j_csv_serde import RELATION_START_KEY, RELATION_START_LABEL, RELATION_END_KEY, \
RELATION_END_LABEL, RELATION_TYPE, RELATION_REVERSE_TYPE

from databuilder.models.user import User


class TestUser(unittest.TestCase):
    """Unit tests for the User model's key, node and relation serialization."""

    def setUp(self):
        # type: () -> None
        super(TestUser, self).setUp()
        self.user = User(first_name='test_first',
                         last_name='test_last',
                         name='test_first test_last',
                         email='[email protected]',
                         github_username='github_test',
                         team_name='test_team',
                         employee_type='FTE',
                         manager_email='[email protected]',
                         slack_id='slack',
                         is_active=True,
                         updated_at=1)

    def test_get_user_model_key(self):
        # type: () -> None
        """The user key is the email formatted with the '{email}' template."""
        user_email = User.get_user_model_key(email=self.user.email)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(user_email, '{email}'.format(email='[email protected]'))

    def test_create_nodes(self):
        # type: () -> None
        """A user serializes to exactly one node record."""
        nodes = self.user.create_nodes()
        self.assertEqual(len(nodes), 1)

    def test_create_relation(self):
        # type: () -> None
        """A user with a manager email serializes to one user -> manager relation."""
        relations = self.user.create_relation()
        self.assertEqual(len(relations), 1)

        start_key = '{email}'.format(email='[email protected]')
        end_key = '{email}'.format(email='[email protected]')

        relation = {
            RELATION_START_KEY: start_key,
            RELATION_START_LABEL: User.USER_NODE_LABEL,
            RELATION_END_KEY: end_key,
            RELATION_END_LABEL: User.USER_NODE_LABEL,
            RELATION_TYPE: User.USER_MANAGER_RELATION_TYPE,
            RELATION_REVERSE_TYPE: User.MANAGER_USER_RELATION_TYPE
        }

        # assertIn reports both operands on failure, unlike assertTrue(x in y)
        self.assertIn(relation, relations)

0 comments on commit 37c700d

Please sign in to comment.