def _create_node_iterator(self):
    # type: () -> Iterator[Any]
    """
    Lazily produce one User node record per table-level column reader.

    Readers whose ``column`` is not ``'*'`` are skipped. For every other
    reader a ``User`` is built from the reader's ``user_email`` and the
    first record returned by its ``create_nodes()`` is yielded.

    :return: generator of node records
    """
    for reader in self.col_readers:
        if reader.column != '*':
            continue
        # Generator keeps only one node record alive at a time.
        user_nodes = User(email=reader.user_email).create_nodes()
        yield user_nodes[0]
class User(Neo4jCsvSerializable):
    """
    User model.

    Serializes a single user as a ``User`` node record. When a manager email
    is set, it also emits one MANAGE_BY / MANAGE relation record between the
    user's key and the manager's key.
    """
    USER_NODE_LABEL = 'User'
    USER_NODE_KEY_FORMAT = '{email}'
    USER_NODE_EMAIL = 'email'
    USER_NODE_FIRST_NAME = 'first_name'
    USER_NODE_LAST_NAME = 'last_name'
    USER_NODE_FULL_NAME = 'full_name'
    USER_NODE_GITHUB_NAME = 'github_username'
    USER_NODE_TEAM = 'team_name'
    USER_NODE_EMPLOYEE_TYPE = 'employee_type'
    USER_NODE_MANAGER_EMAIL = 'manager_email'
    USER_NODE_SLACK_ID = 'slack_id'
    USER_NODE_IS_ACTIVE = 'is_active'
    USER_NODE_UPDATED_AT = 'updated_at'

    USER_MANAGER_RELATION_TYPE = 'MANAGE_BY'
    MANAGER_USER_RELATION_TYPE = 'MANAGE'

    def __init__(self,
                 email,  # type: str
                 first_name='',  # type: str
                 last_name='',  # type: str
                 name='',  # type: str
                 github_username='',  # type: str
                 team_name='',  # type: str
                 employee_type='',  # type: str
                 manager_email='',  # type: str
                 slack_id='',  # type: str
                 is_active=True,  # type: bool
                 updated_at=0,  # type: int
                 ):
        # type: (...) -> None
        """
        This class models user node for Amundsen people.

        :param email: user email; also used to build the node key
        :param first_name:
        :param last_name:
        :param name: full name, stored under the ``full_name`` property
        :param github_username:
        :param team_name:
        :param employee_type:
        :param manager_email: when non-empty, a relation to the manager is emitted
        :param slack_id:
        :param is_active:
        :param updated_at: every time we update the node, we will push the timestamp.
        then we will have a cron job to update the ex-employee nodes based on
        the case if this timestamp hasn't been updated for two weeks.
        """
        self.first_name = first_name
        self.last_name = last_name
        self.name = name
        self.email = email
        self.github_username = github_username
        # todo: team will be a separate node once Amundsen People supports team
        self.team_name = team_name
        self.manager_email = manager_email
        self.employee_type = employee_type
        # this attr not available in team service, either update team service, update with FE
        self.slack_id = slack_id
        self.is_active = is_active
        self.updated_at = updated_at

        # The records are materialized once here; later attribute changes are
        # not reflected by create_next_node / create_next_relation.
        self._node_iter = iter(self.create_nodes())
        self._rel_iter = iter(self.create_relation())

    def create_next_node(self):
        # type: (...) -> Union[Dict[str, Any], None]
        """
        :return: the next node record, or None once all records are consumed
        """
        try:
            return next(self._node_iter)
        except StopIteration:
            return None

    def create_next_relation(self):
        # type: () -> Union[Dict[str, Any], None]
        """
        :return: the next relation record, or None once all records are consumed
        """
        try:
            return next(self._rel_iter)
        except StopIteration:
            return None

    @classmethod
    def get_user_model_key(cls,
                           email=None):
        # type: (...) -> str
        """
        Build the node key for a user.

        :param email: user email
        :return: the formatted key, or an empty string when email is empty/None
        """
        if not email:
            return ''
        return cls.USER_NODE_KEY_FORMAT.format(email=email)

    def create_nodes(self):
        # type: () -> List[Dict[str, Any]]
        """
        Create a list of Neo4j node records.

        Key, label, email and is_active are always present; every other
        property is only included when its value is truthy.

        :return: a single-element list holding the user node record
        """
        result_node = {
            NODE_KEY: User.get_user_model_key(email=self.email),
            NODE_LABEL: User.USER_NODE_LABEL,
            User.USER_NODE_EMAIL: self.email,
            User.USER_NODE_IS_ACTIVE: self.is_active,
        }

        if self.first_name:
            result_node[User.USER_NODE_FIRST_NAME] = self.first_name
        if self.last_name:
            result_node[User.USER_NODE_LAST_NAME] = self.last_name
        if self.name:
            result_node[User.USER_NODE_FULL_NAME] = self.name
        if self.github_username:
            result_node[User.USER_NODE_GITHUB_NAME] = self.github_username
        if self.team_name:
            result_node[User.USER_NODE_TEAM] = self.team_name
        if self.employee_type:
            result_node[User.USER_NODE_EMPLOYEE_TYPE] = self.employee_type
        if self.slack_id:
            result_node[User.USER_NODE_SLACK_ID] = self.slack_id
        if self.updated_at:
            result_node[User.USER_NODE_UPDATED_AT] = self.updated_at

        return [result_node]

    def create_relation(self):
        # type: () -> List[Dict[str, Any]]
        """
        Create the user -> manager relation records.

        :return: one relation record when manager_email is set, else an empty list
        """
        if self.manager_email:
            # only create the relation if the manager exists
            return [{
                RELATION_START_KEY: User.get_user_model_key(email=self.email),
                RELATION_START_LABEL: User.USER_NODE_LABEL,
                RELATION_END_KEY: self.get_user_model_key(email=self.manager_email),
                RELATION_END_LABEL: User.USER_NODE_LABEL,
                RELATION_TYPE: User.USER_MANAGER_RELATION_TYPE,
                RELATION_REVERSE_TYPE: User.MANAGER_USER_RELATION_TYPE
            }]
        return []

    def __repr__(self):
        # type: () -> str
        # The template has eleven placeholders, so eleven values must be
        # supplied; with only ten, str.format raises IndexError.
        return 'User({!r}, {!r}, {!r}, {!r}, {!r}, ' \
               '{!r}, {!r}, {!r}, {!r}, {!r}, {!r},)'.format(self.first_name,
                                                              self.last_name,
                                                              self.name,
                                                              self.email,
                                                              self.github_username,
                                                              self.team_name,
                                                              self.slack_id,
                                                              self.manager_email,
                                                              self.employee_type,
                                                              self.is_active,
                                                              self.updated_at)
b/setup.py @@ -1,7 +1,8 @@ from setuptools import setup, find_packages -__version__ = '1.0.4' +__version__ = '1.0.5' + setup( name='amundsen-databuilder', diff --git a/tests/unit/models/test_table_column_usage.py b/tests/unit/models/test_table_column_usage.py index 1f0d3a0d0..731b46b97 100644 --- a/tests/unit/models/test_table_column_usage.py +++ b/tests/unit/models/test_table_column_usage.py @@ -22,8 +22,14 @@ def test_serialize(self): actual.append(node_row) node_row = table_col_usage.next_node() - expected = [{'email': 'john@example.com', 'KEY': 'john@example.com', 'LABEL': 'User'}, - {'email': 'jane@example.com', 'KEY': 'jane@example.com', 'LABEL': 'User'}] + expected = [{'is_active': True, + 'LABEL': 'User', + 'KEY': 'john@example.com', + 'email': 'john@example.com'}, + {'is_active': True, + 'LABEL': 'User', + 'KEY': 'jane@example.com', + 'email': 'jane@example.com'}] self.assertEqual(expected, actual) rel_row = table_col_usage.next_relation() diff --git a/tests/unit/models/test_table_owner.py b/tests/unit/models/test_table_owner.py index c3b72dc3f..bb582b5ab 100644 --- a/tests/unit/models/test_table_owner.py +++ b/tests/unit/models/test_table_owner.py @@ -1,5 +1,5 @@ import unittest -from databuilder.models.table_column_usage import TableColumnUsage +from databuilder.models.user import User from databuilder.models.table_owner import TableOwner @@ -46,7 +46,7 @@ def test_create_relation(self): relation = { RELATION_START_KEY: 'user1@1', - RELATION_START_LABEL: TableColumnUsage.USER_NODE_LABEL, + RELATION_START_LABEL: User.USER_NODE_LABEL, RELATION_END_KEY: self.table_owner.get_metadata_model_key(), RELATION_END_LABEL: 'Table', RELATION_TYPE: TableOwner.OWNER_TABLE_RELATION_TYPE, diff --git a/tests/unit/models/test_user.py b/tests/unit/models/test_user.py new file mode 100644 index 000000000..248bdc4d5 --- /dev/null +++ b/tests/unit/models/test_user.py @@ -0,0 +1,53 @@ +import unittest + +from databuilder.models.neo4j_csv_serde import RELATION_START_KEY, 
class TestUser(unittest.TestCase):
    """Unit tests for the User model's key, node and relation serialization."""

    def setUp(self):
        # type: () -> None
        super(TestUser, self).setUp()
        self.user = User(first_name='test_first',
                         last_name='test_last',
                         name='test_first test_last',
                         email='test@email.com',
                         github_username='github_test',
                         team_name='test_team',
                         employee_type='FTE',
                         manager_email='test_manager@email.com',
                         slack_id='slack',
                         is_active=True,
                         updated_at=1)

    def test_get_user_model_key(self):
        # type: () -> None
        user_email = User.get_user_model_key(email=self.user.email)
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(user_email, '{email}'.format(email='test@email.com'))

    def test_create_nodes(self):
        # type: () -> None
        nodes = self.user.create_nodes()
        self.assertEqual(len(nodes), 1)
        self.assertEqual(nodes[0][User.USER_NODE_EMAIL], 'test@email.com')

    def test_create_relation(self):
        # type: () -> None
        relations = self.user.create_relation()
        self.assertEqual(len(relations), 1)

        start_key = '{email}'.format(email='test@email.com')
        end_key = '{email}'.format(email='test_manager@email.com')

        relation = {
            RELATION_START_KEY: start_key,
            RELATION_START_LABEL: User.USER_NODE_LABEL,
            RELATION_END_KEY: end_key,
            RELATION_END_LABEL: User.USER_NODE_LABEL,
            RELATION_TYPE: User.USER_MANAGER_RELATION_TYPE,
            RELATION_REVERSE_TYPE: User.MANAGER_USER_RELATION_TYPE
        }

        # assertIn reports both operands on failure, unlike assertTrue(x in y)
        self.assertIn(relation, relations)