Skip to content

Commit

Permalink
Excluded relationship properties when doing property comparison (to f…
Browse files Browse the repository at this point in the history
…ind duplicate IDs)
  • Loading branch information
n2iw committed Mar 8, 2022
1 parent 3d9511f commit ae18b7d
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
2 changes: 1 addition & 1 deletion bento
Submodule bento updated 1 files
+5 −0 common/utils.py
16 changes: 13 additions & 3 deletions data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from bento.common.utils import get_logger, NODES_CREATED, RELATIONSHIP_CREATED, UUID, \
RELATIONSHIP_TYPE, MULTIPLIER, ONE_TO_ONE, DEFAULT_MULTIPLIER, UPSERT_MODE, \
NEW_MODE, DELETE_MODE, NODES_DELETED, RELATIONSHIP_DELETED, combined_dict_counters, \
MISSING_PARENT, NODE_LOADED
MISSING_PARENT, NODE_LOADED, get_string_md5

NODE_TYPE = 'type'
PROP_TYPE = 'Type'
Expand Down Expand Up @@ -127,6 +127,16 @@ def check_encoding(file_name):
return windows1252


def get_props_signature(props):
    """Return a signature of *props* with relationship properties masked.

    Relationship properties (keys containing '$') have their values replaced
    by an empty string, so they won't participate in property comparison.
    The masked dict is converted with ``str()`` and passed to
    ``get_string_md5``; that result is returned as the signature.

    The masking is applied to a new dict, never to *props* itself, so the
    caller's properties (including relationship values) are left intact.

    :param props: dict of property name -> value for one node
    :return: whatever ``get_string_md5`` returns for the masked dict's string
    """
    # Build a fresh dict instead of aliasing `props`: assigning
    # `clean_props = props` and writing into it would blank the caller's
    # relationship properties as a side effect. Key order is preserved, so
    # the string form matches what in-place masking would have produced.
    clean_props = {
        key: '' if '$' in key else value
        for key, value in props.items()
    }
    return get_string_md5(str(clean_props))


class DataLoader:
def __init__(self, driver, schema, plugins=None):
if plugins is None:
Expand Down Expand Up @@ -489,7 +499,7 @@ def validate_file(self, file_name, max_violations):
node_id = self.schema.get_id(obj)
if node_id:
if node_id in ids:
if props != ids[node_id]['props']:
if get_props_signature(props) != ids[node_id]['props']:
validation_failed = True
self.log.error(
f'Invalid data at line {line_num}: duplicate {id_field}: {node_id}, found in line: '
Expand All @@ -502,7 +512,7 @@ def validate_file(self, file_name, max_violations):
f'Duplicated data at line {line_num}: duplicate {id_field}: {node_id}, found in line: '
f'{", ".join(ids[node_id]["lines"])}')
else:
ids[node_id] = {'props': props, 'lines': [str(line_num)]}
ids[node_id] = {'props': get_props_signature(props), 'lines': [str(line_num)]}

validate_result = self.schema.validate_node(obj[NODE_TYPE], obj)
if not validate_result['result']:
Expand Down

0 comments on commit ae18b7d

Please sign in to comment.