Skip to content

Commit

Permalink
Fix for issue BDNYC#46 when merging the sources table
Browse files (browse the repository at this point in the history)
  • Loading branch information
dr-rodriguez committed May 10, 2016
1 parent 14c9f07 commit 7d81d2f
Showing 1 changed file with 23 additions and 8 deletions.
31 changes: 23 additions & 8 deletions astrodbkit/astrodb.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -264,14 +264,29 @@ def clean_up(self, table):

while any(duplicate):
# Pull out duplicates one by one
SQL = "SELECT t1.id, t2.id FROM {0} t1 JOIN {0} t2 ON t1.source_id=t2.source_id WHERE t1.id!=t2.id AND {1}{2}{3}" \
.format(table, ' AND '.join(['t1.{0}=t2.{0}'.format(i) for i in req_keys]), (' AND ' \
+ ' AND '.join(
["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))".format(','.join(map(str, [id1, id2]))) for id1, id2 \
in zip(ignore['id1'], ignore['id2'])])) if any(ignore) else '', (' AND ' \
+ ' AND '.join(
["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))".format(','.join(map(str, ni))) for ni \
in new_ignore])) if new_ignore else '')
if table.lower() != 'sources':
SQL = "SELECT t1.id, t2.id FROM {0} t1 JOIN {0} t2 ON t1.source_id=t2.source_id " \
"WHERE t1.id!=t2.id AND {1}{2}{3}"\
.format(table,
' AND '.join(['t1.{0}=t2.{0}'.format(i) for i in req_keys]),
(' AND ' + ' AND '.join(["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))"
.format(','.join(map(str, [id1, id2]))) for id1, id2
in zip(ignore['id1'], ignore['id2'])]))
if any(ignore) else '',
(' AND ' + ' AND '.join(["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))"
.format(','.join(map(str, ni))) for ni in new_ignore]))
if new_ignore else '')
else:
SQL = "SELECT t1.id, t2.id FROM {0} t1 JOIN {0} t2 ON t1.id=t2.id WHERE {1}{2}{3}" \
.format(table,
' AND '.join(['t1.{0}=t2.{0}'.format(i) for i in req_keys]),
(' AND ' + ' AND '.join(["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))"
.format(','.join(map(str, [id1, id2]))) for id1, id2
in zip(ignore['id1'], ignore['id2'])]))
if any(ignore) else '',
(' AND ' + ' AND '.join(["(t1.id NOT IN ({0}) and t2.id NOT IN ({0}))"
.format(','.join(map(str, ni))) for ni in new_ignore]))
if new_ignore else '')

duplicate = self.query(SQL, fetch='one')

Expand Down

0 comments on commit 7d81d2f

Please sign in to comment.