-
Notifications
You must be signed in to change notification settings - Fork 0
/
column_info.py
63 lines (49 loc) · 1.43 KB
/
column_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
'''
Created on May 7, 2011
@author: etherealite
'''
import os, sys
proj_dir = os.path.abspath(os.path.dirname(__file__))
packages_dir = os.path.join(proj_dir, 'packages')
sys.path.append(packages_dir)
import settings
from mysql_env import cursor, db_connect
# Create a new cursor for mysql introspection DB.
info_c = db_connect('information_schema')
cursor.execute("DROP TABLE IF EXISTS column_dups")
create_info = """
CREATE TABLE column_dups
(
id INT(11) AUTO_INCREMENT,
attribute varchar(60),
value varchar(60),
occurrances INT(11),
table_name varchar(60),
PRIMARY KEY(id)
)
"""
cursor.execute(create_info)
get_columns = """
SELECT column_name FROM COLUMNS
WHERE table_schema = '%s'
AND table_name = 'master'
""" % settings.DATABASE
info_c.execute(get_columns)
columns = info_c.fetchall()
columns = tuple(map(lambda col: col[0], columns ))
cursor.execute("SELECT distinct(source) FROM master")
sources = tuple(map(lambda res: res[0], cursor.fetchall()))
do_tally_base = r"""
SELECT * FROM
(SELECT %s as sb1, COUNT(*) as sb2, source as sb3 FROM master
WHERE source = '%s'
AND %s IS NOT NULL
GROUP BY %s ORDER BY COUNT(*) DESC) as t2
WHERE sb2 > 1
"""
for source in sources:
for column in columns:
do_tally = do_tally_base % (column, source, column, column)
cursor.execute(do_tally)
stuff = cursor.fetchall()
if len(stuff) > 0: print stuff