diff --git a/.gitignore b/.gitignore index f39e015..97dcd3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,11 @@ - +sqlcell.db +sqlcell_dev*.ipynb +sqlcell_demo*.ipynb *.pyc - +**/.ipynb_checkpoints +.DS_Store +*.egg-info +dist/* +dist/sqlcell-* +**sqlcell.db** +.vscode/* diff --git a/.idea/SQLCell.iml b/.idea/SQLCell.iml new file mode 100644 index 0000000..5b205b3 --- /dev/null +++ b/.idea/SQLCell.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/R_User_Library.xml b/.idea/libraries/R_User_Library.xml new file mode 100644 index 0000000..71f5ff7 --- /dev/null +++ b/.idea/libraries/R_User_Library.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..595ee64 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..04b735a --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..d2ded4f --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hook + run_query + engine_var + enginehan + get_engine + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ') -}; \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8c1b846 --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +from setuptools import setup, find_packages + +setup( + name="sqlcell", + version="0.2.0.15", + description="run sql in jupyter notebooks or jupyter lab", + license="MIT", + author="Tim Dobbins", + author_email="noneya@gmail.com", + url="https://github.com/tmthyjames/SQLCell", + packages=find_packages(), + install_requires=[ + 'ipython', + 'ipywidgets', + 'sqlalchemy', + 'pandas' + ], + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + ], + package_dir={'sqlcell': 'sqlcell'}, + py_modules=['sqlcell'] +) \ No newline at end of file diff --git a/sqlcell.py b/sqlcell.py deleted file mode 100644 index c7e2971..0000000 --- a/sqlcell.py +++ /dev/null @@ -1,617 +0,0 @@ -# __builtin__'s used with MAKE_GLOBAL param so data can -# be passed from this module to the notebook without referencing -# a class or any other object. We can just call the variable that's -# passed to MAKE_GLOBAL -import __builtin__ -import re -import fileinput -import time -import uuid -import subprocess -import sys -import threading -import logging -from os.path import expanduser - -import IPython -from IPython.display import Javascript -from IPython.core.display import display, HTML - -from sqlalchemy import create_engine, exc - -from .engines.engine_config import driver, username, password, host, port, default_db -from .engines.engines import __ENGINES_JSON_DUMPS__ - - -display(Javascript("""$.getScript( "js/editableTableWidget.js");""")) - -unique_db_id = str(uuid.uuid4()) -jupyter_id = 'jupyter' + unique_db_id -application_name = '?application_name='+jupyter_id - - -engine = create_engine(driver+'://'+username+':'+password+'@'+host+':'+port+'/'+default_db+application_name) - - -class __KERNEL_VARS__(object): - g = {} - - -class __SQLCell_GLOBAL_VARS__(object): - - jupyter_id = jupyter_id - engine = engine - EDIT = False - - logger = logging.getLogger() - handler = logging.StreamHandler() - logger.setLevel(logging.DEBUG) - - def kill_last_pid_on_new_thread(self, app, db, unique_id): - t = threading.Thread(target=kill_last_pid, args=(app, db)) - t.start() - HTMLTable([], unique_id).display(msg='
QUERY DEAD...
') - return True - - @staticmethod - def update_table(sql): - return engine.execute(sql) - - -def threaded(fn): - def wrapper(*args, **kwargs): - threading.Thread(target=fn, args=args, kwargs=kwargs).start() - return wrapper - -class QUERY(object): - raw = '' - history = [] - -class HTMLTable(list): - """ - Creates an HTML table if pandas isn't installed. - The .empty attribute takes the place of df.empty, - and to_csv takes the place of df.to_csv. - """ - - def __init__(self, data, id_): - self.id_ = id_ - self.data = data - - empty = [] - - def _repr_html_(self, n_rows=100): - table = '' - thead = '' - tbody = '' - for n,row in enumerate(self.data): - if n == 0: - thead += '' ''.join([('') for r in row]) - elif n > n_rows: - break - else: - tbody += '' + ''.join([('') for r in row]) + '' - # tbody += '' + ''.join([('') for r in row]) + '' # for adding new row - thead += '' - tbody += '' - table += thead + tbody - return table - - @threaded - def display(self, columns=[], msg=None): - table_str = HTMLTable([columns] + self.data, self.id_)._repr_html_(n_rows=100) - table_str = table_str.replace(' - .input { - position:relative; - } - #childDiv'''+unique_id+''' { - width: 90%; - position:absolute; - } - #table'''+unique_id+'''{ - padding-top: 40px; - } - -
-
- - - - - -
-
-
-
-
- - ''' - ) - ) - - psql_command = False - if cell.startswith('\\'): - psql_command = True - db_name = db if isinstance(db, (str, unicode)) else __ENGINE__.url.database - - commands = '' - for i in cell.strip().split(';'): - if i: - commands += ' -c ' + '"'+i+'" ' - commands = 'psql ' + db_name + commands + '-H' - - matches = re.findall(r'%\([a-zA-Z_][a-zA-Z0-9_]*\)s', cell) - - connection = engine.connect() - t0 = time.time() - - try: - if not psql_command: - data = connection.execute(cell, reduce(build_dict, matches, {})) - t1 = time.time() - t0 - columns = data.keys() - table_data = [i for i in data] if 'pd' in globals() else [columns] + [i for i in data] - if 'DISPLAY' in locals(): - if not DISPLAY: - if 'MAKE_GLOBAL' in locals(): - exec('__builtin__.' + glovar[1] + '=table_data') - else: - exec('__builtin__.DATA=table_data') - glovar = ['', 'DATA'] - print 'To execute: ' + str(round(t1, 3)) + ' sec', '|', - print 'Rows:', len(table_data), '|', - print 'DB:', engine.url.database, '| Host:', engine.url.host - print 'data not displayed but captured in variable: ' + glovar[1] - return None - df = to_table(table_data) - else: - output = subprocess.check_output(commands, shell=True) - t1 = time.time() - t0 - if '
' + ' ' + '' + str(r) + '
' + str(n) + '' + str(r).replace(' ', '   ') + '
To execute: %s sec | ' - +'To render: %s sec | ' - +'Rows: %s | ' - +'DB: %s | Host: %s' - ) - """ % (str(round(t1, 3)), str(round(t3, 3)), len(df.index), engine.url.database, engine.url.host) - ) - ) - - table_name = re.search('from\s*([a-z_][a-z\-_0-9]{,})', cell, re.IGNORECASE) - table_name = None if not table_name else table_name.group(1).strip() - - if __SQLCell_GLOBAL_VARS__.EDIT: - __SQLCell_GLOBAL_VARS__.EDIT = False - primary_key_results = engine.execute(""" - SELECT - %(table_name)s as table_name, pg_attribute.attname as column_name - FROM pg_index, pg_class, pg_attribute, pg_namespace - WHERE - pg_class.oid = %(table_name)s::regclass AND - indrelid = pg_class.oid AND - nspname = 'public' AND - pg_class.relnamespace = pg_namespace.oid AND - pg_attribute.attrelid = pg_class.oid AND - pg_attribute.attnum = any(pg_index.indkey) - AND indisprimary - """, {'table_name': table_name}).first() - - if primary_key_results: - primary_key = primary_key_results.column_name - - if not re.search('join', cell, re.IGNORECASE): - - HTMLTable(table_data, unique_id).display(columns, msg=' | EDIT MODE') - - display( - Javascript( - """ - $('#table%s').editableTableWidget({preventColumns:[1,2]}); - $('#table%s').on('change', function(evt, newValue){ - var th = $('#table%s th').eq(evt.target.cellIndex); - var columnName = th.text(); - - var tableName = '%s'; - var primary_key = '%s'; - - var pkId, - pkValue; - $('#table%s tr th').filter(function(i,v){ - if (v.innerHTML == primary_key){ - pkId = i; - } - }); - - var row = $('#table%s > tbody > tr').eq(evt.target.parentNode.rowIndex-1); - row.find('td').each(function(i,v){ - if (i == pkId){ - pkValue = v.innerHTML; - } - }); - - var SQLText = "UPDATE " + tableName + " SET " + columnName + " = '" + newValue + "' WHERE " + primary_key + " = " + pkValue; - console.log(SQLText, 't' + pkValue + 't'); - - if (pkValue === ''){ - console.log('testingietren'); - } else { - $('#error').remove(); - IPython.notebook.kernel.execute('__SQLCell_GLOBAL_VARS__.update_table("'+SQLText+'")', - { - iopub: { - output: function(response) { - var $table = $('#table%s').parent(); - if (response.content.evalue){ - var error = response.content.evalue.replace(/\\n/g, "
"); - $table.append('
'+error+'
'); - } else { - $table.append('
Update successful
'); - } - } - } - }, - { - silent: false, - store_history: false, - stop_on_error: true - } - ); - } - }); - """ % (unique_id, unique_id, unique_id, table_name, primary_key, unique_id, unique_id, unique_id) - ) - ) - - else: - HTMLTable(table_data, unique_id).display(columns, msg=" | CAN\\'T EDIT MULTIPLE TABLES") - return None - else: - HTMLTable(table_data, unique_id).display(columns, msg=' | TABLE HAS NO PK') - return None - else: - HTMLTable(table_data, unique_id).display(columns, msg=' | READ MODE') - return None - - -def sql(path, cell): - t = threading.Thread( - target=_SQL, - args=( - path, cell, { - k:v - for (k,v) in __KERNEL_VARS__.g.iteritems() - if k not in ('In', 'Out', 'v', 'k') - and not k.startswith('_') - and isinstance(v, - (str, int, float, list, unicode, tuple) - ) - } - ) - ) - t.daemon = True - t.start() - return None - - -js = "IPython.CodeCell.config_defaults.highlight_modes['magic_sql'] = {'reg':[/^%%sql/]};" -IPython.core.display.display_javascript(js, raw=True) - \ No newline at end of file diff --git a/sqlcell/__init__.py b/sqlcell/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sqlcell/_initdb.py b/sqlcell/_initdb.py new file mode 100644 index 0000000..4d69511 --- /dev/null +++ b/sqlcell/_initdb.py @@ -0,0 +1,52 @@ +import sqlite3 +# sqlcell table + +db_file = 'sqlcell.db' + +create_table_sql = """CREATE TABLE IF NOT EXISTS sqlcell ( + id integer PRIMARY KEY, + key text NOT NULL, + value BINARY, + dt datetime default current_timestamp +);""" + +create_settings_sql = """CREATE TABLE IF NOT EXISTS settings ( + id integer PRIMARY KEY, + key text NOT NULL, + value text NOT NULL, + dt datetime default current_timestamp +);""" + +create_engines_sql = """CREATE TABLE IF NOT EXISTS engines ( + id integer PRIMARY KEY, + alias text, + db text NOT NULL, + host text NOT NULL, + engine text NOT NULL, + engine_b blob, + dt datetime default current_timestamp +);""" + +create_hooks_sql = """CREATE TABLE IF NOT EXISTS hooks ( + id integer PRIMARY KEY, + key text NOT NULL UNIQUE, + cmd text NOT NULL, + engine text NOT NULL, + engine_b blob, + dt datetime default current_timestamp, + UNIQUE (key, engine) ON CONFLICT IGNORE +); +""" + +tables = [create_table_sql, create_settings_sql, create_engines_sql, create_hooks_sql] + +def run(): + """ + initialize sqlite3 database to record engines/hooks + """ + conn = sqlite3.connect(db_file) + for table in tables: + conn.execute(table) + +if __name__ == '__main__': + run() diff --git a/sqlcell/args.py b/sqlcell/args.py new file mode 100644 index 0000000..97a4102 --- /dev/null +++ b/sqlcell/args.py @@ -0,0 +1,37 @@ +import argparse +import shlex + +class ArgHandler(object): + def __init__(self, line): + self.parser = argparse.ArgumentParser(description='SQLCell arguments') + self.parser.add_argument( + "-e", "--engine", + help='Engine param, specify your connection string: --engine=postgresql://user:password@localhost:5432/mydatabase', + required=False + ) + self.parser.add_argument( + "-es", "--engines", + help='add new engines to be aliased and stored for future use without having to specify entire connection string.', + required=False, default=False, action="store_true" + ) + self.parser.add_argument( + "-v", "--var", + help='Variable name to write output to: --var=foo', + required=False + ) + self.parser.add_argument( + "-bg", "--background", + help='whether to run query in background or not: --background runs in background', + required=False, default=False, action="store_true" + ) + self.parser.add_argument( + "-k", "--hook", + help='define shortcuts with the --hook param', + required=False, default=False, action="store_true" + ) + self.parser.add_argument( + "-r", "--refresh", + help='refresh engines/hooks by specifying --refresh flag', + required=False, default=False, action="store_true" + ) + self.args = self.parser.parse_args(shlex.split(line)) diff --git a/sqlcell/db.py b/sqlcell/db.py new file mode 100644 index 0000000..3e173f1 --- /dev/null +++ b/sqlcell/db.py @@ -0,0 +1,110 @@ +from sqlalchemy import create_engine +from sqlalchemy.engine.url import make_url +from sqlalchemy.ext.automap import automap_base +from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy import create_engine +from sqlalchemy import desc, asc +from sqlalchemy.engine.base import Engine +import pandas as pd +import re + +class DBSessionHandler(object): + def __init__(self): + Base = automap_base() + engine = create_engine("sqlite:///sqlcell.db") + Base.prepare(engine, reflect=True) + self.classes = Base.classes + self.tables = Base.metadata.tables.keys() + self.Sqlcell = Base.classes.sqlcell + self.Engines = Base.classes.engines + self.Hooks = Base.classes.hooks + Session = sessionmaker(autoflush=False) + Session.configure(bind=engine) + self.session = Session() + + dbs = self.session.query(self.Engines).all() + self.db_info = {} + for row in dbs: + engine = row.engine + if row.db: + self.db_info[row.db] = engine + if row.alias: + self.db_info[row.alias] = engine + self.db_info[engine] = engine + self.db_info[row.host] = engine + + def recycle(self): + pass + + def create(self): + pass + + def dispose(self): + pass + +class EngineHandler(DBSessionHandler): + """remove all engines from sqlcell.db: + %%sql refresh + may have to use @cell_line_magic. + add multiple new engines: + %%sql add + foo= + bar= + baz=""" + def __init__(self, *args, **kwargs): + super(EngineHandler, self).__init__() + + def list(self): + "show all alias/engines" + engines = [] + for row in self.session.query(self.Engines): + engine = { + 'Alias': row.alias, + 'Engine': row.engine + } + engines.append(engine) + return pd.DataFrame(engines) + + @property + def latest_engine(self) -> Engine: + record = self.session.query(self.Engines).order_by(desc(self.Engines.dt)).limit(1).first() + if record: + engine = record.engine + return create_engine(engine) + + def get_engine(self, engine_var: str, session_engine: bool or Engine=False, as_binary: bool=False): + if engine_var: + if engine_var not in self.db_info: + engine = create_engine(engine_var) #new engines + self.add_engine(engine) + else: + engine = create_engine(self.db_info[engine_var]) #engine lookup + else: + engine = session_engine or self.latest_engine + return engine + + def add_engine(self, engine: Engine, alias: str=None) -> None: + if isinstance(engine, str): + engine = make_url(engine) + else: + engine = engine.url + host = engine.host + db = engine.database + engine_str = str(engine) + engine_exists_check = self.session.query(self.Engines).filter_by(db=db, host=host, engine=engine_str).first() + if engine_exists_check: return None + self.session.add(self.Engines(db=db, host=host, engine=engine_str, alias=alias)) + self.session.commit() + + def add_alias(self, cell): + for i in re.split('\n{1,}', cell): + row = i.replace(' ', '').split('=', 1) + if row[1:]: + alias, engine = row + self.add_engine(engine, alias=alias) + return ('Engines successfully registered') + + def refresh(self, cell): + self.session.query(self.Engines).delete() + self.session.commit() + diff --git a/sqlcell/hooks.py b/sqlcell/hooks.py new file mode 100644 index 0000000..e1966bc --- /dev/null +++ b/sqlcell/hooks.py @@ -0,0 +1,58 @@ +from sqlalchemy import create_engine +from sqlcell.db import DBSessionHandler, EngineHandler +import pandas as pd + +class HookHandler(EngineHandler): + """input common queries to remember with a key/value pair. ie, + %%sql hook + \d=" + \dt=""" + def __init__(self, engine, *args, **kwargs): + super().__init__() + self.hook_engine = engine + + def is_engine(self, engine: str): + try: + create_engine(engine) + return True + except: + return False + + def add(self, line, cell): + "add hook to db" + cmds_to_add = [] + hooks = cell.split('\n\n') + for hook in hooks: + hook = hook.strip() + if hook: + key, cmd = [i.strip() for i in hook.split('=', 1)] + cmds_to_add.append((key, cmd)) + + for key, cmd in cmds_to_add: + self.session.add(self.Hooks(key=key, engine='', cmd=cmd)) + self.session.commit() + return self + + def run(self, cell, engine_var): + cell = cell.replace('~', '').split(' ') + engine_alias, sql, cmd_args = cell[0], cell[1], cell[2:] + hook_query = self.session.query(self.Hooks).filter_by(key=sql).first() + hook_cmd = hook_query.cmd + hook_engine = self.get_engine(engine_alias) + self.hook_engine = hook_engine + return hook_engine, hook_cmd.format(*cmd_args) + + def list(self, *srgs, **kwargs): + hooks = [] + for row in self.session.query(self.Hooks).all(): + hook = { + 'Alias': row.key, + 'Hook': row.cmd, + 'Engine': row.engine + } + hooks.append(hook) + return pd.DataFrame(hooks) + + def refresh(self, cell): + self.session.query(self.Hooks).delete() + self.session.commit() diff --git a/sqlcell/sqlcell.py b/sqlcell/sqlcell.py new file mode 100644 index 0000000..8515bbc --- /dev/null +++ b/sqlcell/sqlcell.py @@ -0,0 +1,147 @@ +from IPython.core.magic import (Magics, magics_class, line_magic, + cell_magic, line_cell_magic) +from sqlalchemy import create_engine +from sqlalchemy.ext.automap import automap_base +from sqlalchemy.orm import Session, sessionmaker +from sqlalchemy import create_engine +from sqlalchemy import desc, asc +from sqlalchemy.engine.base import Engine +from sqlalchemy import sql +import pandas as pd +import pickle +import threading +################# SQLCell modules ################# +from sqlcell.db import EngineHandler, DBSessionHandler +from sqlcell.args import ArgHandler +from sqlcell.hooks import HookHandler +from sqlcell._initdb import run + + +@magics_class +class SQLCell(Magics, EngineHandler): + + current_engine = False + current_hook_engine = False + modes = ['query', 'hook', 'refresh'] + # consider yaml file for these types of params: + hook_indicator = '~' + + def __init__(self, shell, data): + # You must call the parent constructor + super(SQLCell, self).__init__(shell) + self.shell = shell + self.data = data + self.ipy = get_ipython() + self.refresh_options = ['hooks', 'engines'] + self.line_args = None + + def register_line_vars(self, line): + """options: engine, var, bg""" + mode = self.get_mode(line) + if line.strip() and mode == 'query': + line = line.split(' ') + line_vars = {} + for var in line: + key,value = var.split('=') + line_vars[key] = value + self.line_vars = line_vars + return line_vars + return {} + + def push_var(self, obj): + if self.line_args.var: + self.ipy.push({self.line_args.var: obj}) + + def async_handler(self, obj): + self.push_var(obj) + return obj + + def run_query(self, engine, query_params, var=None, callback=None, **kwargs): + results = pd.DataFrame([dict(row) for row in engine.execute(*query_params)]) + return callback(results) + + def query_router(self, *args): + if self.line_args.background: + processThread = threading.Thread(target=self.run_query, args=args) + processThread.start() + return None + return self.run_query(*args) + + def get_mode(self, line): + line = [l.split('=') for l in line.split('=')] + if len(line) == 0: + if line in SQLCell.modes: return line + else: raise Exception('Invalid mode, please review docs') + return 'query' + + def get_bind_params(self, params, ipython): + return {key:getattr(ipython.user_module, key) for key in params.keys()} + + def get_sql_statement(self, cell): + text = sql.text(cell) + params = text.compile().params + bind_params = self.get_bind_params(params, self.ipy) + return (text, bind_params) + + @line_cell_magic + def sql(self, line: str="", cell: str="") -> None: + + line = line.strip() + cell = cell.strip() + + line_args = ArgHandler(line).args + container_var = line_args.var + engine_var = line_args.engine + background = line_args.background + hook = line_args.hook + refresh = line_args.refresh + add_engines = line_args.engines + # refer to all args as self.line_args. to get rid of entire block ^? + self.line_args = line_args + + ############################ Refresh logic ########################## + if refresh and cell in self.refresh_options: + if cell in self.tables: + self.session.query(getattr(self.classes, cell)).delete() + self.session.commit() + return ('Removed all records from ' + cell) + ############################ End Refresh logic ###################### + ############################ Engine Aliases Logic ################### + if self.line_args.engines: + if cell == 'list': + return self.list() + else: + self.add_alias(cell) + # need to reinit db_info to update new engines added + self.db_info = SQLCell(self.shell, self.data).db_info + return ('Engines successfully registered') + ############################ End Engine Aliases ##################### + # need engine below but not in refresh or alias logic + engine = self.get_engine(engine_var, session_engine=SQLCell.current_engine) + ########################## HookHandler logic ######################## + hook_handler = HookHandler(engine) + if hook: + if cell == 'list': + return hook_handler.list() + hook_handler.add(line, cell) + return ('Hook successfully registered') + + if cell.startswith(self.hook_indicator): + # run returns `engine, cmd`, consider renaming + engine, cell = hook_handler.run(cell, engine_var) + SQLCell.current_hook_engine = hook_handler.hook_engine + ########################## End HookHandler logic #################### + sql_statemnent_params = self.get_sql_statement(cell) + results = self.query_router(engine, sql_statemnent_params, self.line_args.var, self.async_handler) + # self.push_var(results) + engine.pool.dispose() + + # reinitialize to update db_info, find better way + self.db_info = SQLCell(self.shell, self.data).db_info + SQLCell.current_engine = engine + return results + +def load_ipython_extension(ipython): + run() + magics = SQLCell(ipython, []) + ipython.register_magics(magics) diff --git a/sqlcell_app.py b/sqlcell_app.py deleted file mode 100644 index f04f488..0000000 --- a/sqlcell_app.py +++ /dev/null @@ -1,7 +0,0 @@ -# should be moved to Jupyter's or IPython's profile_default/startup directory -from IPython.core.magic import register_line_cell_magic -from SQLCell.sqlcell import sql, __KERNEL_VARS__, __SQLCell_GLOBAL_VARS__ - -sql = register_line_cell_magic(sql) - -__KERNEL_VARS__.g = globals()