#!/usr/bin/env python
# mysql2csv (part of ossobv/vcutil) // wdoekes/2011-2024 // Public Domain
#
# This mysql2csv dump script is a convenient way to dump and/or prune
# large tables. Instead of selecting all data at once, it takes chunks
# of data based on a datetime column supplied by the caller.
#
# For example, if you run this:
#
# mysql2csv ... mydatabase large_table created
#
# Then the dump script will check the MIN(created) on large_table, and
# from that point, start BETWEEN queries between that time and the next
# hour.
#
# SELECT * FROM mydatabase.large_table
# WHERE created >= '2011-01-21 03:00:00' AND
# created < '2011-01-21 04:00:00'
# ORDER BY created
#
# If you have proper indexes on the created column, these
# "hour-selecting" queries should be light enough to handle without
# locking up your production database.
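#
# A minimal sketch of such an index, reusing the names from the example
# above (adjust to your own schema):
#
#   CREATE INDEX large_table_created ON mydatabase.large_table (created);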
#
# If you have an index on an integer column, that can be used too, but
# then you'll want to pass a timestamp column as extra argument:
#
# mysql2csv ... mydatabase large_table id created
#
# When selecting by id, you may want to fetch larger chunks at once (see
# the --period-size argument).
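#
# For example (hypothetical paths/values), dumping and pruning by id in
# chunks of 50000 rows, with file timestamps taken from created:
#
#   mysql2csv -f ~/.my.cnf --period-size=50000 --prune \
#       mydatabase large_table id created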
#
# See mysql2csv -h for more info.
#
# Created by Walter Doekes, OSSO B.V.
#
# Changes:
# - 2011-01-21: Initial from http://wjd.nu/files/2011/01/mysql2csv.py
# - 2016-05-23: PEP cleanup, add into vcutil repository.
# - 2016-05-23: Only write CSVs if there is data for that period. End
# before until_date if the max date is lower.
# - 2016-05-23: Use argparse for the options. Read ini file as well.
# - 2016-05-23: Add --quiet option to silence the output.
# - 2016-05-23: Append to CSV instead of clobbering it. Safer for data.
# - 2016-05-23: Allow sleep-time to be set.
# - 2016-05-23: Prune during dump, using the same date filters.
# - 2016-05-23: Fix problem with TIMESTAMP column and DST changes.
# - 2016-05-25: Add --extra-where parameter to limit the query results.
# - 2019-04-02: Trap incorrect None conversions by MySQLdb: better crash
# than silently truncate/NULLify values.
# - 2019-04-02: Handle YYYY-mm-dd HH:MM:SS[.qqq[qqq]] TIMESTAMP values.
# - 2022-03-04: Compatibility tweaks (py2/3, DATETIME NULL, BLOB).
# - 2022-12-05: Add the option to sort/filter by integer column.
# - 2023-10-26: When sorting by integer, allow time based templates,
# by reading the time from another (unindexed) column.
# - 2023-10-26: Rename -t/--template to -o/--output.
# - 2024-01-10: Sanitize --prune parts after previous refactor.
#
# Caveats:
# - mysql2csv clobbers existing files without asking!
# - When using a different column (date_column) for the filename template,
# mysql2csv will not close files until the end. If you use %Y%m%d, you get
# up to 4096 file descriptors when processing 11 years at once.
#
from __future__ import print_function, unicode_literals
import argparse # pip install argparse on old pre-2.7 python
import getpass
import sys
import time
from base64 import b16encode
from datetime import date, datetime, timedelta
from decimal import Decimal
from os import utime
from MySQLdb import OperationalError, connect, converters
try:
from configparser import RawConfigParser
long = int
unicode = str
except ImportError: # python2-
from ConfigParser import RawConfigParser
DEFAULT_FILENAME = '%(database)s.%(table)s.%%Y%%m.csv'
DEFAULT_SLEEP_TIME = 0.005 # sleep N secs every iteration, easing server load
EXAMPLE_UNTIL_DATE = datetime(2035, 1, 1)
ONE_HOUR = timedelta(seconds=3600)
TWO_HOURS = timedelta(seconds=7200)
class ConfigError(ValueError):
pass
def _make_conversion_safe(orig_conversion):
"""
Wrap orig_conversion with one that checks that the returned value is
not None.
We do NOT want this flaky behaviour::
def Date_or_None(s):
try: return date(*[ int(x) for x in s.split('-',2)])
except: return None
    If the type conversion is not recognised by MySQLdb, don't have it
    convert to None. This is Python, not let-me-silently-downcast-this-PHP.
"""
def _safe_conversion(s):
"Assert that None is never returned."
ret = orig_conversion(s)
if ret is None:
# > select ts, ts is null, ts = '0000-00-00 00:00:00'
# from communication where ts is null;
# +---------------------+------------+----------------------------+
# | ts | ts is null | ts = '0000-00-00 00:00:00' |
# +---------------------+------------+----------------------------+
# | 0000-00-00 00:00:00 | 0 | 1 |
# +---------------------+------------+----------------------------+
# ... so ts IS NULL but NOT NULL at the same time.
if s == '0000-00-00 00:00:00':
return "0000-00-00 00:00:00"
raise NotImplementedError(
'conversion from {!r} yielded None; bug in MySQLdb?'.format(s))
return ret
return _safe_conversion
def _make_conversion_safe_tuple(orig_tuple):
"""
Update conversion function with a safe-wrapped one.
Note that conversions *from* SQL to Python will have integer-keys.
We're only concerned with those.
The values can be either (a) a conversion function, or (b) a list of
tuples where the second element is the conversion function.
"""
k, v = orig_tuple
if not isinstance(k, int):
return k, v
if isinstance(v, (list, tuple)):
new_list = [(i[0], _make_conversion_safe(i[1])) for i in v]
return k, new_list
return k, _make_conversion_safe(v)
def _make_timestamp_conversion(orig_conversion):
"""
Create conversion function with fallback to original.
"""
def _timestamp_conversion(s):
"Convert 'YYYY-mm-dd HH:MM:SS[.qqq[qqq]]' to datetime"
try:
ymd, hms = s.split(' ')
y, m, d = [int(i) for i in ymd.split('-')]
h, mm, s = hms.split(':')
if '.' in s:
s, us = s.split('.')
us = int(us[0:6].ljust(6, '0'))
else:
us = 0
h, mm, s = int(h), int(mm), int(s)
return datetime(
year=y, month=m, day=d, hour=h, minute=mm, second=s,
microsecond=us)
except Exception:
return orig_conversion(s)
return _timestamp_conversion
# Update conversions to make them safe from None-truncation.
CONVERSIONS = dict(
_make_conversion_safe_tuple(it) for it in converters.conversions.items())
# Add TIMESTAMP conversion that handles 'YYYY-mm-dd HH:MM:SS[.qqq[qqq]]'.
CONVERSIONS[converters.FIELD_TYPE.TIMESTAMP] = _make_timestamp_conversion(
CONVERSIONS[converters.FIELD_TYPE.TIMESTAMP])
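# For example (an illustrative direct call, not part of the normal flow),
# fractional seconds now survive the conversion:
#
#   CONVERSIONS[converters.FIELD_TYPE.TIMESTAMP]('2019-04-02 12:34:56.78')
#   # -> datetime.datetime(2019, 4, 2, 12, 34, 56, 780000)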
class Interval(object):
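    """
    Walk [begin, end) windows over table_name, ordered by seq_column.

    self.type is datetime, int or None (empty table). Callers loop while
    begin <= last, advancing with set_next(), or seek_next() after an
    empty chunk; both compensate for DST jumps on TIMESTAMP columns.
    """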
def __init__(self, db_connection, table_name, seq_column,
period_size, until=None, extra_where=None):
self._db = db_connection
self._table_name = table_name
self._seq_column = seq_column
self._until = until
self._extra_where = extra_where
self._set_min()
self._set_max()
if self.type is None:
self.period = None
self.begin = 2
self.end = None # no data
self.last = 1 # no data, end before we start
elif self.type == datetime:
self.period = timedelta(seconds=period_size)
self.begin = self._min_seq.replace(minute=0, second=0)
self.end = self.begin + self.period
self.last = self._max_seq # inclusive!
elif self.type == int:
self.period = period_size
self.begin = self._min_seq - (self._min_seq % self.period)
self.end = self.begin + self.period
self.last = self._max_seq # inclusive!
self._maybe_dst_issues = (
self.type == datetime and period_size < 7200)
def _set_min(self):
self._min_seq = self._find_min('(1 = 1)')
if self._min_seq is None:
self.type = None
elif isinstance(self._min_seq, datetime):
self.type = datetime
elif isinstance(self._min_seq, int):
self.type = int
else:
raise NotImplementedError('Unhandled %s.%s type %r (%s)' % (
self._table_name, self._seq_column, self._min_seq,
type(self._min_seq).__name__))
def _set_max(self):
if self._until is None:
where = '(1 = 1)'
elif isinstance(self._until, date):
where = "(%s < '%s')" % (self._seq_column, self._until.strftime(
'%Y-%m-%d'))
elif isinstance(self._until, datetime):
where = "(%s < '%s')" % (self._seq_column, self._until.strftime(
'%Y-%m-%d %H:%M:%S'))
elif isinstance(self._until, int):
where = "(%s < %s)" % (self._seq_column, self._until)
else:
raise NotImplementedError(self._until)
query = "SELECT MAX(%s) FROM %s WHERE %s %s" % (
self._seq_column, self._table_name, where, self._extra_where)
# #print(query)
cursor = self._db.cursor()
cursor.execute(query)
ret = cursor.fetchall()
self._max_seq = ret[0][0] # inclusive!
def _find_min(self, where):
query = "SELECT MIN(%s) FROM %s WHERE %s %s" % (
self._seq_column, self._table_name, where, self._extra_where)
# #print(query)
cursor = self._db.cursor()
cursor.execute(query)
ret = cursor.fetchall()
return ret[0][0]
def set_next(self):
if self._maybe_dst_issues and has_crossed_dst(
self._db, self.end, self.period):
# For certain columns, string/time comparisons can fail during
# DST changes, for example:
# TABLE auth (ts timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP)
# mysql> SELECT MIN(ts) FROM auth\G
# min(ts): 2014-10-26 02:00:02
# > SELECT COUNT(*) FROM auth WHERE '2014-10-26 02:00:00' <= ts
# AND ts < '2014-10-26 03:00:00' ORDER BY ts\G
# COUNT(*): 0
# > SELECT COUNT(*) FROM auth WHERE '2014-10-26 01:59:59' <= ts
# AND ts < '2014-10-26 03:00:00' ORDER BY ts\G
# COUNT(*): 256
# For DATETIME columns, the problem does not occur.
# As a workaround we'll have to do an extra check to see if
# we've just crossed a DST line and add more time to the
# query.
#
# Keep begin time, but extend end time if needed.
self.end = self.begin + TWO_HOURS
else:
self.begin, self.end = self.end, self.end + self.period
def seek_next(self):
if self._maybe_dst_issues and has_crossed_dst(
self._db, self.begin, self.period):
# Lower begin time, extend end time.
self.begin = self._find_min("(%s >= '%s')" % (
self._seq_column,
(self.begin - self.period).strftime('%Y-%m-%d %H:%M:%S'),))
self.begin = self.begin - ONE_HOUR
self.end = self.begin + TWO_HOURS
else:
# Normal.
self.begin = self._find_min("(%s >= '%s')" % (
self._seq_column, self.end.strftime('%Y-%m-%d %H:%M:%S'),))
self.end = self.begin + self.period
class FileManager(object):
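    """
    Write CSV lines to files named after a strftime filename template.

    Files are opened lazily, get the CSV header when started at position
    0, and get their mtime set to the last write time on close().
    """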
def __init__(self, filename_template, header, is_monotonic):
self._filename_template = filename_template
self._header = header
# If is_monotonic, we'll assume we can close files as soon as we're
# moving to the next file. For others, it might look like we're leaking
# files until the last moment when close() is called.
self._is_monotonic = is_monotonic
self._written_files = {}
self._files = {}
def _open(self, filename):
assert filename not in self._files, (filename, self._files)
# We must open in append-mode if sort_column!=date_column.
        # If we get a record with an older date, we might need to reopen
        # a file we've already written to.
fp = open(filename, ('a' if filename in self._written_files else 'w'))
if fp.tell() == 0:
fp.write(self._header) # write header because we're at pos 0
self._written_files[filename] = time.time()
self._files[filename] = fp
return fp
def _make_filename(self, date_value):
return date_value.strftime(self._filename_template)
def close(self):
for filename, fp in self._files.items():
fp.close()
# Set last updated time after closing, to get a more
# realistic view of when the files were populated.
last_updated = self._written_files[filename]
utime(filename, (last_updated, last_updated))
self._files = {}
def write(self, line, when):
opened_new_file = False
filename = self._make_filename(when)
try:
# Most common case first. We expect few exceptions.
fp = self._files[filename]
except KeyError:
if self._is_monotonic:
self.close()
fp = self._open(filename)
opened_new_file = True
self._written_files[filename] = time.time()
fp.write(line)
return opened_new_file
def dump2csv_and_prune(db_connection, kwargs):
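    """
    Dump the table to CSV file(s) one chunk at a time and, if requested,
    prune each chunk immediately after writing it.
    """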
# Get relevant args.
(table_name, sort_column, date_column, until, output,
period_size, quiet, sleep, prune) = tuple(
kwargs[i] for i in (
'table', 'sort_column', 'date_column', 'until', 'output',
'period_size', 'quiet', 'sleep', 'prune'))
extra_where = (kwargs.get('extra_where') or '').strip()
if extra_where:
extra_where = 'AND (%s)' % (extra_where.replace('%', '%%'),)
rows = describe_table(db_connection, table_name)
# Figure out the interval/datatypes involved.
try:
interval = Interval(
db_connection, table_name, sort_column,
period_size, until, extra_where)
except OperationalError as e:
if e.args[0] == 1054:
# Unknown column 'xyz' in 'field list'
raise ConfigError('there is no %r column in table %r' % (
sort_column, table_name))
raise
# Check what column to use for the filenames.
date_column_idx = None
if date_column is None and interval.type == datetime:
date_column = sort_column
is_monotonic = True
elif date_column is None and '%' in output:
raise NotImplementedError(
'output filename templates (using %) require a date_column')
elif date_column is None:
assert '%' not in output, output
is_monotonic = True # very monotonic, only one file
else:
is_monotonic = False
if date_column is not None:
try:
date_column_idx = rows.index(date_column)
except ValueError:
raise ConfigError('there is no %r column in table %r' % (
date_column, table_name))
# Make filemanager that auto-opens files and auto-adds CSV headers.
# This allows us to write to multiple files simultaneously. For instance
# when using an INT index, but storing the files in a time-based filename.
filemanager = FileManager(
filename_template=output,
header='%s\n' % (','.join(rows),),
is_monotonic=is_monotonic)
    # Create cursor. Increment in period-sized steps (1 hour by default).
cursor = db_connection.cursor()
caret_angle = 0
while interval.begin <= interval.last:
if interval.type == datetime:
begin_range_sql = "'%s'" % (
interval.begin.strftime('%Y-%m-%d %H:%M:%S'),)
end_range_sql = "'%s'" % (
interval.end.strftime('%Y-%m-%d %H:%M:%S'),)
else:
begin_range_sql = interval.begin
end_range_sql = interval.end
# Query a bit of data and write to file.
query = (
"SELECT * FROM %s WHERE %s <= %s AND %s < %s %s "
"ORDER BY %s" % (
table_name, begin_range_sql, sort_column,
sort_column, end_range_sql, extra_where, sort_column))
# #print(query)
cursor.execute(query)
row = None
cursorit = iter(cursor)
try:
row = next(cursorit)
except StopIteration:
# No data! Seek to first data now. Otherwise we might keep
# scanning and scanning.
interval.seek_next()
continue
# Write data (first row, and the rest).
opened_new_file = filemanager.write(
line='%s\n' % (','.join(to_string(i) for i in row),),
when=row[date_column_idx])
for row in cursorit:
opened_new_file |= filemanager.write(
line='%s\n' % (','.join(to_string(i) for i in row),),
when=row[date_column_idx])
if opened_new_file and not quiet:
sys.stdout.write('+') # a new file was opened
sys.stdout.flush()
# Prune immediately in equal sized chunks.
if prune and row is not None:
cursor.execute(
"DELETE FROM %s WHERE %s <= %s AND %s < %s %s" % (
table_name, begin_range_sql, sort_column,
sort_column, end_range_sql, extra_where))
db_connection.commit()
if not quiet:
sys.stdout.write('!') # a chunk was deleted
sys.stdout.flush()
# Increase dates.
interval.set_next()
if not quiet:
sys.stdout.write('%s\b' % (r'\|/-'[caret_angle],)) # progress
sys.stdout.flush()
caret_angle = (caret_angle + 1) % 4
if sleep:
time.sleep(sleep)
filemanager.close()
def describe_table(db_connection, table_name):
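    """Return the column names of table_name, in table order."""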
cursor = db_connection.cursor()
cursor.execute('DESCRIBE %s;' % (table_name,))
ret = []
for row in cursor.fetchall():
ret.append(row[0])
return ret
def has_crossed_dst(db_connection, begin_date, period):
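    """
    Return whether the period leading up to begin_date crosses a DST
    change; if so, the UNIX timestamp delta is an hour off from period.
    """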
cursor = db_connection.cursor()
cursor.execute(
"SELECT (UNIX_TIMESTAMP('%s') - UNIX_TIMESTAMP('%s')) AS v" % (
begin_date.strftime('%Y-%m-%d %H:%M:%S'),
(begin_date - period).strftime('%Y-%m-%d %H:%M:%S')))
ret = cursor.fetchall()
found = ret[0][0]
expected = period.total_seconds()
if found == expected:
return False
assert found in (expected - 3600, expected + 3600), ret
return True
def to_string(data):
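    """
    Format one column value as a CSV field: strings get quoted with inner
    quotes doubled, bytes become a 0x hex literal, None becomes NULL.
    """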
if isinstance(data, str):
return '"%s"' % (data.replace('"', '""'),)
if isinstance(data, unicode):
return '"%s"' % (data.encode('utf-8').replace('"', '""'),)
if isinstance(data, int) or isinstance(data, long):
return str(data)
if isinstance(data, datetime):
return '"%s"' % (data.strftime('%Y-%m-%d %H:%M:%S'),)
if isinstance(data, date): # must come after datetime
return '"%s"' % (data.strftime('%Y-%m-%d'),)
if isinstance(data, Decimal):
return str(data)
if data is None:
return 'NULL'
if isinstance(data, bytes):
return '0x%s' % (b16encode(data).decode('ascii'),) # UPPERCASE
raise TypeError('Unexpected type %s for data %r: %s' % (
type(data), data, data))
def parse_defaults_file(filename):
config = RawConfigParser()
with open(filename, 'r') as fp:
config.read_file(fp)
sections = config.sections()
for possible_section in ('mysql2csv', 'mysql', 'client'):
if possible_section in sections:
break
else:
return {}
return dict(config.items(possible_section))
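# An example defaults-file for the above (illustrative values only; the
# command line overrules anything set here):
#
#   [mysql2csv]
#   host = db.example.com
#   user = dumper
#   password = secret
#   sleep = 0.01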
def parse_date(date_):
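    """Parse a 'YYYY-mm-dd' string into a (naive, local time) datetime."""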
return datetime.fromtimestamp(
time.mktime(time.strptime(date_, '%Y-%m-%d')))
def main():
example_filename = DEFAULT_FILENAME % {
'database': '<DATABASE>', 'table': '<TABLE>'}
example_until = EXAMPLE_UNTIL_DATE.strftime('%Y-%m-%d')
parser = argparse.ArgumentParser(
prog='mysql2csv',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=('''\
Dumps all data from the MySQL table to CSV-formatted files IN A NON-
LOCKING FASHION. In most cases you can safely use this on a production database
to dump old records that you will be pruning later on.
Supply the necessary HOST/USER/DATABASE to connect to; the password is prompted
on stdin if required. The data from TABLE will be exported ordered by
SORT_COLUMN in ascending order.
If strftime variables are used in the CSV filename template, they are
replaced with the values from the fetched period. The default OUTPUT template
is: '%(filename)s'
The default UNTIL time is %(until)s which should be far enough in the future
to get "all" records. (It is used as "exclusive" date, meaning that no records
from that date and onwards will be output/pruned.)
You can use the date(1) shell utility to specify a relative date for --until;
for example one year ago would be: date +%%Y-%%m-%%d -d '-1 year'
Supply --prune as argument to immediately prune the data after dumping it.
''' % {'filename': example_filename, 'until': example_until}))
parser.add_argument(
'-H', '--host',
help='which database host to connect to')
parser.add_argument(
'-p', '--port', type=int,
help='the database port if not 3306')
parser.add_argument(
'-u', '--user',
help='the username if not the current user')
parser.add_argument(
'-P', action='store_true',
help='ask for password')
parser.add_argument(
'-s', '--socket',
help='unix socket to connect to instead of tcp host')
parser.add_argument(
'-o', '--output',
help="output filename template to use instead of '%s'" % (
example_filename.replace('%', '%%'),))
parser.add_argument( # not through defaults-file
'--until', type=parse_date, default=None,
help='stop when arriving at this date (do not set for int columns)')
parser.add_argument(
'-q', '--quiet', action='store_true',
help='be quiet, apart from showing errors')
parser.add_argument(
'--period-size', type=int, metavar='PERIOD', default=3600,
        help=('interval size in seconds, or the integer step size when '
              'dealing with a sequence (maybe use 50000 for primary key '
              'sorted ones)'))
parser.add_argument( # not through defaults-file
'--prune', action='store_true', default=False,
help='prunes the data after writing the csv (DESTRUCTIVE!)')
parser.add_argument(
'--sleep', type=float, metavar='SECONDS',
help=('sleep this long between every data fetch/prune; accepts '
'fractional seconds'))
parser.add_argument(
'--extra-where',
help=('additional filter to add to the SELECT and DELETE queries; '
'for example: is_active = 0'))
parser.add_argument(
'-f', '--defaults-file',
help=("a defaults-file in mysql-client fashion to set the username, "
"password or any of the other long options; checks the "
"[mysql2csv] section (or [mysql] or [client] if the former "
"doesn't exist)"))
parser.add_argument(
'database',
help='the database to connect to')
parser.add_argument(
'table',
help='the table to dump and/or prune')
parser.add_argument(
'sort_column',
help='the column that holds the indexed date or primary key')
parser.add_argument(
'date_column', nargs='?', default=None,
help=('the column that holds a datetime to use for the filename '
'template (same as sort_column if not specified)'))
# Extract args and merge with defaults. If a defaults-file is set,
# use that and have the command line overrule it.
args = parser.parse_args()
# Copy defaults-file contents.
kwargs = {'password': None}
if args.defaults_file:
kwargs.update(parse_defaults_file(args.defaults_file))
if 'port' in kwargs:
kwargs['port'] = int(kwargs['port'])
if 'sleep' in kwargs:
kwargs['sleep'] = float(kwargs['sleep'])
# Copy command line args.
for arg in ('host', 'port', 'user', 'socket', 'output',
'database', 'table', 'sort_column', 'date_column', 'until',
'period_size', 'quiet', 'sleep', 'extra_where'):
value = getattr(args, arg)
if value is not None or arg not in kwargs:
kwargs[arg] = value
kwargs['prune'] = args.prune
# Set defaults.
if kwargs['output'] is None:
kwargs['output'] = DEFAULT_FILENAME % {
'database': kwargs['database'],
'table': kwargs['table']}
if kwargs['sleep'] is None:
kwargs['sleep'] = DEFAULT_SLEEP_TIME
# Get password?
if args.P:
kwargs['password'] = getpass.getpass('Enter password: ')
# Extract mysql connect args.
my_kwargs = {}
for from_, to in (
('host', 'host'), ('port', 'port'), ('user', 'user'),
('password', 'passwd'), ('database', 'db'),
('socket', 'unix_socket')):
if kwargs[from_] is not None:
my_kwargs[to] = kwargs[from_]
# Print a bit of info.
if not kwargs['quiet']:
print('MySQL server settings: %s@%s/%s' % (
kwargs['user'] or '<nobody>',
kwargs['socket'] or kwargs['host'] or '<local>',
kwargs['database']))
if kwargs['until']:
until = kwargs['until'].strftime('%Y-%m-%d')
else:
until = 'end'
print(
'Table to dump: %s (ordered by %s, %d chunk/period, '
'from begin to %s)' % (
kwargs['table'], kwargs['sort_column'],
kwargs['period_size'], until))
print('Output filename template: "%s"' % (kwargs['output'],))
print('Processing %s.%s (ordered by %s)' % (
my_kwargs['db'], kwargs['table'], kwargs['sort_column']))
# Connect and do work.
conn = connect(conv=CONVERSIONS, **my_kwargs)
try:
dump2csv_and_prune(conn, kwargs)
except ConfigError as e:
print('\n%s: %s' % (sys.argv[0], e), file=sys.stderr)
exit(1)
finally:
conn.close()
if not kwargs['quiet']:
print()
if __name__ == '__main__':
main()
# vim: set ts=8 sw=4 sts=4 et ai: