-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathxlsxtract.py
129 lines (116 loc) · 3.26 KB
/
xlsxtract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/python3
#
# Extract cells from XLS sheets
#
# Usage:
#
# ./xlsxtract.py --cols=start1-[end1],startN-[endN] --rows=start1-[end1],startN-[endN]
# [--workbook|-w <name>]
# [--prefix|-p]
# [--help|-h]
# <-|file>
#
# Author: Xavier Mertens <[email protected]>
# Copyright: GPLv3 (http://gplv3.fsf.org)
# Fell free to use the code, but please share the changes you've made
#
# Todo
# -
#
import os
from optparse import OptionParser
try:
from openpyxl import load_workbook
except:
print("[!] Please install openpyxl")
exit(1)
def processFile(file, options):
""" ----------------------------- """
""" Read cells from an Excel file """
""" ----------------------------- """
max = 65535
if options.max:
max = options.max
try:
xls = load_workbook(filename = file, read_only=True)
wb = xls.get_sheet_names()
except:
print("[!] Cannot read '%s' (not XLSX format?)" % file)
return False
if options.workbook not in wb:
print("[!] Workbook '%s' does not exist. Found workbooks: %s" % (options.workbook, wb))
return False
try:
sheet_ranges = xls[options.workbook]
except:
print("[!] Cannot open workbook '%s'" % options.workbook)
return False
if options.cols:
colRanges = options.cols.split(',')
else:
colRanges = ['A-']
if options.rows:
rowRanges = options.rows.split(',')
else:
rowRanges = ['1-']
for c in colRanges:
c_range = c.split('-')
c = c_min = ord(c_range[0])
for r in rowRanges:
if len(c_range) == 1: # 'A'
c_max = c_min
elif len(c_range[1]) == 0: # 'A-'
c_max = max
else: # A-B
c_max = ord(c_range[1])
r_range = r.split('-')
try:
r = r_min = int(r_range[0])
except:
print("[!] Row value is not correct: '%s'" % r_range[0])
exit(1)
if len(r_range) == 1:
r_max = r_min
elif len(r_range[1]) == 0:
r_max = max
else:
r_max = int(r_range[1])
while c <= c_max:
while r <= r_max:
cell = chr(c) + str(r)
try:
data = sheet_ranges[cell].value
if data == None and options.stop == True:
return True
if options.prefix == True:
print('%s=%s' % (cell, data))
else:
print(data)
r = r +1
except:
print("[!] Cannot read data from cell '%s'" % cell)
exit(1)
r = r_min
c = c + 1
c = c_min
return True
def main():
parser = OptionParser(usage="usage: %prog [options] <file> ...", version="%prog 1.0")
parser.add_option('-w', '--workbook', dest='workbook', type='string', \
help='Workbook to extract data from')
parser.add_option('-c', '--cols', dest='cols', type='string', \
help='Read columns (Format: "A", "A-" or "A-B")')
parser.add_option('-r', '--rows', dest='rows', type='string', \
help='Read rows (Format: "1", "1-" or "1-10")')
parser.add_option('-m', '--max', dest='max', type='int', \
help='Process maximum rows')
parser.add_option('-p', '--prefix', action='store_true', dest='prefix', \
help='Display cell name', default='False')
parser.add_option('-s', '--stop', action='store_true', dest='stop', \
help='Stop processing when empty cell is found', default='False')
(options, args) = parser.parse_args()
for a in args:
processFile(a, options)
if __name__ == '__main__':
main()
exit(0)