forked from snowflakedb/snowflake-connector-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil_text.py
224 lines (206 loc) · 7.7 KB
/
util_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2012-2019 Snowflake Computing Inc. All right reserved.
#
import logging
import re
try:
import ijson
except Exception:
ijson = None
COMMENT_PATTERN_RE = re.compile(r'^\s*\-\-')
EMPTY_LINE_RE = re.compile(r'^\s*$')
_logger = logging.getLogger(__name__)
def split_statements(buf, remove_comments=False):
"""
Splits a stream into SQL statements (ends with a semicolon) or
commands (!...)
:param buf: Unicode data stream
:param remove_comments: True removes all comments
:return: yields a SQL statement or a command
"""
in_quote = False
ch_quote = None
in_comment = False
in_double_dollars = False
line = buf.readline()
if isinstance(line, bytes):
raise TypeError("Input data must not be binary type.")
statement = []
while line != '':
col = 0
col0 = 0
len_line = len(line)
while True:
if col >= len_line:
if col0 < col:
if not in_comment and not in_quote \
and not in_double_dollars:
statement.append((line[col0:col], True))
if len(statement) == 1 and statement[0][0] == '':
statement = []
break
elif not in_comment and (in_quote or in_double_dollars):
statement.append((line[col0:col], True))
elif not remove_comments:
statement.append((line[col0:col], False))
break
elif in_comment:
if line[col:].startswith("*/"):
in_comment = False
if not remove_comments:
statement.append((line[col0:col + 2], False))
col += 2
col0 = col
else:
col += 1
elif in_double_dollars:
if line[col:].startswith("$$"):
in_double_dollars = False
statement.append((line[col0:col + 2], False))
col += 2
col0 = col
else:
col += 1
elif in_quote:
if line[col] == '\\' and col < len_line - 1 and \
line[col + 1] in (ch_quote, '\\'):
col += 2
elif line[col] == ch_quote:
if col < len_line - 1 and line[col + 1] != ch_quote or \
col == len_line - 1:
# exits quote
in_quote = False
statement.append((line[col0:col + 1], True))
col += 1
col0 = col
else:
# escaped quote and still in quote
col += 2
else:
col += 1
else:
if line[col] in ("'", '"'):
in_quote = True
ch_quote = line[col]
col += 1
elif line[col] in (' ', '\t'):
statement.append((line[col0:col + 1], True))
col += 1
col0 = col
elif line[col:].startswith("--"):
statement.append((line[col0:col], True))
if not remove_comments:
# keep the comment
statement.append((line[col:], False))
col = len_line + 1
col0 = col
elif line[col:].startswith("/*") and \
not line[col0:].startswith("file://"):
if not remove_comments:
statement.append((line[col0:col + 2], False))
else:
statement.append((line[col0:col], False))
col += 2
col0 = col
in_comment = True
elif line[col:].startswith("$$"):
statement.append((line[col0:col + 2], True))
col += 2
col0 = col
in_double_dollars = True
elif line[col] == ';':
statement.append((line[col0:col + 1], True))
col += 1
try:
if line[col] == '>':
col += 1
statement[-1] = (statement[-1][0] + '>',
statement[-1][1])
except IndexError:
pass
if COMMENT_PATTERN_RE.match(line[col:]) or \
EMPTY_LINE_RE.match(line[col:]):
if not remove_comments:
# keep the comment
statement.append((line[col:], False))
col = len_line
while col < len_line and line[col] in (' ', '\t'):
col += 1
yield _concatenate_statements(statement)
col0 = col
statement = []
elif col == 0 and line[col] == '!': # command
if len(statement) > 0:
yield _concatenate_statements(statement)
statement = []
yield line.rstrip(';').strip(), False
break
else:
col += 1
line = buf.readline()
if len(statement) > 0:
yield _concatenate_statements(statement)
def _concatenate_statements(statement_list):
"""
concatenate statements
is_put_or_get is set to True if the statement is PUT or GET otherwise
False for valid statement. None is set if the statement is empty or
comment only.
:return: a statement, is_put_or_get
"""
valid_statement_list = []
is_put_or_get = None
for text, is_statement in statement_list:
valid_statement_list.append(text)
if is_put_or_get is None and is_statement and len(text.strip()) >= 3:
is_put_or_get = text[:3].upper() in ('PUT', 'GET')
return u''.join(valid_statement_list).strip(), is_put_or_get
def split_rows_from_stream(stream):
"""
Splits into rows from a stream object. Generator.
"""
if not ijson:
raise Exception("install ijson")
row = []
in_row = False
for prefix, event, value in ijson.parse(stream):
if prefix == '':
continue
if in_row:
if event == 'end_array':
yield row
row = []
in_row = False
else:
row.append(value)
elif event == 'start_array':
in_row = True
def construct_hostname(region, account):
"""
Constructs hostname from region and account
"""
if region == u'us-west-2':
region = ''
if region:
if account.find(u'.') > 0:
account = account[0:account.find(u'.')]
host = u'{}.{}.snowflakecomputing.com'.format(account, region)
else:
host = u'{}.snowflakecomputing.com'.format(account)
return host
def parse_account(account):
url_parts = account.split(u'.')
# if this condition is true, then we have some extra
# stuff in the account field.
if len(url_parts) > 1:
if url_parts[1] == u'global':
# remove external ID from account
parsed_account = url_parts[0][0:url_parts[0].rfind(u'-')]
else:
# remove region subdomain
parsed_account = url_parts[0]
else:
parsed_account = account
return parsed_account