1
1
# frozen_string_literal: true
2
2
3
3
require 'set'
4
- require 'values'
5
4
5
+ require_relative 'string_splitter/split'
6
6
require_relative 'string_splitter/version'
7
7
8
8
# This class extends the functionality of +String#split+ by:
17
17
# These enhancements allow splits to handle many cases that otherwise require bigger
18
18
# guns, e.g. regex matching or parsing.
19
19
#
20
- # Implementation-wise, we split the string with a scanner which works in a similar
21
- # way to +String#split+ and parse the resulting tokens into an array of Split objects
22
- # with the following fields:
20
+ # Implementation-wise, we split the string either with String#split, or with a custom
21
+ # scanner if the delimiter may contain captures (since String#split doesn't handle
22
+ # them correctly) and parse the resulting tokens into an array of Split objects with
23
+ # the following attributes:
23
24
#
24
25
# - captures: separator substrings captured by parentheses in the delimiter pattern
25
26
# - count: the number of splits
@@ -43,42 +44,6 @@ class StringSplitter
43
44
DEFAULT_DELIMITER = /\s +/ . freeze
44
45
REMOVE = [ ] . freeze
45
46
46
- Split = Value . new ( :captures , :count , :index , :lhs , :rhs , :separator ) do
47
- def position
48
- index + 1
49
- end
50
-
51
- alias_method :pos , :position
52
-
53
- # 0-based index relative to the end of the array, e.g. for 5 items:
54
- #
55
- # index | rindex
56
- # ------|-------
57
- # 0 | 4
58
- # 1 | 3
59
- # 2 | 2
60
- # 3 | 1
61
- # 4 | 0
62
- def rindex
63
- count - position
64
- end
65
-
66
- # 1-based position relative to the end of the array, e.g. for 5 items:
67
- #
68
- # position | rposition
69
- # ----------|----------
70
- # 1 | 5
71
- # 2 | 4
72
- # 3 | 3
73
- # 4 | 2
74
- # 5 | 1
75
- def rposition
76
- count + 1 - position
77
- end
78
-
79
- alias_method :rpos , :rposition
80
- end
81
-
82
47
# simulate an enum. the value is returned by the case statement
83
48
# in the generated block if the positions match
84
49
module Action
@@ -130,9 +95,10 @@ def split(
130
95
131
96
return result unless splits
132
97
133
- splits . each_with_index do |hash , index |
134
- split = Split . with ( hash . merge ( { count : count , index : index } ) )
135
- result << split . lhs if result . empty?
98
+ result << splits . first . lhs
99
+
100
+ splits . each_with_index do |split , index |
101
+ split . update! ( count : count , index : index )
136
102
137
103
if accept . call ( split )
138
104
result << split . captures << split . rhs
@@ -166,9 +132,10 @@ def rsplit(
166
132
167
133
return result unless splits
168
134
169
- splits . reverse_each . with_index do |hash , index |
170
- split = Split . with ( hash . merge ( { count : count , index : index } ) )
171
- result . unshift ( split . rhs ) if result . empty?
135
+ result . unshift ( splits . last . rhs )
136
+
137
+ splits . reverse_each . with_index do |split , index |
138
+ split . update! ( count : count , index : index )
172
139
173
140
if accept . call ( split )
174
141
# [lhs + captures] + result
@@ -190,7 +157,7 @@ def rsplit(
190
157
# the following fields:
191
158
#
192
159
# - result: the array of separated strings to return from +split+ or +rsplit+.
193
- # if the splits arry is empty, the caller returns this array immediately
160
+ # if the splits array is empty, the caller returns this array immediately
194
161
# without any further processing
195
162
#
196
163
# - splits: an array of Split objects containing the lhs, rhs, separator and captured
@@ -202,23 +169,76 @@ def rsplit(
202
169
# accepted (true) or rejected (false)
203
170
#
204
171
def init(string:, delimiter:, select:, reject:, block:)
  # Return shapes:
  #
  #   [result]                   - final fields; no splits left to filter
  #   [[], splits, count, block] - splits still to be accepted/rejected by block
  return [[]] if string.empty?

  unless block
    if reject
      positions = reject
      action = Action::REJECT
    elsif select
      positions = select
      action = Action::SELECT
    else
      block = ACCEPT_ALL
    end
  end

  # use String#split if we can
  #
  # NOTE +reject!+ is no faster than +reject+ on MRI and significantly slower
  # on TruffleRuby

  if delimiter.is_a?(String)
    # keep the literal separator text for the Split objects: the delimiter
    # itself may be swapped for a regexp below
    separator = delimiter
    limit = -1

    if delimiter == ' '
      # String#split special-cases ' ' (whitespace runs, trimmed); a regexp
      # makes it split on each single space instead
      delimiter = / / # don't trim
    elsif delimiter.empty?
      limit = 0 # remove the trailing empty string
    end

    result = string.split(delimiter, limit)

    return [result] if result.length == 1 # delimiter not found: no splits

    if block == ACCEPT_ALL # return the (2 or more) fields
      result = result.reject(&:empty?) if @remove_empty_fields
      return [result]
    end

    # each adjacent pair of fields is one split: lhs <separator> rhs
    splits = result.each_cons(2).map do |lhs, rhs| # 2 or more fields
      Split.new(
        captures: [],
        lhs: lhs,
        rhs: rhs,
        separator: separator
      )
    end
  elsif delimiter == DEFAULT_DELIMITER && block == ACCEPT_ALL
    # non-empty separators so -1 is safe

    if @remove_empty_fields
      result = []

      # block form: collect the non-empty fields without building the
      # intermediate array of all fields
      string.split(delimiter, -1) do |field|
        result << field unless field.empty?
      end
    else
      result = string.split(delimiter, -1)
    end

    return [result]
  else
    splits = parse(string, delimiter)
  end

  count = splits.length

  return [[string]] if count.zero?

  # positions/action are only set when no block was supplied
  block ||= compile(positions, action, count)
  [[], splits, count, block]
end
223
243
224
244
def render ( values )
@@ -227,6 +247,7 @@ def render(values)
227
247
value . empty? && @remove_empty_fields ? REMOVE : [ value ]
228
248
elsif @include_captures
229
249
if @spread_captures
250
+ # TODO make sure compact can return a Capture
230
251
@spread_captures == :compact ? value . compact : value
231
252
elsif value . empty?
232
253
# we expose non-captures (string delimiters or regexps with no
@@ -247,7 +268,7 @@ def render(values)
247
268
# the delimiter, returning an array of Split objects representing each split.
248
269
# e.g. for:
249
270
#
250
- # parse.split ("foo:bar:baz:quux", ":")
271
+ # parse("foo:bar:baz:quux", ":")
251
272
#
252
273
# we return:
253
274
#
@@ -258,6 +279,7 @@ def render(values)
258
279
# ]
259
280
#
260
281
def parse ( string , delimiter )
282
+ # has_names = delimiter.is_a?(Regexp) && !delimiter.names.empty?
261
283
result = [ ]
262
284
start = 0
263
285
@@ -273,21 +295,23 @@ def parse(string, delimiter)
273
295
next if separator . empty? && ( index . zero? || after == string . length )
274
296
275
297
lhs = string . slice ( start , index - start )
276
- result . last [ : rhs] = lhs unless result . empty?
298
+ result . last . rhs = lhs unless result . empty?
277
299
278
300
# this is correct for the last/only match, but gets updated to the next
279
301
# match's lhs for other matches
280
302
rhs = match . post_match
281
303
282
- result << {
304
+ # captures = (has_names ? Captures.new(match) : match.captures)
305
+
306
+ result << Split . new (
283
307
captures : match . captures ,
284
308
lhs : lhs ,
285
309
rhs : rhs ,
286
- separator : separator ,
287
- }
310
+ separator : separator
311
+ )
288
312
289
- # move the start index (the start of the next lhs) to the index after the
290
- # last character of the separator
313
+ # advance the start index (the start of the next lhs) to the position
314
+ # after the last character of the separator
291
315
start = after
292
316
end
293
317
@@ -297,8 +321,8 @@ def parse(string, delimiter)
297
321
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
298
322
# on the action) the supplied positions
299
323
#
300
- # positions are preprocessed to support additional features: negative
301
- # ranges, infinite ranges, and descending ranges, e.g.:
324
+ # positions are preprocessed to support negative indices, infinite ranges, and
325
+ # descending ranges, e.g.:
302
326
#
303
327
# ss.split("foo:bar:baz:quux", ":", at: -1)
304
328
#
@@ -309,9 +333,8 @@ def parse(string, delimiter)
309
333
# and
310
334
#
311
335
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
312
- # ss.split("1:2:3:4:5:6:7:8:9", ":", -3..)
313
336
#
314
- # translate to:
337
+ # translates to:
315
338
#
316
339
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
317
340
#
0 commit comments