Skip to content

Commit 32a51dc

Browse files
thibaultcha authored and agentzh committed
feature: implemented the split() method in the ngx.re module.
Implemented a new `ngx.re` module which implements the `split()` API described at openresty/lua-nginx-module#217 Signed-off-by: Yichun Zhang (agentzh) <[email protected]>
1 parent 57128f5 commit 32a51dc

File tree

5 files changed

+1118
-0
lines changed

5 files changed

+1118
-0
lines changed

README.markdown

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Table of Contents
2929
* [ngx.balancer](#ngxbalancer)
3030
* [ngx.ssl](#ngxssl)
3131
* [ngx.ssl.session](#ngxsslsession)
32+
* [ngx.re](#ngxre)
3233
* [Caveat](#caveat)
3334
* [TODO](#todo)
3435
* [Author](#author)
@@ -235,6 +236,15 @@ See the [documentation](./lib/ngx/ssl/session.md) for this Lua module for more d
235236

236237
[Back to TOC](#table-of-contents)
237238

239+
## ngx.re
240+
241+
This Lua module provides a Lua API which implements convenience utilities for
242+
the `ngx.re` API.
243+
244+
See the [documentation](./lib/ngx/re.md) for this Lua module for more details.
245+
246+
[Back to TOC](#table-of-contents)
247+
238248
Caveat
239249
======
240250

lib/ngx/re.lua

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
-- I hereby assign copyright in this code to the lua-resty-core project,
2+
-- to be licensed under the same terms as the rest of the code.
3+
4+
5+
local ffi = require 'ffi'
6+
local bit = require "bit"
7+
local base = require "resty.core.base"
8+
local core_regex = require "resty.core.regex"
9+
10+
11+
local C = ffi.C
12+
local sub = string.sub
13+
local type = type
14+
local band = bit.band
15+
local new_tab = base.new_tab
16+
local tostring = tostring
17+
local math_max = math.max
18+
local math_min = math.min
19+
local re_match_compile = core_regex.re_match_compile
20+
local destroy_compiled_regex = core_regex.destroy_compiled_regex
21+
22+
23+
-- flag bit tested (via band()) against the flags returned by
-- re_match_compile(); re_split_helper() uses it to tell whether an exec
-- return code of 0 is acceptable
local FLAG_DFA = 0x02
24+
-- return code of ngx_http_lua_ffi_exec_regex() that re_split_helper() treats
-- as "no match"
local PCRE_ERROR_NOMATCH = -1
25+
-- upper bound on the array size preallocated for a result table that split()
-- creates itself
local DEFAULT_SPLIT_RES_SIZE = 4
26+
27+
28+
-- context table reused by split() whenever the caller passes no ctx; only the
-- `pos` field is ever set on it in this file
local split_ctx = new_tab(0, 1)
29+
30+
31+
-- module table; the public functions are attached to it below
local _M = { version = base.version }
32+
33+
34+
-- Runs the compiled regex once against `subj`, starting at the position
-- stored in `ctx.pos`, and moves `ctx.pos` forward for the next call.
--
-- Parameters:
--   subj          the subject string
--   compiled      compiled regex object obtained from re_match_compile()
--   compile_once  when falsy, the compiled regex is not cached and this
--                 helper destroys it on every path that ends the split
--                 (no match left, or failure)
--   flags         regex flags obtained from re_match_compile()
--   ctx           table whose `pos` field (1-based) is read and updated
--
-- Returns:
--   from, to            1-based bounds of the match (to == from - 1 for an
--                       empty match)
--   from, to, capture   same, plus the first sub-match capture when the
--                       regex has capture groups and one was reported
--   nil, nil, nil       nothing left to match
--   nil, nil, nil, err  the regex execution failed
local function re_split_helper(subj, compiled, compile_once, flags, ctx)
    -- the exec function expects a 0-based offset
    local pos = math_max(ctx.pos - 1, 0)

    if pos > #subj then
        -- the offset has moved past the end of the subject (this happens
        -- after an empty match at the very end): report "no match" instead
        -- of handing an out-of-range offset to the regex engine
        if not compile_once then
            destroy_compiled_regex(compiled)
        end
        return nil, nil, nil
    end

    local rc = C.ngx_http_lua_ffi_exec_regex(compiled, flags, subj, #subj,
                                             pos)

    if rc == PCRE_ERROR_NOMATCH then
        if not compile_once then
            destroy_compiled_regex(compiled)
        end
        return nil, nil, nil
    end

    if rc < 0 then
        if not compile_once then
            destroy_compiled_regex(compiled)
        end
        return nil, nil, nil, "pcre_exec() failed: " .. rc
    end

    if rc == 0 then
        if band(flags, FLAG_DFA) == 0 then
            -- the caller aborts the split on error, so the regex must be
            -- released here just like on the other failure path
            if not compile_once then
                destroy_compiled_regex(compiled)
            end
            return nil, nil, nil, "capture size too small"
        end

        -- in DFA mode a zero return code is handled as a single match
        rc = 1
    end

    local caps = compiled.captures
    local ncaps = compiled.ncaptures

    -- caps[0]/caps[1] are the 0-based start/end offsets of the whole match;
    -- convert them to 1-based inclusive bounds
    local from = caps[0] + 1
    local to = caps[1]

    if from < 0 or to < 0 then
        -- the caller stops splitting when no bounds are returned, so the
        -- regex must be released on this path as well
        if not compile_once then
            destroy_compiled_regex(compiled)
        end
        return nil, nil, nil
    end

    if to < from then
        -- empty match: step one byte further, otherwise the next call would
        -- find the very same empty match again and never terminate
        ctx.pos = to + 2

    else
        ctx.pos = to + 1
    end

    -- retrieve the first sub-match capture if any

    if ncaps > 0 and rc > 1 then
        return from, to, sub(subj, caps[2] + 1, caps[3])
    end

    return from, to
end
84+
85+
86+
-- Splits `subj` into pieces separated by matches of `regex`.
--
-- Parameters:
--   subj   the subject; converted with tostring() before use
--   regex  the pattern, handed to re_match_compile() together with `opts`
--   opts   regex options, handed to re_match_compile()
--   ctx    optional table; its `pos` field (1-based, defaults to 1) is the
--          position at which splitting starts, and it is updated while
--          matching. A module-level table is used when omitted.
--   max    optional; when greater than 0, at most `max` pieces are produced
--          (captures not counted) and the last one holds the unsplit rest
--   res    optional table that receives the pieces; a new one is created
--          when omitted. Raises an error when given but not a table.
--
-- Returns the result table, or nil plus an error string when compiling or
-- executing the regex fails. When the regex reports a first sub-match
-- capture, it is stored right after the piece that precedes the match.
function _M.split(subj, regex, opts, ctx, max, res)
    -- string.sub() is applied to the subject further down, so make sure it
    -- really is a string
    subj = tostring(subj)

    if not ctx then
        -- no context given: fall back to the shared one, rewound to the start
        split_ctx.pos = 1
        ctx = split_ctx

    elseif not ctx.pos then
        -- caller-supplied context without a start position
        ctx.pos = 1
    end

    if not max then
        max = 0
    end

    if res then
        if type(res) ~= "table" then
            return error("res is not a table", 2)
        end

    else
        -- preallocate a small array part: never more than
        -- DEFAULT_SPLIT_RES_SIZE, and less when a smaller positive max
        -- was requested
        local prealloc = DEFAULT_SPLIT_RES_SIZE
        if max > 0 then
            prealloc = math_min(prealloc, max)
        end

        res = new_tab(prealloc, 0)
    end

    local compiled, compile_once, flags = re_match_compile(regex, opts)
    if compiled == nil then
        -- on failure the second return value carries the error string
        return nil, compile_once
    end

    local piece_start = ctx.pos
    local n = 0

    -- a positive max bounds the number of pieces; otherwise split until the
    -- helper reports that nothing is left to match
    local bounded = max > 0
    local pieces = 1

    while not bounded or pieces < max do
        local from, to, capture, err = re_split_helper(subj, compiled,
                                                       compile_once, flags,
                                                       ctx)
        if err then
            return nil, err
        end

        if not from then
            break
        end

        pieces = pieces + 1

        n = n + 1
        res[n] = sub(subj, piece_start, from - 1)

        if capture then
            n = n + 1
            res[n] = capture
        end

        piece_start = to + 1
    end

    -- when the loop stopped because the limit was reached, the helper has
    -- not released the regex, so it is done here
    if bounded and pieces == max and not compile_once then
        destroy_compiled_regex(compiled)
    end

    -- the rest of the subject is the last piece; the nil after it marks the
    -- end for result tables that were passed in without being cleared
    res[n + 1] = sub(subj, piece_start, #subj)
    res[n + 2] = nil

    return res
end
194+
195+
196+
return _M

0 commit comments

Comments
 (0)