Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new operator \J (skip search) #278 #299

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 61 additions & 4 deletions src/regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2022 K.Kosako
* Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -177,6 +177,9 @@ typedef struct {
#ifdef USE_CALL
unsigned long subexp_call_in_search_counter;
#endif
#ifdef USE_SKIP_SEARCH
UChar* skip_search;
#endif
} MatchArg;


Expand Down Expand Up @@ -1261,6 +1264,7 @@ struct OnigCalloutArgsStruct {
#endif

#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#ifdef USE_SKIP_SEARCH
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option)|(reg)->options;\
Expand All @@ -1272,6 +1276,35 @@ struct OnigCalloutArgsStruct {
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
(msa).skip_search = (UChar* )(arg_start);\
} while(0)
#else
/*
 * MATCH_ARG_INIT variant used when USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is
 * defined and USE_SKIP_SEARCH is not.
 *
 * Fills in the MatchArg `msa` before a match/search run:
 *   - stack_p is cleared to a null pointer;
 *   - options is the caller's arg_option OR-ed with (reg)->options;
 *   - region and start are taken from arg_region and arg_start;
 *   - match_stack_limit is copied from (mpv)->match_stack_limit and mpv
 *     itself is stored in mp;
 *   - best_len starts out as ONIG_MISMATCH;
 *   - ptr_num is PTR_NUM_SIZE(reg).
 * RETRY_IN_MATCH_ARG_INIT and SUBEXP_CALL_IN_MATCH_ARG_INIT are expanded
 * with (msa, mpv); their definitions are outside this view.
 * NOTE: `msa` and `mpv` are expanded several times, so pass plain lvalues.
 */
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option)|(reg)->options;\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
#else
#ifdef USE_SKIP_SEARCH
/*
 * MATCH_ARG_INIT variant used when USE_SKIP_SEARCH is defined and
 * USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is not.
 *
 * Fills in the MatchArg `msa` before a match/search run:
 *   - stack_p is cleared to a null pointer;
 *   - options is the caller's arg_option OR-ed with (reg)->options;
 *   - region and start are taken from arg_region and arg_start;
 *   - match_stack_limit is copied from (mpv)->match_stack_limit and mpv
 *     itself is stored in mp;
 *   - ptr_num is PTR_NUM_SIZE(reg);
 *   - skip_search starts at the same position as start.
 * RETRY_IN_MATCH_ARG_INIT and SUBEXP_CALL_IN_MATCH_ARG_INIT are expanded
 * with (msa, mpv).
 *
 * skip_search is seeded from (msa).start instead of expanding arg_start a
 * second time, so an argument expression with side effects is evaluated
 * exactly once.
 * NOTE: `msa` and `mpv` are still expanded several times; pass plain lvalues.
 */
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
  (msa).stack_p = (void* )0;\
  (msa).options = (arg_option)|(reg)->options;\
  (msa).region = (arg_region);\
  (msa).start = (arg_start);\
  (msa).match_stack_limit = (mpv)->match_stack_limit;\
  RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
  SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
  (msa).mp = mpv;\
  (msa).ptr_num = PTR_NUM_SIZE(reg);\
  (msa).skip_search = (UChar* )((msa).start);\
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
Expand All @@ -1286,6 +1319,7 @@ struct OnigCalloutArgsStruct {
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
#endif

/*
 * Release (msa).stack_p with xfree() when it is non-null.
 * The body is wrapped in do { ... } while(0) so the expansion is a single
 * statement: a bare `if` expansion would capture a following `else` when
 * the macro is used as the unbraced body of an if/else.
 * NOTE: `msa` is expanded twice; pass a plain lvalue.
 */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)

Expand Down Expand Up @@ -4335,6 +4369,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case UPDATE_VAR_RIGHT_RANGE_INIT:
INIT_RIGHT_RANGE;
break;
#ifdef USE_SKIP_SEARCH
case UPDATE_VAR_SKIP_SEARCH:
if (s > msa->skip_search) msa->skip_search = s;
break;
#endif
}
}
INC_OP;
Expand Down Expand Up @@ -5629,6 +5668,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK(data_range);
s += enclen(reg->enc, s);
#ifdef USE_SKIP_SEARCH
if (s < msa.skip_search) s = msa.skip_search;
#endif
}
} while (s < range);
goto mismatch;
Expand All @@ -5646,10 +5688,18 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enclen(reg->enc, s);

while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
prev = s;
s += enclen(reg->enc, s);
#ifdef USE_SKIP_SEARCH
if (s < msa.skip_search) s = msa.skip_search;
else {
#endif
while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
s < range) {
prev = s;
s += enclen(reg->enc, s);
}
#ifdef USE_SKIP_SEARCH
}
#endif
}
goto mismatch;
}
Expand All @@ -5660,6 +5710,13 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
MATCH_AND_RETURN_CHECK(data_range);
if (s >= range) break;
s += enclen(reg->enc, s);

#ifdef USE_SKIP_SEARCH
if (s < msa.skip_search) {
s = msa.skip_search;
if (s > range) break;
}
#endif
}
}
else { /* backward search */
Expand Down
6 changes: 5 additions & 1 deletion src/regint.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2023 K.Kosako
* Copyright (c) 2002-2024 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -62,6 +62,7 @@
#define USE_REGSET
#define USE_CALL
#define USE_CALLOUT
#define USE_SKIP_SEARCH
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_WHOLE_OPTIONS
#define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
Expand Down Expand Up @@ -584,6 +585,9 @@ enum UpdateVarType {
UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3,
UPDATE_VAR_RIGHT_RANGE_TO_S = 4,
UPDATE_VAR_RIGHT_RANGE_INIT = 5,
#ifdef USE_SKIP_SEARCH
UPDATE_VAR_SKIP_SEARCH = 6,
#endif
};

enum CheckPositionType {
Expand Down
27 changes: 27 additions & 0 deletions src/regparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -2781,6 +2781,16 @@ node_new_keep(Node** node, ParseEnv* env)
return ONIG_NORMAL;
}

#ifdef USE_SKIP_SEARCH
/*
 * Create the parse-tree node for the skip-search token (\J).
 *
 * Delegates to node_new_update_var_gimmick() with the
 * UPDATE_VAR_SKIP_SEARCH type and an id argument of 0, storing the new
 * node through `node`.  The delegate's return code is passed back
 * unchanged; the caller treats a negative value as an error.
 */
static int
node_new_skip_search(Node** node, ParseEnv* env)
{
  return node_new_update_var_gimmick(node, UPDATE_VAR_SKIP_SEARCH, 0, env);
}
#endif

#ifdef USE_CALLOUT

extern void
Expand Down Expand Up @@ -4526,6 +4536,9 @@ enum TokenSyms {
TK_QUOTE_OPEN,
TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
TK_KEEP, /* \K */
#ifdef USE_SKIP_SEARCH
TK_SKIP_SEARCH, /* \J */
#endif
TK_GENERAL_NEWLINE, /* \R */
TK_NO_NEWLINE, /* \N */
TK_TRUE_ANYCHAR, /* \O */
Expand Down Expand Up @@ -5743,6 +5756,13 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
tok->type = TK_KEEP;
break;

#ifdef USE_SKIP_SEARCH
case 'J':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
tok->type = TK_SKIP_SEARCH;
break;
#endif

case 'R':
if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
tok->type = TK_GENERAL_NEWLINE;
Expand Down Expand Up @@ -9093,6 +9113,13 @@ prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r < 0) return r;
break;

#ifdef USE_SKIP_SEARCH
case TK_SKIP_SEARCH:
r = node_new_skip_search(np, env);
if (r < 0) return r;
break;
#endif

case TK_GENERAL_NEWLINE:
r = node_new_general_newline(np, env);
if (r < 0) return r;
Expand Down
Loading