Skip to content

Commit

Permalink
feat(regexp): support for more metacharacters
Browse files Browse the repository at this point in the history
  • Loading branch information
Water-Melon committed Mar 1, 2024
1 parent 1a106d0 commit 3eedca3
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 27 deletions.
10 changes: 8 additions & 2 deletions docs/Melon Developer Guide.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1594,9 +1594,15 @@ Their definitions can be found in melon/include/mln_types.h.
Match a nonnumeric character.
m) \n
Just like '\n' in C.
n) \a
n) \w
Match a letter.
o) \\
o) \W
Match a non-letter character.
p) \s
Match a whitespace character: ' ', '\t', '\n', '\f', '\r' or '\v'.
q) \S
Match any character that is not whitespace (not ' ', '\t', '\n', '\f', '\r' or '\v').
r) \\
Just like '\\' in C.

21) JSON
Expand Down
41 changes: 22 additions & 19 deletions include/mln_regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,28 @@ typedef mln_array_t mln_reg_match_result_t;
#define M_REGEXP_MASK_NEW ((unsigned int)0x02000000)
#define M_REGEXP_SPECIAL_MASK ((unsigned int)0x0000ffff)

#define M_REGEXP_ALPHA 128
#define M_REGEXP_NUM 129
#define M_REGEXP_NOT_NUM 130
#define M_REGEXP_SEPARATOR 160

#define M_REGEXP_LBRACE 161
#define M_REGEXP_RBRACE 162
#define M_REGEXP_LPAR 163
#define M_REGEXP_RPAR 164
#define M_REGEXP_LSQUAR 165
#define M_REGEXP_RSQUAR 166
#define M_REGEXP_XOR 167
#define M_REGEXP_STAR 168
#define M_REGEXP_DOLL 169
#define M_REGEXP_DOT 170
#define M_REGEXP_QUES 171
#define M_REGEXP_PLUS 172
#define M_REGEXP_SUB 173
#define M_REGEXP_OR 174
/*
 * Token codes for escaped character classes. All values are >= 128, i.e.
 * outside the 7-bit ASCII range. mln_get_char() returns these for the
 * two-character escapes noted on each line.
 */
/* \w : a letter, as tested by mln_isalpha() */
#define M_REGEXP_ALPHA 128
/* \W : any character for which mln_isalpha() is false */
#define M_REGEXP_NOT_ALPHA 129
/* \d : a digit */
#define M_REGEXP_NUM 130
/* \D : a non-digit character */
#define M_REGEXP_NOT_NUM 131
/* \s : a whitespace character, as tested by mln_iswhitespace() */
#define M_REGEXP_WHITESPACE 132
/* \S : any character for which mln_iswhitespace() is false */
#define M_REGEXP_NOT_WHITESPACE 133
/* NOTE(review): presumably marks the boundary between class tokens and
 * metacharacter tokens -- confirm against its users in mln_regexp.c. */
#define M_REGEXP_SEPARATOR 160

/*
 * Token codes for regular-expression metacharacters.
 * NOTE(review): judging by the names these presumably correspond to
 * { } ( ) [ ] ^ * $ . ? + - | respectively -- confirm against mln_get_char().
 */
#define M_REGEXP_LBRACE 161
#define M_REGEXP_RBRACE 162
#define M_REGEXP_LPAR 163
#define M_REGEXP_RPAR 164
#define M_REGEXP_LSQUAR 165
#define M_REGEXP_RSQUAR 166
#define M_REGEXP_XOR 167
#define M_REGEXP_STAR 168
#define M_REGEXP_DOLL 169
#define M_REGEXP_DOT 170
#define M_REGEXP_QUES 171
#define M_REGEXP_PLUS 172
#define M_REGEXP_SUB 173
#define M_REGEXP_OR 174


#define mln_reg_match_result_new(prealloc) mln_array_new(NULL, sizeof(mln_string_t), (prealloc))
Expand Down
9 changes: 5 additions & 4 deletions include/mln_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,11 @@ extern int socketpair(int domain, int type, int protocol, int sv[2]);
#define mln_socket_close close
#endif

#define mln_isalpha(x) (((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z'))
#define mln_isdigit(x) ((x) >= '0' && (x) <= '9')
#define mln_isascii(x) ((x) >= 0 && (x) <= 127)
#define mln_isprint(x) ((x) >= 32 && (x) <= 126)
/*
 * Character classification helpers implemented as plain comparisons
 * (no library call, no locale lookup).
 * Every macro expands its argument more than once, so never pass an
 * expression with side effects, e.g. mln_isalpha(*p++).
 */
/* Non-zero when x is a letter in 'a'..'z' or 'A'..'Z'. */
#define mln_isalpha(x) (((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z'))
/* Non-zero when x is a decimal digit '0'..'9'. */
#define mln_isdigit(x) ((x) >= '0' && (x) <= '9')
/* Non-zero when x lies in the range 0..127. */
#define mln_isascii(x) ((x) >= 0 && (x) <= 127)
/* Non-zero when x lies in the range 32..126. */
#define mln_isprint(x) ((x) >= 32 && (x) <= 126)
/* Non-zero when x is one of: space, \t, \n, \f, \r, \v. */
#define mln_iswhitespace(x) ((x) == ' ' || (x) == '\t' || (x) == '\n' || (x) == '\f' || (x) == '\r' || (x) == '\v')

#define MLN_AUTHOR "Niklaus F.Schen"

Expand Down
43 changes: 41 additions & 2 deletions src/mln_regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,36 @@ MLN_FUNC(static, int, mln_match_here, \
reglen -= len_0;
goto again;
}
if (c_0 == M_REGEXP_NOT_ALPHA && textlen > 0) {
if (mln_isalpha(*text)) {
return -1;
}
++text;
--textlen;
regexp += len_0;
reglen -= len_0;
goto again;
}
if (c_0 == M_REGEXP_WHITESPACE && textlen > 0) {
if (!mln_iswhitespace(*text)) {
return -1;
}
++text;
--textlen;
regexp += len_0;
reglen -= len_0;
goto again;
}
if (c_0 == M_REGEXP_NOT_WHITESPACE && textlen > 0) {
if (mln_iswhitespace(*text)) {
return -1;
}
++text;
--textlen;
regexp += len_0;
reglen -= len_0;
goto again;
}
if (c_0 == M_REGEXP_DOT && textlen > 0) {
regexp += len_0;
++text;
Expand Down Expand Up @@ -773,12 +803,18 @@ MLN_FUNC(static inline, int, mln_get_char, (unsigned int flag, char *s, int len)
return '|';
case '\\':
return '\\';
case 'a':
case 'w':
return M_REGEXP_ALPHA;
case 'W':
return M_REGEXP_NOT_ALPHA;
case 'd':
return M_REGEXP_NUM;
case 'D':
return M_REGEXP_NOT_NUM;
case 's':
return M_REGEXP_WHITESPACE;
case 'S':
return M_REGEXP_NOT_WHITESPACE;
default:
return s[1];
}
Expand Down Expand Up @@ -829,9 +865,12 @@ MLN_FUNC(static inline, int, mln_get_length, (char *s, int len), (s, len), {
case 't':
case '-':
case '|':
case 'a':
case 'w':
case 'W':
case 'd':
case 'D':
case 's':
case 'S':
case '\\':
default:
return 2;
Expand Down

0 comments on commit 3eedca3

Please sign in to comment.