forked from kean/Regex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrammar.ebnf
119 lines (88 loc) · 2.89 KB
/
grammar.ebnf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/* Entry point: an optional leading "^" followed by a single Expression. */
Regex ::= StartOfStringAnchor? Expression
/* One or more Subexpressions separated by "|". The rule is right-recursive:
   everything after the first "|" is itself parsed as a nested Expression. */
Expression ::= Subexpression ("|" Expression)?
/* Anything that can be on one side of the alternation. */
Subexpression ::= SubexpressionItem+
/* A single element of a Subexpression. Match and Group carry an optional
   Quantifier in their own rules; Anchor and Backreference do not. */
SubexpressionItem
::= Match
| Group
| Anchor
| Backreference
/* Grouping Constructs
------------------------------------------------------------------*/
/* A parenthesized Expression, optionally prefixed with "?:" and optionally
   followed by a Quantifier that applies to the whole group. Because the body
   is an Expression, groups may nest and may contain alternation. */
Group ::= "(" GroupNonCapturingModifier? Expression ")" Quantifier?
/* The "?:" prefix; per the rule name it marks the group as non-capturing. */
GroupNonCapturingModifier ::= "?:"
/* Match
------------------------------------------------------------------*/
/* A single matchable item, optionally followed by a Quantifier. */
Match ::= MatchItem Quantifier?
/* The three kinds of matchable item: the "." wildcard, a character class,
   or one literal character. */
MatchItem
::= MatchAnyCharacter
| MatchCharacterClass
| MatchCharacter
MatchAnyCharacter ::= "."
/* Either a bracketed group or one of the backslash-introduced classes. */
MatchCharacterClass
::= CharacterGroup
| CharacterClass
| CharacterClassFromUnicodeCategory
/* NOTE(review): Char (see Misc) also covers metacharacters such as ".",
   "[", "(", "|" and "\", so this alternative presumably applies only when
   no earlier alternative matches - confirm against the parser. */
MatchCharacter ::= Char
/* Character Classes
------------------------------------------------------------------*/
/* A bracketed set: "[", an optional negating "^", one or more items, "]". */
CharacterGroup ::= "[" CharacterGroupNegativeModifier? CharacterGroupItem+ "]"
/* A "^" directly after "[" ; per the rule name it negates the group. */
CharacterGroupNegativeModifier ::= "^"
/* One member of a bracketed set. CharacterRange always contains "-", so a
   lone character has exactly one derivation: the final Char alternative. */
CharacterGroupItem
::= CharacterClass
| CharacterClassFromUnicodeCategory
| CharacterRange
| Char /* excluding ] */
/* The backslash-letter shorthand classes. */
CharacterClass
::= CharacterClassAnyWord
| CharacterClassAnyWordInverted
| CharacterClassAnyDecimalDigit
| CharacterClassAnyDecimalDigitInverted
CharacterClassAnyWord ::= "\w"
CharacterClassAnyWordInverted ::= "\W"
CharacterClassAnyDecimalDigit ::= "\d"
CharacterClassAnyDecimalDigitInverted ::= "\D"
/* "\p{" Name "}" where Name is one or more ASCII letters (see Letters). */
CharacterClassFromUnicodeCategory ::= "\p{" UnicodeCategoryName "}"
UnicodeCategoryName ::= Letters
/* Two endpoints joined by "-". The "-" part is mandatory: a single character
   is covered by the Char alternative of CharacterGroupItem, and making it
   optional here would give every lone character two derivations.
   NOTE(review): presumably both endpoints also exclude "]" - confirm. */
CharacterRange ::= Char "-" Char
/* Quantifiers
------------------------------------------------------------------*/
/* A repetition operator, optionally followed by "?" (the lazy modifier). */
Quantifier ::= QuantifierType LazyModifier?
QuantifierType
::= ZeroOrMoreQuantifier
| OneOrMoreQuantifier
| ZeroOrOneQuantifier
| RangeQuantifier
/* Same terminal as ZeroOrOneQuantifier; the two are told apart by position:
   LazyModifier can only appear directly after a QuantifierType. */
LazyModifier ::= "?"
ZeroOrMoreQuantifier ::= "*"
OneOrMoreQuantifier ::= "+"
ZeroOrOneQuantifier ::= "?"
/* Three accepted shapes: "{n}", "{n,}" and "{n,m}". The lower bound is
   always required; a form such as "{,m}" is not derivable. */
RangeQuantifier ::= "{" RangeQuantifierLowerBound ( "," RangeQuantifierUpperBound? )? "}"
RangeQuantifierLowerBound ::= Integer
RangeQuantifierUpperBound ::= Integer
/* Backreferences
------------------------------------------------------------------*/
/* A backslash followed by one or more decimal digits (see Integer).
   This rule takes no Quantifier.
   NOTE(review): presumably the Integer is the number of an earlier capturing
   group - confirm against the implementation. */
Backreference ::= "\" Integer
/* Anchors
------------------------------------------------------------------*/
/* Referenced only by Regex, as an optional first token; it is deliberately
   not one of the Anchor alternatives below. */
StartOfStringAnchor ::= "^"
/* Anchors that may appear anywhere a SubexpressionItem is allowed. */
Anchor
::= AnchorWordBoundary
| AnchorNonWordBoundary
| AnchorStartOfStringOnly
| AnchorEndOfStringOnlyNotNewline
| AnchorEndOfStringOnly
| AnchorPreviousMatchEnd
| AnchorEndOfString
AnchorWordBoundary ::= "\b"
AnchorNonWordBoundary ::= "\B"
AnchorStartOfStringOnly ::= "\A"
/* NOTE(review): in .NET-style syntax "\z" matches only at the very end of
   the input while "\Z" also matches before a final newline; the name
   AnchorEndOfStringOnly for "\Z" may therefore be misleading - confirm
   against the implementation before relying on the rule names. */
AnchorEndOfStringOnlyNotNewline ::= "\z"
AnchorEndOfStringOnly ::= "\Z"
AnchorPreviousMatchEnd ::= "\G"
AnchorEndOfString ::= "$"
/* Misc
------------------------------------------------------------------*/
/* One or more ASCII decimal digits; leading zeros are permitted. */
Integer ::= [0-9]+
/* One or more ASCII letters, upper or lower case. */
Letters ::= [a-zA-Z]+
/* A single code point: tab, line feed, carriage return, or anything in the
   listed ranges. This leaves out the other code points below #x20, the
   block #xD800-#xDFFF, and #xFFFE-#xFFFF. */
Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]