-
Notifications
You must be signed in to change notification settings - Fork 3
/
user_syntax.text
162 lines (141 loc) · 7.94 KB
/
user_syntax.text
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
A User-Friendly Syntax for Core SHACL
This is a user-friendly syntax for the core of SHACL that tries to align
directly with my SHACL RDF syntax. The grammar is compatible with ANTLR.
It would be possible to modify the syntax to give a more standard
treatment of conjunction, disjunction, and negation, but the resulting syntax
would not align as closely with the RDF syntax.
Example Description
∈ ex:Person ⊩ |≤27| // there are at most 27 people
¹ex:child ⊩ ∋ ex:john // child subjects include John
¹ex:child ⊩ ∈ ex:Person ∧ IRI // ... are people and not blank
ex:child² ⊩ ∈ ex:Person // child objects are people
∈ ex:Patriot ⊩ ⋹ ex:Citizen // patriots are directly citizens
ex:password² ⊩ ^^xs:string ∧ ℓ≤24 ∧ ℓ≥8 // passwords are strings between 8 and 24 characters long
ex:age² ⊩ ^^xs:integer ∧ ≥0 // ages are non-negative integers
ex:john ⊩ ex:name ∝ "^John.*" ★ // John's name starts with John
ex:mstatus² ⊩ ∈ { ex:single ex:married ex:divorced }
// three marital statuses only
∈ ex:Person ⊩ ex:mstatus ∝ |=1| // people have one marital status
∈ ex:Person ⊩ ex:mstatus ∝ ∋ ex:married → ex:spouse ∝ |≥1|
// married people have a spouse
∈ ex:Person ⊩ ex:mstatus ∝ ∋ ex:single → ex:spouse ∝ |≤0|
// single people don't have a spouse
∈ ex:Person ⊩ ex:spouse ∅ ex:child ∧ // people can't marry their children
ex:child ex:age ≤ ex:age ∧ // ... are at least as old as their children
ex:age ≤ ex:child⁻¹ ex:age ∧ // ... are no older than their parents
ex:child⁻¹ ex:child ∝ |≤9| ∧ // ... have at most 8 siblings
ex:name ∝ ∈ rdf:langString ∧ // ... names are lang-tagged strings
ex:name ∝ ➀ // ... have only one name per language
∈ ex:Person ⊩
( ( ex:spouse ∝ |≤0| → ex:mstatus ∝ ( |≥1| ∧ ∈{ex:single ex:divorced} ) ) ∖
( ex:spouse ∝ |≤1| → ex:mstatus ∝ ( |≥1| ∧ ∈{ex:married} ) ) ∖
|≤3| ∖ ) // people with no spouse are single or divorced
// people with one spouse are married
// and there are at most three people left over
∈ ex:Isolated ⊩ ⟦ rdf:type ⟧ // isolated nodes have only types
∈ ex:nonIsolated ⊩ ¬ ⟦ rdf:type ⟧ // non-isolated nodes have other values
sh:partShape ≡ (IRI ∨ sh:inverse ∝ IRI) // parts are properties or inverses
sh:pathShape ≡ ∈ sh:path ⊩ ( sh:partShape ∨ ⦇ sh:partShape ⦈ )
// paths are parts or lists of parts
Grammar Meaning
// Entry rule: zero or more statements followed by end of input.
shaclDoc
    : statement* EOF
    ;

// A statement is a prefix declaration, a shape definition, or a scoping.
statement
    : prefix
    | define
    | scoping
    ;

// Prefix declaration: '@prefix', a namespace token, an IRI reference, and '.'.
prefix
    : '@prefix' PNAME_NS IRIREF '.'
    ;

// Name a shape; an optional '∪'-separated list of scopes may precede it.
define
    : iri '≡' ( scope ( '∪' scope )* '⊩' )? shape '.'
    ;

// The nodes in the '∪'-separated scopes validate against the shape.
scoping
    : scope ( '∪' scope )* '⊩' shape '.'
    ;
// A scope selects nodes; each alternative carries a label for the parse tree.
scope
    : value             #svalue         // the value
    | '∈' clss          #sclass         // SHACL instances of class
    | '¹' proprty       #spsubject      // subjects of property
    | proprty '²'       #spobject       // objects of property
    | '¹' '?'           #sasubject      // all subjects
    | '?' '²'           #saobject       // all objects
    ;
// A shape is an optional filter part ('∧'-joined filtrs ending in '→')
// followed by one or more '∧'-joined components.
// The set of nodes that validate against all filtrs
// validates against each component.
shape
    : ( filtr ( '∧' filtr )* '→' )?
      component ( '∧' component )*
    ;

// A filter is written as a component: nodes that validate vs shape.
filtr
    : component
    ;
// A component is a single constraint. Every alternative is labelled (#c...)
// so that the generated parse tree has one context type per kind of constraint.
// Alternatives that start with a path compare or constrain the values reached
// through that path.
component : iri #cname // validate against named shape
| '∈' clss ( '∪' clss )* #cclass // SHACL instance of some class
| '^^' datatype ( '∪' datatype )* #cdatatype // has one of datatypes
| '∈' '{' value* '}' #cin // is one of values
| '⋹' clss #cdirect // has rdf:type of class
| 'ℓ' '≤' UNSIGNED_INTEGER #cmaxLength // maximum string length
| 'ℓ' '≥' UNSIGNED_INTEGER #cminLength // minimum string length
| '>' literal #cminExcl // exclusive minimum
| '≥' literal #cminIncl // inclusive minimum
| '<' literal #cmaxExcl // exclusive maximum
| '≤' literal #cmaxIncl // inclusive maximum
| 'IRI' #cnkIRI // kind of node: IRI
| 'Literal' #cnkLit // kind of node: literal
| 'BlankNode' #cnkBlank // kind of node: blank node
| regex '★' ( string )? #cpattern // matches pattern (optional string gives flags)
| path '=' path #cequals // path values the same
| path '∅' path #cdisjoint // path values disjoint
| path '<' path #clt // path1 values < path2 values
| path '≤' path #clte // path1 values ≤ path2 values
| '⟦' pathpart * '⟧' #cclosed // no other property has values
| '∋' value #chasValue // set contains value
| '|' '≥' UNSIGNED_INTEGER '|' #cminCount // minimum size of set
| '|' '≤' UNSIGNED_INTEGER '|' #cmaxCount // maximum size of set
| '|' '=' UNSIGNED_INTEGER '|' #cexactCount// exact size of set
| '➀' #cuniqueLang// only one value per language
| '¬' component #cnot // does not validate against component
| path '∝' component #cpathValues// path values in shape
| '⦇' shape '⦈' #clist // list members in shape
| '(' shape ')' #cshape // validate against shape
| '(' component ('∨'component)+ ')'#cor // validate against one or more
| '(' ( component '∖' ) + ')' #cpartition // partition - explained below
; // Partition semantics:
// The initial remnant is the entire set being validated.
// The next remnant is the subset of the current one that fails
// to validate against the filter of the respective component.
// The final remnant is empty.
// Each remnant validates against the respective component.
// A path is the composition of one or more path parts.
path
    : pathpart+
    ;

// One step of a path: a property, or a property followed by '⁻¹' for its inverse.
// NOTE(review): PN_CHARS_BASE includes the range \u2070-\u218F, which contains
// '⁻' (U+207B), so `ex:child⁻¹` written without whitespace may lex as the
// prefixed name `ex:child⁻` followed by '¹' — confirm, and consider requiring
// whitespace before '⁻¹' or excluding U+207B from PN_CHARS_BASE.
pathpart
    : proprty           #pprop
    | proprty '⁻¹'      #pinv       // inverse of property
    ;
// Role-naming rules: each wraps a more general rule so that the grammar
// records how the term is being used.

// a class
clss
    : iri
    ;

// a datatype
datatype
    : iri
    ;

// a property
proprty
    : iri
    ;

// object
value
    : iri
    | literal
    ;

// regular expression
regex
    : string
    ;
// ---------------------------------------------------------------------------
// Productions from Turtle grammar
// ---------------------------------------------------------------------------

literal
    : rdfLiteral
    | numericLiteral
    | booleanLiteral
    ;

// A string, optionally followed by a language tag or by '^^' and a datatype IRI.
rdfLiteral
    : string ( LANGTAG | '^^' iri )?
    ;

numericLiteral
    : integer
    | DECIMAL
    | DOUBLE
    ;

booleanLiteral
    : 'true'
    | 'false'
    ;

// Any of the four quoted string token forms.
string
    : STRING_LITERAL_QUOTE
    | STRING_LITERAL_SINGLE_QUOTE
    | STRING_LITERAL_LONG_SINGLE_QUOTE
    | STRING_LITERAL_LONG_QUOTE
    ;

iri
    : IRIREF
    | prefixedName
    ;

prefixedName
    : PNAME_LN
    | PNAME_NS
    ;

// Either integer token; the lexer splits signed and unsigned forms.
integer
    : UNSIGNED_INTEGER
    | SIGNED_INTEGER
    ;
// IRI reference in angle brackets. The body excludes control characters and
// space (U+0000-U+0020) and the characters < > " { } | ^ ` \ ; UCHAR escapes
// are allowed.
IRIREF : '<' (~[\u0000-\u0020<>"{}|^`\\] | UCHAR)* '>' ;
// Namespace part of a prefixed name: an optional prefix followed by ':'.
PNAME_NS : PN_PREFIX? ':' ;
// Full prefixed name: namespace part followed by a local name.
PNAME_LN : PNAME_NS PN_LOCAL ;
// Language tag: '@', letters, then optional '-'-separated alphanumeric subtags.
LANGTAG : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* ;
// The single INTEGER token below is replaced by the SIGNED_INTEGER and
// UNSIGNED_INTEGER tokens, so that the length and count components in the
// parser can require an unsigned value.
//INTEGER : ('+'|'-')? [0-9]+ ; // split in two
// Integer with a mandatory leading sign.
SIGNED_INTEGER : ('+'|'-') [0-9]+ ;
// Integer with no sign.
UNSIGNED_INTEGER : [0-9]+ ;
// Decimal: optional sign, optional integer digits, '.', at least one fraction digit.
DECIMAL : ('+'|'-')? [0-9]* '.' [0-9]+ ;
// Double: optional sign and a mandatory exponent, in one of three digit layouts.
DOUBLE : ('+'|'-')? ( [0-9]+'.'[0-9]* EXPONENT | '.'[0-9]+ EXPONENT |
[0-9]+ EXPONENT ) ;
// Exponent suffix: 'e' or 'E', optional sign, at least one digit.
fragment EXPONENT : [eE] ('+'|'-')? [0-9]+ ;
// Double-quoted single-line string. Raw characters exclude the double quote
// (U+0022), backslash (U+005C), line feed (U+000A) and carriage return (U+000D);
// ECHAR and UCHAR escapes are allowed.
STRING_LITERAL_QUOTE : '"' (~[\u0022\u005C\u000A\u000D] | ECHAR | UCHAR)* '"' ;
// Single-quoted single-line string; same exclusions, with the single quote
// (U+0027) in place of the double quote.
STRING_LITERAL_SINGLE_QUOTE : '\'' (~[\u0027\u005C\u000A\u000D] | ECHAR | UCHAR)* '\'' ;
// Triple-single-quoted string: one or two single quotes may appear before any
// character that is neither a single quote nor a backslash, or before an escape.
STRING_LITERAL_LONG_SINGLE_QUOTE:'\'\'\'' (('\''|'\'\'')?(~['\\]|ECHAR|UCHAR))* '\'\'\'' ;
// Triple-double-quoted string: same structure with double quotes.
STRING_LITERAL_LONG_QUOTE : '"""' (('"' | '""')? (~["\\] | ECHAR | UCHAR))* '"""' ;
// Unicode escape: \u with four hex digits or \U with eight hex digits.
fragment UCHAR : '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX ;
// Character escape: backslash followed by one of t b n r f " ' \ .
fragment ECHAR : '\\' [tbnrf"'\\] ;
// Base characters permitted in prefixed names, following the PN_CHARS_BASE
// production of the Turtle grammar:
//   [A-Z] [a-z] [#x00C0-#x00D6] [#x00D8-#x00F6] [#x00F8-#x02FF]
//   [#x0370-#x037D] [#x037F-#x1FFF] [#x200C-#x200D] [#x2070-#x218F]
//   [#x2C00-#x2FEF] [#x3001-#xD7FF] [#xF900-#xFDCF] [#xFDF0-#xFFFD]
//   [#x10000-#xEFFFF]
// Code points above U+FFFF cannot be written with a four-digit \uXXXX escape;
// they use the \u{...} form, which requires ANTLR 4.7 or later.
fragment PN_CHARS_BASE : [A-Za-z\u00C0-\u00D6\u00D8-\u00F6] |
[\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF] |
[\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF] |
[\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}] ;
// Base characters plus underscore.
fragment PN_CHARS_U : PN_CHARS_BASE | '_' ;
// Characters allowed inside names: PN_CHARS_U, '-', digits, U+00B7,
// U+0300-U+036F and U+203F-U+2040.
fragment PN_CHARS : PN_CHARS_U | '-' | [0-9\u00B7\u0300-\u036F\u203F-\u2040] ;
// Prefix: starts with a base character; '.' may appear inside but not at the end.
fragment PN_PREFIX : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? ;
// Local name: may start with PN_CHARS_U, ':', a digit or a PLX escape; '.' may
// appear inside but not as the last character.
fragment PN_LOCAL : (PN_CHARS_U | [:0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? ;
// Escaped element of a local name: percent-encoding or backslash escape.
fragment PLX : PERCENT | PN_LOCAL_ESC ;
// Percent-encoded octet: '%' followed by two hex digits.
fragment PERCENT : '%' HEX HEX ;
// One hexadecimal digit, either case.
fragment HEX : [0-9A-Fa-f] ;
// Backslash escape inside a local name: '\' followed by one of
//   _ ~ . - ! $ & ' ( ) * + , ; = / ? # @ %
// as in the PN_LOCAL_ESC production of the Turtle grammar.
// The hyphen is written as \- so that it is a literal character; unescaped,
// ".-!" would be read as a (reversed) character range from '.' to '!'.
fragment PN_LOCAL_ESC : '\\' [_~.\-!$&'()*+,;=/?#@%] ;
// Runs of spaces, tabs, carriage returns and newlines are dropped by the lexer.
WS
    : [ \t\r\n]+ -> skip
    ;