v. 1.0.1

foo123 · Aug 6, 2015 · bf90a3b · bf90a3b
1 parent ea0d4be
commit bf90a3b
Show file tree

Hide file tree

Showing 9 changed files with 206 additions and 104 deletions.
diff --git a/README.md b/README.md
@@ -10,7 +10,7 @@ A simple and light-weight (~ 20kB minified, ~ 8kB zipped) [CodeMirror](https://g
 to generate syntax-highlight parsers (codemirror modes) from a grammar specification in JSON format.
 
 
-See also:  [ace-grammar](https://github.com/foo123/ace-grammar) , [prism-grammar](https://github.com/foo123/prism-grammar)
+See also: [ace-grammar](https://github.com/foo123/ace-grammar) , [prism-grammar](https://github.com/foo123/prism-grammar)
 
 
 ###Contents
@@ -33,12 +33,12 @@ Code Indentation is Codemirror default, looking for ways to add more elaborate i
 
 ###Features
 
-* A grammar can **extend other grammars** (so arbitrary variations and dialects can be parsed more easily)
-* [`Grammar`](/grammar-reference.md) includes: `Style` Model , `Lex` Model and `Syntax` Model (optional), plus a couple of *settings* (see examples)
-* `Grammar` **specification can be minimal** (defaults will be used) (see example grammars)
-* [`Grammar Syntax Model`](/grammar-reference.md) can enable highlight in a more context-specific way, plus detect possible *syntax errors*
-* [`Grammar Syntax Model`](/grammar-reference.md) can contain *recursive references* (see `/test/grammar-js-recursion.html`)
-* [`Grammar Syntax Model`](/grammar-reference.md) can be specificed using [`PEG`](https://en.wikipedia.org/wiki/Parsing_expression_grammar)-like notation or [`BNF`](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form)-like notation  (**NEW feature**)
+* A `Grammar` can **extend other `Grammars`** (so arbitrary `variations` and `dialects` can be handled more easily)
+* [`Grammar`](/grammar-reference.md) includes: **`Style Model`** , **`Lex Model`** and **`Syntax Model`** (optional), plus a couple of *settings* (see examples)
+* **`Grammar` specification can be minimal** (defaults will be used) (see example grammars)
+* `Grammar.Syntax Model` can enable highlight in a more context-specific way, plus detect possible *syntax errors*
+* `Grammar.Syntax Model` can contain **recursive references** (see `/test/grammar-js-recursion.html`)
+* `Grammar.Syntax Model` can be (fully) specified using [`PEG`](https://en.wikipedia.org/wiki/Parsing_expression_grammar)-like notation or [`BNF`](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form)-like notation (**NEW feature**)
 * Generated highlight modes can support **toggle comments** and **keyword autocompletion** functionality if defined in the grammar
 * Generated highlight modes can support **lint-like syntax-annotation** functionality generated from the grammar
 * Generated parsers are **optimized for speed and size**

diff --git a/api-reference.md b/api-reference.md
@@ -23,9 +23,9 @@ __Method__: `extend`
 extendedgrammar = CodeMirrorGrammar.extend( grammar, basegrammar1 [, basegrammar2, ..] );
 ```
 
-Extend a grammar with basegrammar1, basegrammar2, etc..
+Extend a `grammar` with `basegrammar1`, `basegrammar2`, etc..
 
-This way arbitrary dialects and variations can be handled more easily
+This way arbitrary `dialects` and `variations` can be handled more easily
 
 
 
@@ -35,9 +35,9 @@ __Method__: `parse`
 parsedgrammar = CodeMirrorGrammar.parse( grammar );
 ```
 
-This is used internally by the CodeMirrorGrammar Class
-In order to parse a JSON grammar to a form suitable to be used by the syntax-highlight parser.
-However user can use this method to cache a parsedgrammar to be used later.
+This is used internally by the `CodeMirrorGrammar` class
+in order to parse a `JSON grammar` into a form suitable for use by the syntax-highlight parser.
+However, the user can also call this method to cache a `parsedgrammar` for later use.
 Already parsed grammars are NOT re-parsed when passed through the parse method again
 
 
@@ -48,7 +48,7 @@ __Method__: `getMode`
 mode = CodeMirrorGrammar.getMode( grammar [, DEFAULT] );
 ```
 
-This is the main method which transforms a JSON grammar into a CodeMirror syntax-highlight parser.
-DEFAULT is the default return value (null by default) for things that are skipped or not styled
+This is the main method which transforms a `JSON grammar` into a `CodeMirror` syntax-highlight parser.
+`DEFAULT` is the default return value (`null` by default) for things that are skipped or not styled
 In general there is no need to set this value, unless you need to return something else
 
diff --git a/beeld.config b/beeld.config
@@ -40,7 +40,7 @@ tasks =[{}]
 
             "@@ROOT@@" = "this"
             "@@EXPORTS@@" = "exports"
-            "@@VERSION@@" = "1.0"
+            "@@VERSION@@" = "1.0.1"
             "@@MODULE_NAME@@" = "CodeMirrorGrammar"
 
         @

diff --git a/build/codemirror_grammar.js b/build/codemirror_grammar.js
@@ -1,7 +1,7 @@
 /**
 *
 *   CodeMirrorGrammar
-*   @version: 1.0
+*   @version: 1.0.1
 *
 *   Transform a grammar specification in JSON format, into a syntax-highlight parser mode for CodeMirror
 *   https://github.com/foo123/codemirror-grammar
@@ -292,7 +292,7 @@ var undef = undefined, PROTO = 'prototype', HAS = 'hasOwnProperty', IS_ENUM = 'p
     },
 
     newline_re = /\r\n|\r|\n/g, dashes_re = /[\-_]/g, 
-    bnf_special_re = /^([{}()*+?|'"]|\s)/,
+    peg_bnf_notation_re = /^([{}()*+?|'"]|\s)/,
 
     has_prefix = function(s, id) {
         return (
@@ -1546,20 +1546,13 @@ CompositeToken = Class(Token, {
     }
 });
 
-function parse_bnf_shorthand( tok, Lex, Syntax, sub_seq )
+function parse_peg_bnf_notation( tok, Lex, Syntax )
 {
     var alternation, sequence, token, literal, repeat, 
-        t, q, c, prev_token, curr_token;
+        t, q, c, prev_token, curr_token, stack, tmp;
 
-    if ( 'undefined' === typeof tok.pos )
-    {
-        t = new String( trim(tok) );
-        t.pos = 0;
-    }
-    else
-    {
-        t = tok;
-    }
+    t = new String( trim(tok) );
+    t.pos = 0;
 
     if ( 1 === t.length )
     {
@@ -1569,15 +1562,16 @@ function parse_bnf_shorthand( tok, Lex, Syntax, sub_seq )
     }
     else
     {
+        // parse PEG/BNF-like shorthand notations for syntax groups
         alternation = [ ];
         sequence = [ ];
         token = '';
+        stack = [];
         while ( t.pos < t.length )
         {
-            // parse BNF-like shorthand notations for syntax groups
             c = t.charAt( t.pos++ );
 
-            if ( bnf_special_re.test( c ) )
+            if ( peg_bnf_notation_re.test( c ) )
             {
                 if ( token.length )
                 {
@@ -1591,7 +1585,7 @@ function parse_bnf_shorthand( tok, Lex, Syntax, sub_seq )
                     sequence.push( token );
                     token = '';
                 }
-                
+
                 if ( '"' === c || "'" === c )
                 {
                     // literal token, quoted
@@ -1705,24 +1699,57 @@ function parse_bnf_shorthand( tok, Lex, Syntax, sub_seq )
                 else if ( '(' === c )
                 {
                     // start of grouped sub-sequence
-                    prev_token = parse_bnf_shorthand( t, Lex, Syntax, true );
-                    curr_token = '(' + prev_token + ')';
-                    if ( !Syntax[curr_token] ) Syntax[curr_token] = clone( Lex[prev_token] || Syntax[prev_token] );
-                    sequence.push( curr_token );
+                    stack.push([sequence, alternation, token]);
+                    sequence = []; alternation = []; token = '';
                 }
 
                 else if ( ')' === c )
                 {
                     // end of grouped sub-sequence
-                    if ( sub_seq )
+                    if ( sequence.length > 1 )
                     {
-                        //t.pos++;
-                        break;
+                        curr_token = '' + sequence.join( " " );
+                        if ( !Syntax[curr_token] )
+                        {
+                            Syntax[curr_token] = {
+                                type: 'group',
+                                match: 'sequence',
+                                tokens: sequence
+                            };
+                        }
+                        alternation.push( curr_token );
                     }
-                    else
+                    else if ( sequence.length )
                     {
-                        continue;
+                        alternation.push( sequence[0] );
                     }
+                    sequence = [];
+
+                    if ( alternation.length > 1 )
+                    {
+                        curr_token = '' + alternation.join( " | " );
+                        if ( !Syntax[curr_token] )
+                        {
+                            Syntax[curr_token] = {
+                                type: 'group',
+                                match: 'either',
+                                tokens: alternation
+                            };
+                        }
+                    }
+                    else if ( alternation.length )
+                    {
+                        curr_token = alternation[ 0 ];
+                    }
+                    alternation = [];
+
+                    tmp = stack.pop( );
+                    sequence = tmp[0]; alternation = tmp[1]; token = tmp[2];
+
+                    prev_token = curr_token;
+                    curr_token = '(' + prev_token + ')';
+                    if ( !Syntax[curr_token] ) Syntax[curr_token] = clone( Lex[prev_token] || Syntax[prev_token] );
+                    sequence.push( curr_token );
                 }
 
                 else // space
@@ -1861,7 +1888,7 @@ function get_tokenizer( tokenID, RegExpID, Lex, Syntax, Style,
 
     if ( T_STR & get_type( tok ) ) 
     {
-        tok = parse_bnf_shorthand( tok, Lex, Syntax );
+        tok = parse_peg_bnf_notation( tok, Lex, Syntax );
         tok = Lex[ tok ] || Syntax[ tok ];
     }
 
@@ -1884,24 +1911,24 @@ function get_tokenizer( tokenID, RegExpID, Lex, Syntax, Style,
         // loop and get all references
     }
 
-    // provide some defaults
     if ( 'undefined' === typeof tok.type )
     {
-        if ( tok['either'] )
-        {
-            tok.type = "group";
-            tok.match = "either";
-            tok.tokens = tok['either'];
-            delete tok['either'];
-        }
-        else if ( tok['all'] || tok['sequence'] )
+        // provide some defaults
+        if ( tok['all'] || tok['sequence'] )
         {
             tok.type = "group";
             tok.match = "sequence";
             tok.tokens = tok['all'] || tok['sequence'];
             if ( tok['all'] ) delete tok['all'];
             else delete tok['sequence'];
         }
+        else if ( tok['either'] )
+        {
+            tok.type = "group";
+            tok.match = "either";
+            tok.tokens = tok['either'];
+            delete tok['either'];
+        }
         else if ( tok['zeroOrMore'] )
         {
             tok.type = "group";
@@ -1923,6 +1950,30 @@ function get_tokenizer( tokenID, RegExpID, Lex, Syntax, Style,
             tok.tokens = tok['zeroOrOne'];
             delete tok['zeroOrOne'];
         }
+        else if ( tok['comment'] )
+        {
+            tok.type = "comment";
+            tok.tokens = tok['comment'];
+            delete tok['comment'];
+        }
+        else if ( tok['block'] )
+        {
+            tok.type = "block";
+            tok.tokens = tok['block'];
+            delete tok['block'];
+        }
+        else if ( tok['escaped-block'] )
+        {
+            tok.type = "escaped-block";
+            tok.tokens = tok['escaped-block'];
+            delete tok['escaped-block'];
+        }
+        else if ( tok['simple'] )
+        {
+            tok.type = "simple";
+            tok.tokens = tok['simple'];
+            delete tok['simple'];
+        }
         else
         {
             tok.type = "simple";
@@ -2179,7 +2230,7 @@ function parse_grammar( grammar )
 /**
 *
 *   CodeMirrorGrammar
-*   @version: 1.0
+*   @version: 1.0.1
 *
 *   Transform a grammar specification in JSON format, into a syntax-highlight parser mode for CodeMirror
 *   https://github.com/foo123/codemirror-grammar
@@ -2536,7 +2587,7 @@ function get_mode( grammar, DEFAULT )
 [/DOC_MARKDOWN]**/
 var CodeMirrorGrammar = exports['CodeMirrorGrammar'] = {
 
-    VERSION: "1.0",
+    VERSION: "1.0.1",
 
     // extend a grammar using another base grammar
     /**[DOC_MARKDOWN]
@@ -2546,9 +2597,9 @@ var CodeMirrorGrammar = exports['CodeMirrorGrammar'] = {
     * extendedgrammar = CodeMirrorGrammar.extend( grammar, basegrammar1 [, basegrammar2, ..] );
     * ```
     *
-    * Extend a grammar with basegrammar1, basegrammar2, etc..
+    * Extend a `grammar` with `basegrammar1`, `basegrammar2`, etc..
     *
-    * This way arbitrary dialects and variations can be handled more easily
+    * This way arbitrary `dialects` and `variations` can be handled more easily
     [/DOC_MARKDOWN]**/
     extend: extend,
 
@@ -2560,9 +2611,9 @@ var CodeMirrorGrammar = exports['CodeMirrorGrammar'] = {
     * parsedgrammar = CodeMirrorGrammar.parse( grammar );
     * ```
     *
-    * This is used internally by the CodeMirrorGrammar Class
-    * In order to parse a JSON grammar to a form suitable to be used by the syntax-highlight parser.
-    * However user can use this method to cache a parsedgrammar to be used later.
+    * This is used internally by the `CodeMirrorGrammar` class
+    * in order to parse a `JSON grammar` into a form suitable for use by the syntax-highlight parser.
+    * However, the user can also call this method to cache a `parsedgrammar` for later use.
     * Already parsed grammars are NOT re-parsed when passed through the parse method again
     [/DOC_MARKDOWN]**/
     parse: parse_grammar,
@@ -2575,8 +2626,8 @@ var CodeMirrorGrammar = exports['CodeMirrorGrammar'] = {
     * mode = CodeMirrorGrammar.getMode( grammar [, DEFAULT] );
     * ```
     *
-    * This is the main method which transforms a JSON grammar into a CodeMirror syntax-highlight parser.
-    * DEFAULT is the default return value (null by default) for things that are skipped or not styled
+    * This is the main method which transforms a `JSON grammar` into a `CodeMirror` syntax-highlight parser.
+    * `DEFAULT` is the default return value (`null` by default) for things that are skipped or not styled
     * In general there is no need to set this value, unless you need to return something else
     [/DOC_MARKDOWN]**/
     getMode: get_mode

diff --git a/build/codemirror_grammar.min.js b/build/codemirror_grammar.min.js
diff --git a/grammar-reference.md b/grammar-reference.md
@@ -171,7 +171,7 @@ example:
 
 **Syntax shorthand PEG / BNF-like notations (new)**
 
-`Syntax` part supports *shorthand definitions* (similar to `BNF-style` definitions) for syntax sequences and groups of syntax sequences:
+`Syntax` part supports *shorthand definitions* (similar to `PEG / BNF-style` definitions) for syntax sequences and groups of syntax sequences:
 
 Specifically:
 
@@ -252,7 +252,7 @@ Specificaly:
     "tokens": ["t1* t2", "t3"]
 }
 
-// a literal tokens wrapped in quotes (' or ")
+// literal tokens wrapped in quotes (' or ")
 // are equivalent to their literal value
 // empty literal token (i.e '') matches empty production
 // NOTE: unlike NON-Space token definition described previously

diff --git a/src/main.js b/src/main.js
@@ -368,9 +368,9 @@ var CodeMirrorGrammar = exports['@@MODULE_NAME@@'] = {
     * extendedgrammar = CodeMirrorGrammar.extend( grammar, basegrammar1 [, basegrammar2, ..] );
     * ```
     *
-    * Extend a grammar with basegrammar1, basegrammar2, etc..
+    * Extend a `grammar` with `basegrammar1`, `basegrammar2`, etc..
     *
-    * This way arbitrary dialects and variations can be handled more easily
+    * This way arbitrary `dialects` and `variations` can be handled more easily
     [/DOC_MARKDOWN]**/
     extend: extend,
 
@@ -382,9 +382,9 @@ var CodeMirrorGrammar = exports['@@MODULE_NAME@@'] = {
     * parsedgrammar = CodeMirrorGrammar.parse( grammar );
     * ```
     *
-    * This is used internally by the CodeMirrorGrammar Class
-    * In order to parse a JSON grammar to a form suitable to be used by the syntax-highlight parser.
-    * However user can use this method to cache a parsedgrammar to be used later.
+    * This is used internally by the `CodeMirrorGrammar` class
+    * in order to parse a `JSON grammar` into a form suitable for use by the syntax-highlight parser.
+    * However, the user can also call this method to cache a `parsedgrammar` for later use.
     * Already parsed grammars are NOT re-parsed when passed through the parse method again
     [/DOC_MARKDOWN]**/
     parse: parse_grammar,
@@ -397,8 +397,8 @@ var CodeMirrorGrammar = exports['@@MODULE_NAME@@'] = {
     * mode = CodeMirrorGrammar.getMode( grammar [, DEFAULT] );
     * ```
     *
-    * This is the main method which transforms a JSON grammar into a CodeMirror syntax-highlight parser.
-    * DEFAULT is the default return value (null by default) for things that are skipped or not styled
+    * This is the main method which transforms a `JSON grammar` into a `CodeMirror` syntax-highlight parser.
+    * `DEFAULT` is the default return value (`null` by default) for things that are skipped or not styled
     * In general there is no need to set this value, unless you need to return something else
     [/DOC_MARKDOWN]**/
     getMode: get_mode