From 4dbdf831f7cba77d4be89891076cfce54eafc206 Mon Sep 17 00:00:00 2001 From: Carl Davidson Date: Tue, 5 Jan 2016 22:55:11 -0600 Subject: [PATCH] updated _parse_rx() tests, among others --- peg.test.regex.scad | 175 +----------------------------------------- strings.scad | 12 ++- strings.test.scad | 180 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 161 insertions(+), 206 deletions(-) diff --git a/peg.test.regex.scad b/peg.test.regex.scad index f764f7b..b49f414 100644 --- a/peg.test.regex.scad +++ b/peg.test.regex.scad @@ -1,178 +1,5 @@ -include +include -_rx_peg = -_index_peg_refs -( - ["grammar", - ["private_rule", "operation", - ["sequence", - ["choice", - ["ref", "choice"], - ["ref", "sequence"], - ["ref", "postfix"], - ], - // ["negative_lookahead", ["wildcard"]], - ] - ], - ["private_rule", "postfix", - ["choice", - ["ref", "many_to_many"], - ["ref", "one_to_many"], - ["ref", "zero_to_many"], - ["ref", "zero_to_one"], - ["ref", "primitive"] - ], - ], - - //BINARY OPERATIONS - ["rule", "choice", - ["sequence", - ["choice", - ["ref", "sequence"], - ["ref", "postfix"] - ], - ["one_to_many", - ["sequence", - ["private", ["literal", "|"]], - ["choice", - ["ref", "sequence"], - ["ref", "postfix"] - ], - ] - ] - ] - ], - ["rule", "sequence", - ["sequence", - ["ref", "postfix"], - ["one_to_many", - ["ref", "postfix"], - ] - ] - ], - - ["rule", "positive_lookahead", - ["sequence", - ["private", ["literal", "(?="]], - ["ref", "operation"], - ["private", ["literal", ")"]], - ] - ], - ["rule", "negative_lookahead", - ["sequence", - ["private", ["literal", "(?!"]], - ["ref", "operation"], - ["private", ["literal", ")"]], - ] - ], - - //UNARY POSTFIX OPERATIONS - ["rule", "one_to_many", - ["sequence", - ["ref", "primitive"], - ["private", ["literal", "+"]] - ] - ], - ["rule", "zero_to_many", - ["sequence", - ["ref", "primitive"], - ["private", ["literal", "*"]] - ] - ], - ["rule", "zero_to_one", - ["sequence", - ["ref", "primitive"], - ["private", ["literal", "?"]] - ] - ], - ["rule", "many_to_many", - ["sequence", - ["ref", "primitive"], - ["private", ["literal", "{"]], - ["character_set_shorthand", "d"], - ["zero_to_one", - ["sequence", - ["private", ["literal", ","]], - ["character_set_shorthand", "d"], - ], - ], - ["private", ["literal", "}"]], - ] - ], - - //PRIMITIVES - ["private_rule", "primitive", - ["choice", - ["ref", "wildcard"], - ["ref", "character_set_shorthand"], - ["ref", "negative_character_set"], - ["ref", "positive_character_set"], - ["ref", "negative_lookahead"], - ["ref", "positive_lookahead"], - ["sequence", - ["private", ["literal", "("]], - ["ref", "operation"], - ["private", ["literal", ")"]], - ], - ["ref", "literal"], - ], - ], - - ["rule", "wildcard", - ["private", ["literal", "."]], - ], - ["rule", "literal", - ["negative_character_set", - "{","}","[","]","(",")", - "|","*","+","?",".","\\","." - ], - ], - ["rule", "positive_character_set", - ["sequence", - ["private", ["literal", "["]], - ["one_to_many", - ["choice", - ["ref", "character_range"], - ["ref", "character_set_shorthand"], - ["ref", "character_literal"] - ] - ], - ["private", ["literal", "]"]], - ] - ], - ["rule", "negative_character_set", - ["sequence", - ["private", ["literal", "[^"]], - ["one_to_many", - ["choice", - ["ref", "character_range"], - ["ref", "character_set_shorthand"], - ["ref", "character_literal"] - ] - ], - ["private", ["literal", "]"]], - ] - ], - ["rule", "character_literal", - ["negative_character_set", "]"] - ], - ["rule", "character_range", - ["sequence", - ["character_set_shorthand", "w"], - ["private", ["literal", "-"]], - ["character_set_shorthand", "w"], - ], - ], - ["rule", "character_set_shorthand", - ["sequence", - ["private", ["literal", "\\"]], - ["positive_character_set", - "s","S","d","D","w","W", "\\", "]", "(", ")" - ] - ] - ], - ] -); echo(_unit_test("regex primitives", [ _match_parsed_peg( "", _rx_peg, peg_op=_get_rule(_rx_peg, "wildcard") )[_PARSED], undef, diff --git a/strings.scad b/strings.scad index 7ab4ef0..b8e9b02 100644 --- a/strings.scad +++ b/strings.scad @@ -395,6 +395,8 @@ _index_peg_refs //PRIMITIVES ["private_rule", "primitive", ["choice", + ["ref", "start"], + ["ref", "end"], ["ref", "wildcard"], ["ref", "character_set_shorthand"], ["ref", "negative_character_set"], @@ -410,13 +412,20 @@ _index_peg_refs ], ], + ["rule", "start", + ["private", ["literal", "^"]], + ], + ["rule", "end", + ["private", ["literal", "$"]], + ], ["rule", "wildcard", ["private", ["literal", "."]], ], ["rule", "literal", ["negative_character_set", "{","}","[","]","(",")", - "|","*","+","?",".","\\","." + "|","*","+","?",".","\\",".", + "^","$", ], ], ["rule", "positive_character_set", @@ -895,4 +904,3 @@ function _unit_test(name, tests) = : str(name, ":\tpassed") ; - diff --git a/strings.test.scad b/strings.test.scad index 7786286..5041c96 100644 --- a/strings.test.scad +++ b/strings.test.scad @@ -138,45 +138,165 @@ echo([ -*echo([ +echo([ "_parse_rx:", "atomic operations", - _parse_rx("a?"), - _parse_rx("a*"), - _parse_rx("a+"), - _parse_rx("foo"), - _parse_rx("a|b"), + _parse_rx("a?") == ["zero_to_one", ["literal", "a"]], + _parse_rx("a*") == ["zero_to_many", ["literal", "a"]], + _parse_rx("a+") == ["one_to_many", ["literal", "a"]], + _parse_rx("foo") + == ["sequence", ["literal", "f"], + ["literal", "o"], + ["literal", "o"] + ], + _parse_rx("a|b") + == ["choice", ["literal", "a"], + ["literal", "b"] + ], "variable repetition", - _parse_rx(".{3}"), - _parse_rx(".{3,5}"), + _parse_rx(".{3}") == ["many_to_many", ["wildcard"], "3"], + _parse_rx(".{3,5}") == ["many_to_many", ["wildcard"], "35"], "charsets", - _parse_rx(".[abcdef]"), - _parse_rx("[a-z]"), - _parse_rx(".[^abcdef]"), - _parse_rx("^[a-z]"), + _parse_rx(".[abcdef]") + == ["sequence", ["wildcard"], + ["positive_character_set", + ["character_literal", "a"], + ["character_literal", "b"], + ["character_literal", "c"], + ["character_literal", "d"], + ["character_literal", "e"], + ["character_literal", "f"] + ] + ], + _parse_rx("[a-z]") == ["positive_character_set", ["character_range", "az"]], + _parse_rx(".[^abcdef]") + == ["sequence", ["wildcard"], + ["negative_character_set", + ["character_literal", "a"], + ["character_literal", "b"], + ["character_literal", "c"], + ["character_literal", "d"], + ["character_literal", "e"], + ["character_literal", "f"] + ] + ], + _parse_rx("^[a-z]") + == ["sequence", ["start"], + ["positive_character_set", ["character_range", "az"]] + ], "escape characters", - _parse_rx("\\d"), - _parse_rx("\\d\\d"), - _parse_rx("\\d?"), - _parse_rx("\\s\\d?"), - _parse_rx("\\d?|b*\\d+"), - _parse_rx("a|\\(bc\\)"), + _parse_rx("\\d") == ["character_set_shorthand", "d"], + _parse_rx("\\d\\d") + == ["sequence", + ["character_set_shorthand", "d"], + ["character_set_shorthand", "d"] + ], + _parse_rx("\\d?") == ["zero_to_one", ["character_set_shorthand", "d"]], + _parse_rx("\\s\\d?") + == ["sequence", + ["character_set_shorthand", "s"], + ["zero_to_one", ["character_set_shorthand", "d"]] + ], + _parse_rx("\\d?|b*\\d+") + == ["choice", + ["zero_to_one", ["character_set_shorthand", "d"]], + ["sequence", + ["zero_to_many", ["literal", "b"]], + ["one_to_many", ["character_set_shorthand", "d"]] + ] + ], + _parse_rx("a|\\(bc\\)") + == ["choice", + ["literal", "a"], + ["sequence", ["character_set_shorthand", "("], + ["literal", "b"], + ["literal", "c"], + ["character_set_shorthand", ")"] + ] + ], "order of operations", - _parse_rx("ab?"), - _parse_rx("(ab)?"), - _parse_rx("a|b?"), - _parse_rx("(a|b)?"), - _parse_rx("a|bc"), - _parse_rx("ab|c"), - _parse_rx("(a|b)c"), - _parse_rx("a|(bc)"), - _parse_rx("a?|b*c+"), - _parse_rx("a?|b*c+d|d*e+"), + _parse_rx("ab?") + == ["sequence", + ["literal", "a"], + ["zero_to_one", ["literal", "b"]] + ], + _parse_rx("(ab)?") + == ["zero_to_one", + ["sequence", + ["literal", "a"], + ["literal", "b"] + ] + ], + _parse_rx("a|b?") + == ["choice", + ["literal", "a"], + ["zero_to_one", ["literal", "b"]] + ], + _parse_rx("(a|b)?") + == ["zero_to_one", + ["choice", + ["literal", "a"], + ["literal", "b"] + ] + ], + _parse_rx("a|bc") + == ["choice", + ["literal", "a"], + ["sequence", + ["literal", "b"], + ["literal", "c"] + ] + ], + _parse_rx("ab|c") + == ["choice", + ["sequence", + ["literal", "a"], + ["literal", "b"] + ], + ["literal", "c"] + ], + _parse_rx("(a|b)c") + == ["sequence", + ["choice", + ["literal", "a"], + ["literal", "b"] + ], + ["literal", "c"] + ], + _parse_rx("a|(bc)") + == ["choice", + ["literal", "a"], + ["sequence", + ["literal", "b"], + ["literal", "c"] + ] + ], + _parse_rx("a?|b*c+") + == ["choice", + ["zero_to_one", ["literal", "a"]], + ["sequence", + ["zero_to_many", ["literal", "b"]], + ["one_to_many", ["literal", "c"]] + ] + ], + _parse_rx("a?|b*c+d|d*e+") + == ["choice", + ["zero_to_one", ["literal", "a"]], + ["sequence", + ["zero_to_many", ["literal", "b"]], + ["one_to_many", ["literal", "c"]], + ["literal", "d"] + ], + ["sequence", + ["zero_to_many", ["literal", "d"]], + ["one_to_many", ["literal", "e"]] + ] + ], "edge cases", - _parse_rx("a"), + _parse_rx("a") == ["literal", "a"], _parse_rx("") , - _parse_rx(undef), + _parse_rx(undef) == undef, "invalid syntax", // _parse_rx("((()))"), // _parse_rx( "(()))"),