diff --git a/src/main/scala/dsls/regex/Program.scala b/src/main/scala/dsls/regex/Program.scala
index 38a0302..ae119c2 100644
--- a/src/main/scala/dsls/regex/Program.scala
+++ b/src/main/scala/dsls/regex/Program.scala
@@ -1,24 +1,28 @@
 package dsls.regex

+import scala.language.postfixOps
+
 object Program extends App {

+  import RegularExpression._
+
   /****************************************************************************
-   * TODO: Extend characters to support regular expressions
+   * Extend characters to support regular expressions
    *
    * Make it possible to replace the definition of the numbers with:
    *   val zero = '0'
    *   etc.
    ***************************************************************************/
-  val zero = Literal('0')
-  val one = Literal('1')
-  val two = Literal('2')
-  val three = Literal('3')
-  val four = Literal('4')
-  val five = Literal('5')
-  val six = Literal('6')
-  val seven = Literal('7')
-  val eight = Literal('8')
-  val nine = Literal('9')
+  val zero = '0'
+  val one = '1'
+  val two = '2'
+  val three = '3'
+  val four = '4'
+  val five = '5'
+  val six = '6'
+  val seven = '7'
+  val eight = '8'
+  val nine = '9'

   require(zero matches "0")
   require(one matches "1")
@@ -32,23 +36,22 @@ object Program extends App {
   require(nine matches "9")

   /****************************************************************************
-   * TODO: Extend strings to support regular expressions
+   * Extend strings to support regular expressions
    *
    * Make it possible to replace the definition of answer with:
    *   val answer = "42"
    ***************************************************************************/
-  val answer = Concat(four, two)
+  val answer = "42"

   require(answer matches "42")

   /****************************************************************************
-   * TODO: Add the union operator for regular expressions
+   * Add the union operator for regular expressions
    *
    * Make it possible to replace the definition of digit with:
    *   val digit = '0' || '1' || '2' || '3' || '4' || '5' || '6' || '7' || '8' || '9'
    ***************************************************************************/
-  val digit = Union(zero, Union(one, Union(two, Union(three, Union(four,
-    Union(five, Union(six, Union(seven, Union(eight, nine)))))))))
+  val digit = '0' || '1' || '2' || '3' || '4' || '5' || '6' || '7' || '8' || '9'

   require(digit matches "0")
   require(digit matches "1")
@@ -62,22 +65,22 @@ object Program extends App {
   require(digit matches "9")

   /****************************************************************************
-   * TODO: Add the concatenation operator for regular expressions
+   * Add the concatenation operator for regular expressions
    *
-   * Make it possible to replace the definition of digit with:
+   * Make it possible to replace the definition of pi with:
    *   val pi = '3' ~ '1' ~ '4'
    ***************************************************************************/
-  val pi = Concat(Literal('3'), Concat(Literal('1'), Literal('4')))
+  val pi = '3' ~ '1' ~ '4'

   require(pi matches "314")

   /****************************************************************************
-   * TODO: Add the star operator for regular expressions
+   * Add the star operator for regular expressions
    *
    * Make it possible to replace the definition of zeroOrMoreDigits with:
    *   val zeroOrMoreDigits = digit <*>
    ***************************************************************************/
-  val zeroOrMoreDigits = Star(digit)
+  val zeroOrMoreDigits = digit <*>

   require(zeroOrMoreDigits matches "")
   require(zeroOrMoreDigits matches "0")
@@ -89,9 +92,9 @@ object Program extends App {
-   * TODO: Add the plus operator for regular expressions
+   * Add the plus operator for regular expressions
    *
    * Make it possible to replace the definition of number with:
-   *   val number = digit <+>
+   *   val number = digit <+>
    ***************************************************************************/
-  val number = Concat(digit, zeroOrMoreDigits)
+  val number = digit <+>

   require(!(number matches ""))
   require(number matches "0")
@@ -99,13 +102,13 @@ object Program extends App {
   require(number matches "09")
   require(number matches "987651234")

-  /****************************************************************************
-   * TODO: Add the repetition operator for regular expressions
+  /****************************************************************************
+   * Add the repetition operator for regular expressions
    *
    * Make it possible to replace the definition of cThree with:
    *   val cThree = 'c'{3}
    ***************************************************************************/
-  val cThree = Concat(Literal('c'), Concat(Literal('c'), Literal('c')))
+  val cThree = 'c'{3}

   require(cThree matches "ccc")

@@ -115,9 +118,7 @@ object Program extends App {
    * the following several definitions with:
    *   val pattern = "42" || ( ('a' <*>) ~ ('b' <+>) ~ ('c'{3}))
    ***************************************************************************/
-  val aStar = Star(Literal('a'))
-  val bPlus = Concat(Literal('b'), Star(Literal('b')))
-  val pattern = Union(answer, Concat(aStar, Concat(bPlus, cThree)))
+  val pattern = "42" || ( ('a' <*>) ~ ('b' <+>) ~ ('c'{3}))

   require(pattern matches "42")
   require(pattern matches "bccc")
@@ -133,13 +134,7 @@ object Program extends App {
    * the following several definitions with:
    *   val helloworld = ("hello" <*>) ~ "world"
    ***************************************************************************/
-  val hello = Concat(Literal('h'), Concat(Literal('e'), Concat(Literal('l'),
-    Concat(Literal('l'), Literal('o')))))
-
-  val world = Concat(Literal('w'), Concat(Literal('o'), Concat(Literal('r'),
-    Concat(Literal('l'), Literal('d')))))
-
-  val helloworld = Concat(Star(hello), world)
+  val helloworld = ("hello" <*>) ~ "world"

   require(helloworld matches "helloworld")
   require(helloworld matches "world")
@@ -152,10 +147,7 @@ object Program extends App {
    * the following several definitions with:
    *   val telNumber = '(' ~ digit{3} ~ ')' ~ digit{3} ~ '-' ~ digit{4}
    ***************************************************************************/
-  val threeDigits = Concat(digit, Concat(digit, digit))
-  val fourDigits = Concat(threeDigits, digit)
-  val areaCode = Concat(Literal('('), Concat(threeDigits, Literal(')')))
-  val telNumber = Concat(areaCode, Concat(threeDigits, Concat(Literal('-'), fourDigits)))
+  val telNumber = '(' ~ digit{3} ~ ')' ~ digit{3} ~ '-' ~ digit{4}

   require(telNumber matches "(202)456-1111")
 }
diff --git a/src/main/scala/dsls/regex/RegexMatcher.scala b/src/main/scala/dsls/regex/RegexMatcher.scala
index c644967..d0fee15 100644
--- a/src/main/scala/dsls/regex/RegexMatcher.scala
+++ b/src/main/scala/dsls/regex/RegexMatcher.scala
@@ -1,12 +1,27 @@
 package dsls.regex

+import scala.language.implicitConversions
+
 object RegexMatcher {
+  import RegularExpression._ // might as well take advantage of the DSL :)
+
+  // add a bit more DSL-ness (this definitely crosses the border of ridiculous)
+  // but it lets us say:
+  //    ε ∈ language
+  // and
+  //    ε ∉ language
+  object EpsilonChecker {
+    def ∈(language: RegularExpression) = matchesEpsilon(language)
+    def ∉(language: RegularExpression) = !(matchesEpsilon(language))
+  }
+  implicit def epsilonToChecker(e: ε.type): EpsilonChecker.type = EpsilonChecker
+
   /**
    * returns true if the given string matches the given pattern
    */
   def matches(string: String, pattern: RegularExpression): Boolean =
     if (string.isEmpty)
-      matchesEpsilon(pattern)
+      ε ∈ pattern
     else
       matches(string.tail, ∂(string.head, pattern))

@@ -23,32 +38,32 @@ object RegexMatcher {
    *
    * ∂c( {d} ) = {ε} if c = d; ∅ otherwise
    *
-   * ∂c( L1 ∪ L2 ) = ∂c( L1 ) ∪ ∂c( L2 )
+   * ∂c( l1 ∪ l2 ) = ∂c( l1 ) ∪ ∂c( l2 )
    *
-   * ∂c( L1 ⋅ L2 ) = ∂c( L1 ) ⋅ L2 if ε ∉ L1
-   *                 (∂c( L1 ) ⋅ L2) ∪ ∂c( L2 ) otherwise
+   * ∂c( l1 ⋅ l2 ) = ∂c( l1 ) ⋅ l2 if ε ∉ l1
+   *                 (∂c( l1 ) ⋅ l2) ∪ ∂c( l2 ) otherwise
    *
-   * ∂c( L* ) = ∂c( L ) ⋅ L*
+   * ∂c( l* ) = ∂c( l ) ⋅ l*
    */
   def ∂(c: Char, pattern: RegularExpression): RegularExpression =
     pattern match {
-      case EMPTY | EPSILON ⇒ EMPTY
-      case Literal(d) ⇒ if (c == d) EPSILON else EMPTY
-      case Union(l1, l2) ⇒ Union(∂(c, l1), ∂(c, l2))
-      case Concat(l1, l2) ⇒ if (!matchesEpsilon(l1))
-          Concat(∂(c, l1), l2)
+      case `∅` | `ε` ⇒ ∅
+      case Literal(d) ⇒ if (c == d) ε else ∅
+      case l1 ∪ l2 ⇒ ∂(c, l1) ∪ ∂(c, l2)
+      case l1 ⋅ l2 ⇒ if (ε ∉ l1)
+          ∂(c, l1) ⋅ l2
         else
-          Union(Concat(∂(c, l1), l2), ∂(c, l2))
-      case Star(expr) ⇒ Concat(∂(c, expr), pattern)
+          (∂(c, l1) ⋅ l2) ∪ ∂(c, l2)
+      case Star(l) ⇒ ∂(c, l) ⋅ pattern
     }

   /**
    * returns true if the empty string matches the pattern
    */
   def matchesEpsilon(pattern: RegularExpression): Boolean = pattern match {
-    case EPSILON | Star(_) ⇒ true
-    case Union(a, b) ⇒ matchesEpsilon(a) || matchesEpsilon(b)
-    case Concat(a, b) ⇒ matchesEpsilon(a) && matchesEpsilon(b)
+    case `ε` | Star(_) ⇒ true
+    case a ∪ b ⇒ (ε ∈ a) || (ε ∈ b)
+    case a ⋅ b ⇒ (ε ∈ a) && (ε ∈ b)
     case _ ⇒ false
   }
 }
\ No newline at end of file
diff --git a/src/main/scala/dsls/regex/RegularExpression.scala b/src/main/scala/dsls/regex/RegularExpression.scala
index 199fe1e..7fc90d3 100644
--- a/src/main/scala/dsls/regex/RegularExpression.scala
+++ b/src/main/scala/dsls/regex/RegularExpression.scala
@@ -1,5 +1,8 @@
 package dsls.regex

+import scala.language.implicitConversions
+import scala.language.postfixOps
+
 /**
  * Modify this file to implement an internal DSL for regular expressions.
  *
@@ -11,6 +14,32 @@ package dsls.regex
 abstract class RegularExpression {
   /** returns true if the given string matches this regular expression */
   def matches(string: String) = RegexMatcher.matches(string, this)
+  def matches(char: Char) = RegexMatcher.matches(char.toString, this)
+
+  /** the union operator */
+  def ||(other: RegularExpression) = Union(this, other)
+  def ∪(other: RegularExpression) = this || other
+
+  /** the concatenation operator */
+  def ~(other: RegularExpression) = Concat(this, other)
+  def ⋅(other: RegularExpression) = this ~ other
+
+  /** the star operator (zero or more repetitions) */
+  def <*> = Star(this)
+  def * = this <*>
+
+  /** the plus operator (one or more repetitions) */
+  def <+> = this ~ (this <*>)
+  def + = this <+>
+
+  /** the repetition operator (n repetitions) */
+  def apply(n: Int): RegularExpression = {
+    require(n >= 0)
+    if (n == 0)
+      EPSILON
+    else
+      this ~ this{n - 1}
+  }
 }

 /** a regular expression that matches nothing */
@@ -34,3 +63,54 @@ case class Concat(val left: RegularExpression, val right: RegularExpression)
  * expression
  */
 case class Star(val expression: RegularExpression) extends RegularExpression
+
+/**
+ * A companion object that extends chars and strings to regexes
+ * (and adds other sugar)
+ */
+object RegularExpression {
+  // convert characters to regular expressions
+  implicit def charToRegex(c: Char): RegularExpression = Literal(c)
+
+  // convert strings to regular expressions
+  implicit def stringToRegex(s: String): RegularExpression = {
+    val chars: List[RegularExpression] = s.toList map Literal
+    val initial: RegularExpression = EPSILON
+    chars.foldRight(initial)(Concat(_, _))
+  }
+
+  // syntactic sugar
+  val ε : RegularExpression = EPSILON
+  val ∅ : RegularExpression = EMPTY
+
+  // this might be too ridiculous, but it lets us say:
+  //    string ∈ pattern or char ∈ pattern
+  // and
+  //    string ∉ pattern or char ∉ pattern
+  case class Flipper(s: String) {
+    def ∈(pattern: RegularExpression) = pattern matches s
+    def ∉(pattern: RegularExpression) = !(pattern matches s)
+  }
+  implicit def stringToMatcher(s: String): Flipper = Flipper(s)
+  implicit def charToMatcher(c: Char): Flipper = Flipper(c.toString)
+
+  // this bit lets us pattern-match on unions, using the binary operators
+  object || {
+    def unapply(arg: Union): Option[(RegularExpression, RegularExpression)] =
+      Some((arg.left, arg.right))
+  }
+  object ∪ {
+    def unapply(arg: Union): Option[(RegularExpression, RegularExpression)] =
+      Some((arg.left, arg.right))
+  }
+
+  // this bit lets us pattern-match on concatenations, using the binary operators
+  object ~ {
+    def unapply(arg: Concat): Option[(RegularExpression, RegularExpression)] =
+      Some((arg.left, arg.right))
+  }
+  object ⋅ {
+    def unapply(arg: Concat): Option[(RegularExpression, RegularExpression)] =
+      Some((arg.left, arg.right))
+  }
+}
diff --git a/src/test/scala/dsls/regex/RegexCheck.scala b/src/test/scala/dsls/regex/RegexCheck.scala
index 995bd32..b118976 100644
--- a/src/test/scala/dsls/regex/RegexCheck.scala
+++ b/src/test/scala/dsls/regex/RegexCheck.scala
@@ -3,32 +3,34 @@ package dsls.regex
 import org.scalacheck._
 import Prop.forAll

+import scala.language.postfixOps
+
 /**
  * These tests are to make sure that the pattern-matching algorithm works.
  *
  * BUT: you can modify these tests to use your internal DSL!
  */
 object RegexCheck extends Properties("Regex") {
-  property("empty") = forAll { s: String ⇒ !(EMPTY matches s) }
+  import RegularExpression._
+
+  property("empty") = forAll { s: String ⇒ s ∉ ∅ }

-  property("literals") = forAll { c: Char ⇒ Literal(c) matches c.toString }
+  property("literals") = forAll { c: Char ⇒ c matches c }

   property("concat") = forAll { (c1: Char, c2: Char) ⇒
-    Concat(Literal(c1), Literal(c2)) matches (c1.toString + c2.toString)
+    (c1 ⋅ c2) matches (c1.toString + c2.toString)
   }

   property("union") = forAll { (c1: Char, c2: Char) ⇒
-    val pattern = Union(Literal(c1), Literal(c2))
-    (pattern matches c1.toString) && (pattern matches c2.toString)
+    val union = c1 ∪ c2
+    (c1 ∈ union) && (c2 ∈ union)
   }

   property("star") = forAll { c: Char ⇒
-    val pattern = Star(Literal(c))
-    (pattern matches "") && (pattern matches c.toString) &&
+    val pattern = (c <*>)
+    (pattern matches "") && (pattern matches c) &&
     (pattern matches c.toString * 3)
   }

-  property("epsilon*") = forAll { c: Char ⇒
-    !(Star(EPSILON) matches c.toString)
-  }
+  property("epsilon*") = forAll { c: Char ⇒ c ∉ (ε *) }
 }
\ No newline at end of file
diff --git a/src/test/scala/dsls/regex/RegexSuite.scala b/src/test/scala/dsls/regex/RegexSuite.scala
index 830a65d..ba73cab 100644
--- a/src/test/scala/dsls/regex/RegexSuite.scala
+++ b/src/test/scala/dsls/regex/RegexSuite.scala
@@ -3,12 +3,16 @@ package dsls.regex
 import org.scalatest.FunSuite
 import org.scalatest.Matchers

+import scala.language.postfixOps
+
 class RegexSuite extends FunSuite with Matchers {
-  test("epsilon") {
-    (EPSILON matches "") should be (true)
-  }
-
-  test("epsilon*") {
-    (Star(EPSILON) matches "") should be (true)
-  }
+  import RegularExpression._
+
+  test("epsilon") {
+    (ε matches "") should be (true)
+  }
+
+  test("epsilon*") {
+    ((ε *) matches "") should be (true)
+  }
 }
\ No newline at end of file