From ab80cd73131d753a773c6292c7fb1d17efda6a48 Mon Sep 17 00:00:00 2001 From: Josh Date: Sun, 21 Feb 2016 23:03:58 -0800 Subject: [PATCH 1/4] Implemented chars, strings and unions --- src/main/scala/dsls/regex/Program.scala | 40 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/main/scala/dsls/regex/Program.scala b/src/main/scala/dsls/regex/Program.scala index 38a0302..96ac592 100644 --- a/src/main/scala/dsls/regex/Program.scala +++ b/src/main/scala/dsls/regex/Program.scala @@ -1,4 +1,5 @@ package dsls.regex +import RegularExpression._ object Program extends App { @@ -9,16 +10,27 @@ object Program extends App { * val zero = '0' * etc. ***************************************************************************/ - val zero = Literal('0') - val one = Literal('1') - val two = Literal('2') - val three = Literal('3') - val four = Literal('4') - val five = Literal('5') - val six = Literal('6') - val seven = Literal('7') - val eight = Literal('8') - val nine = Literal('9') +// val zero = Literal('0') +// val one = Literal('1') +// val two = Literal('2') +// val three = Literal('3') +// val four = Literal('4') +// val five = Literal('5') +// val six = Literal('6') +// val seven = Literal('7') +// val eight = Literal('8') +// val nine = Literal('9') + + val zero = '0' + val one = '1' + val two = '2' + val three = '3' + val four = '4' + val five = '5' + val six = '6' + val seven = '7' + val eight = '8' + val nine = '9' require(zero matches "0") require(one matches "1") @@ -37,7 +49,7 @@ object Program extends App { * Make it possible to replace the definition of answer with: * val answer = "42" ***************************************************************************/ - val answer = Concat(four, two) + val answer = "42" require(answer matches "42") @@ -47,9 +59,9 @@ object Program extends App { * Make it possible to replace the definition of digit with: * val digit = '0' || '1' || '2' || '3' || '4' || '5' || '6' || '7' || '8' || '9' 
***************************************************************************/ - val digit = Union(zero, Union(one, Union(two, Union(three, Union(four, - Union(five, Union(six, Union(seven, Union(eight, nine))))))))) - +// val digit = Union(zero, Union(one, Union(two, Union(three, Union(four, +// Union(five, Union(six, Union(seven, Union(eight, nine))))))))) + val digit = '0' || '1' || '2' || '3' || '4' || '5' || '6' || '7' || '8' || '9' require(digit matches "0") require(digit matches "1") require(digit matches "2") From 472c17fa4d919a5bbcfae99c1b089e1d51469f32 Mon Sep 17 00:00:00 2001 From: Josh Date: Sun, 21 Feb 2016 23:48:51 -0800 Subject: [PATCH 2/4] Implemented everything else and commented everything. Began reflection. --- .../scala/dsls/regex/RegularExpression.scala | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/main/scala/dsls/regex/RegularExpression.scala b/src/main/scala/dsls/regex/RegularExpression.scala index 199fe1e..fc2e758 100644 --- a/src/main/scala/dsls/regex/RegularExpression.scala +++ b/src/main/scala/dsls/regex/RegularExpression.scala @@ -1,4 +1,10 @@ package dsls.regex +import scala.language.implicitConversions + +// Not necessary, but still a nice thing to do, I think. +// If I recall correctly this just tells scala that we're going to +// be using postfix operators. +import scala.language.postfixOps /** * Modify this file to implement an internal DSL for regular expressions. @@ -10,9 +16,57 @@ package dsls.regex /** The top of a class hierarchy that encodes regular expressions. */ abstract class RegularExpression { /** returns true if the given string matches this regular expression */ - def matches(string: String) = RegexMatcher.matches(string, this) + def matches(string: String) = RegexMatcher.matches(string, this) + + // Scala automatically allows us to use these in infix notation. 
+ def ||(other: RegularExpression) = Union(this, other) + def ~(other: RegularExpression) = Concat(this, other) + + // Scala also automatically allows us to use this in postfix notation. + // So calling it is simply a matter of putting it after a regex. + def <*>() = Star(this) + + // Traditionally + isn't defined as a basic operation, it's defined as + // the concatenation of one copy with a starred copy, so this operation + // is also very simple. + def <+>() = Concat(this, Star(this)) + + // The syntax R{n} for a regular expression R and an integer n is exactly + // the syntax that allows us to use apply, because syntactically + // it looks like we're applying R to n (since {} and () + // can both be used for function application). + // This could be done recursively or with a mapreduce. + def apply(x:Int):RegularExpression = + if (x==0) + EPSILON + else + // We can get n copies by taking one copy and concatenating with n-1 more + Concat(this, this.apply(x-1)); +} + +// We only need this for the implicit conversions (because as implicit +// conversions, they are called without an instance.) +object RegularExpression { + // Simple. + implicit def fromChar(c: Char) = Literal(c) + + // Again, this could be done with a mapreduce as well. + implicit def fromString(s: String):RegularExpression = + if (s == "") + EPSILON + else + // Pull off one char, and recurse on the rest. 
+ Concat(Literal(s.head), fromString(s.tail)) + } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + /** a regular expression that matches nothing */ object EMPTY extends RegularExpression From 9d9d9f8fdbb00aa4b464aee652932fcb6d7cd4e8 Mon Sep 17 00:00:00 2001 From: Josh Date: Mon, 22 Feb 2016 00:17:39 -0800 Subject: [PATCH 3/4] Wrote reflection --- reflection.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/reflection.md b/reflection.md index 0719389..c696e74 100644 --- a/reflection.md +++ b/reflection.md @@ -2,8 +2,12 @@ ## Which operators were easiest to implement and why? +Everything except for strings and repetition was pretty much trivial to implement because they could be done with just one short line of code, after figuring out the tricks. In particular, I figured out *, +, || and ~ pretty quickly. + ## Which operators were most difficult to implement and why? +Implicit conversion from chars was a bit tricky to figure out, because I had to realize that it went in the companion object. It was also tricky to realize that I had to import it directly (which I figured out with Amit's help). Strings were also slightly tricky just because it was recursive but really it wasn't hard. The hardest was probably repetition but even that was just a little bit of thinking until I realized that I could use apply. + ## Comment on the design of this internal DSL Write a few brief paragraphs that discuss: @@ -15,4 +19,14 @@ Write a few brief paragraphs that discuss: you implement it _or_ what features of Scala would prevent you from implementing it? (You don't have to write code for this part. You could say "I would use literal extension to..." or "Scala's rules for valid - identifiers prevent...") \ No newline at end of file + identifiers prevent...") + +It's a fine design for regexes. 
It makes describing a regex easy. Because of that anything that you want to describe as a regex is easy to say, so things that are patterned data are easy to describe. It would be useful if you need to check a list of strings to see which are phone numbers, which are license plate numbers, which consist solely of numbers and lowercase letters, etc. + +It's not useful for things that can't be described as regular expressions or are difficult to describe. It's bad if you want to check if a string matches a specific set of strings (or at least, not especially useful). It's bad if you want to describe a language that isn't regular (like the set of all palindromic strings or of prime-length strings). I don't think these are problems with the design. The design of regular expressions is strictly supposed to capture regular languages. If it captured other things, they wouldn't be implementable as efficiently. + +There are other regular expression 'helper functions' that make things easier to express, that might be nice to implement. For instance, _ for a regular expression that matches any string of length one. I believe this is acceptable in Scala, but it might break some other things. Underscores are used for singletons (like in anonymous functions) but I don't think it would ever be problematic, because singleton use wouldn't overlap with where we would use the name. The best way to implement it would be in the same way as EMPTY and EPSILON, because it's a special case like them. This would also require modifying the matcher to include this case in the matching. I don't think _ could be implemented using any combination of the existing methods, at least not in any way I can think of. + +In terms of the existing syntax it's all pretty fine. <+> and <*> could potentially be turned into + and *. I think this would be allowed unless there's some special case with those names. It might have conflicts, however. 
If + is string concatenation, then if we wrote "x" + "y" the compiler wouldn't look for an implicit conversion so we wouldn't get the regex we want, we would get the string "xy" (which could then function as the same regex with another implicit conversion). I'm not sure if there are any cases where this wouldn't work, but it's an interesting thought experiment. + +||, ~ and {n} are also fine if those are the operators people are familiar with. If you wanted to make a better language than this for regexes I think the only real way to do so other than adding other features would be to just ask people who use regular expressions which operators they're used to, or possibly give them multiple options. From 086bb9508ac7e6fede461d112eb81fbb3f4f61d4 Mon Sep 17 00:00:00 2001 From: Josh Date: Mon, 22 Feb 2016 17:43:37 -0800 Subject: [PATCH 4/4] Forgot to add the program file until now. Modified all of the tests in it and added a couple more. --- src/main/scala/dsls/regex/Program.scala | 49 ++++++++++++++++--------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/main/scala/dsls/regex/Program.scala b/src/main/scala/dsls/regex/Program.scala index 96ac592..754dc84 100644 --- a/src/main/scala/dsls/regex/Program.scala +++ b/src/main/scala/dsls/regex/Program.scala @@ -79,7 +79,8 @@ object Program extends App { * Make it possible to replace the definition of digit with: * val pi = '3' ~ '1' ~ '4' ***************************************************************************/ - val pi = Concat(Literal('3'), Concat(Literal('1'), Literal('4'))) + val pi = '3' ~ '1' ~ '4' + //val pi = Concat(Literal('3'), Concat(Literal('1'), Literal('4'))) require(pi matches "314") @@ -89,7 +90,8 @@ object Program extends App { * Make it possible to replace the definition of zeroOrMoreDigits with: * val zeroOrMoreDigits = digit <*> ***************************************************************************/ - val zeroOrMoreDigits = Star(digit) + val zeroOrMoreDigits = digit <*> + 
//val zeroOrMoreDigits = Star(digit) require(zeroOrMoreDigits matches "") require(zeroOrMoreDigits matches "0") @@ -103,7 +105,8 @@ object Program extends App { * Make it possible to replace the definition of number with: * val number = digit <+> ***************************************************************************/ - val number = Concat(digit, zeroOrMoreDigits) + val number = digit <+> + //val number = Concat(digit, zeroOrMoreDigits) require(!(number matches "")) require(number matches "0") @@ -117,7 +120,8 @@ object Program extends App { * Make it possible to replace the definition of cThree with: * val cThree = 'c'{3} ***************************************************************************/ - val cThree = Concat(Literal('c'), Concat(Literal('c'), Literal('c'))) + val cThree = 'c'{3} + //val cThree = Concat(Literal('c'), Concat(Literal('c'), Literal('c'))) require(cThree matches "ccc") @@ -127,9 +131,11 @@ object Program extends App { * the following several definitions with: * val pattern = "42" || ( ('a' <*>) ~ ('b' <+>) ~ ('c'{3})) ***************************************************************************/ - val aStar = Star(Literal('a')) - val bPlus = Concat(Literal('b'), Star(Literal('b'))) - val pattern = Union(answer, Concat(aStar, Concat(bPlus, cThree))) +// val aStar = Star(Literal('a')) +// val bPlus = Concat(Literal('b'), Star(Literal('b'))) +// val pattern = Union(answer, Concat(aStar, Concat(bPlus, cThree))) + + val pattern = "42" || ( ('a' <*>) ~ ('b' <+>) ~ ('c'{3})) require(pattern matches "42") require(pattern matches "bccc") @@ -145,13 +151,16 @@ object Program extends App { * the following several definitions with: * val helloworld = ("hello" <*>) ~ "world" ***************************************************************************/ - val hello = Concat(Literal('h'), Concat(Literal('e'), Concat(Literal('l'), - Concat(Literal('l'), Literal('o'))))) +// val hello = Concat(Literal('h'), Concat(Literal('e'), Concat(Literal('l'), +// 
Concat(Literal('l'), Literal('o'))))) +// +// val world = Concat(Literal('w'), Concat(Literal('o'), Concat(Literal('r'), +// Concat(Literal('l'), Literal('d'))))) +// +// val helloworld = Concat(Star(hello), world) +// - val world = Concat(Literal('w'), Concat(Literal('o'), Concat(Literal('r'), - Concat(Literal('l'), Literal('d'))))) - - val helloworld = Concat(Star(hello), world) + val helloworld = ("hello" <*>) ~ "world" require(helloworld matches "helloworld") require(helloworld matches "world") @@ -164,10 +173,16 @@ object Program extends App { * the following several definitions with: * val telNumber = '(' ~ digit{3} ~ ')' ~ digit{3} ~ '-' ~ digit{4} ***************************************************************************/ - val threeDigits = Concat(digit, Concat(digit, digit)) - val fourDigits = Concat(threeDigits, digit) - val areaCode = Concat(Literal('('), Concat(threeDigits, Literal(')'))) - val telNumber = Concat(areaCode, Concat(threeDigits, Concat(Literal('-'), fourDigits))) +// val threeDigits = Concat(digit, Concat(digit, digit)) +// val fourDigits = Concat(threeDigits, digit) +// val areaCode = Concat(Literal('('), Concat(threeDigits, Literal(')'))) +// val telNumber = Concat(areaCode, Concat(threeDigits, Concat(Literal('-'), fourDigits))) + + val telNumber = '(' ~ digit{3} ~ ')' ~ digit{3} ~ '-' ~ digit{4} require(telNumber matches "(202)456-1111") + require(telNumber matches "(999)999-9999") + require(!(telNumber matches "202-456-1111")) + require(!(telNumber matches "(202)-456-111")) + require(!(telNumber matches "(202)-4a6-1111")) }