Skip to content

Commit

Permalink
begin working on tokenizing identifiers
Browse files Browse the repository at this point in the history
Git issue references:

Gamelan music currently playing: Gendhing Rangu-Rangu

Co-authored-by: Mouse Reeve <[email protected]>
  • Loading branch information
connorwalsh committed Apr 3, 2018
1 parent 13c5cc5 commit 97460a6
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 2 deletions.
6 changes: 5 additions & 1 deletion forest.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ type Expr struct {
Globals map[string]AST
}

// def f():
// 1 + 1
// return 2 + 2

// evaluate an expression
func (e *Expr) Eval() (string, error) {
execData := &ExecutionData{
Expand Down Expand Up @@ -65,7 +69,7 @@ type Variable struct {
// Eval evaluates a variable reference by handing a VARIABLE_IDENTIFIER
// computation to the executer. The variable's value itself is resolved
// by the execution layer, not here.
//
// NOTE(review): the diff residue carried both the old VARIABLE tag and
// the new VARIABLE_IDENTIFIER tag as duplicate struct fields; only the
// post-commit VARIABLE_IDENTIFIER field is kept.
func (v *Variable) Eval() (string, error) {
	return executer.Run(
		&ExecutionData{
			ComputationType: VARIABLE_IDENTIFIER,
		},
	)
}
Expand Down
36 changes: 36 additions & 0 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ func (c *Compterpreter) GetNextToken() (Token, error) {
case c.IsNumber(c.CurrentChar):
// get full multidigit number token
err = c.TokenizeNumber(c.CurrentChar)
case c.IsIdentifierFirstSymbol(c.CurrentChar):
// is it a keyword?
// is it a function/variable identifier?
case c.IsPunctuation(c.CurrentChar):
err = c.TokenizePunctuation(c.CurrentChar)
default:
Expand Down Expand Up @@ -82,6 +85,14 @@ func (c *Compterpreter) IsNumber(r rune) bool {
return unicode.IsDigit(r)
}

// IsIdentifierFirstSymbol reports whether r is allowed to start an
// identifier, as defined by the VALID_IDENTIFIER_FIRST_SYMBOL pattern.
func (c *Compterpreter) IsIdentifierFirstSymbol(r rune) bool {
	candidate := string(r)
	return VALID_IDENTIFIER_FIRST_SYMBOL.MatchString(candidate)
}

// IsIdentifier reports whether r may appear in the body of an
// identifier (any position after the first), as defined by the
// VALID_IDENTIFIER_SYMBOL pattern.
func (c *Compterpreter) IsIdentifier(r rune) bool {
	candidate := string(r)
	return VALID_IDENTIFIER_SYMBOL.MatchString(candidate)
}

func (c *Compterpreter) IsOperator(r rune) bool {
for _, symbol := range c.Symbols.Operators {
if string(r) == symbol {
Expand Down Expand Up @@ -136,6 +147,31 @@ func (c *Compterpreter) TokenizeNumber(r rune) error {
return nil
}

// TokenizeIdentifier accumulates a multi-character identifier token
// starting at r, advancing the input one character at a time and
// recursing while the next character is a valid identifier symbol.
// Once the full lexeme is collected, it is reclassified as a KEYWORD
// if it matches one of the language's reserved words; otherwise it
// remains an IDENTIFIER. Returns any error raised while advancing
// the input.
func (c *Compterpreter) TokenizeIdentifier(r rune) error {
	c.CurrentToken.Type = IDENTIFIER
	c.CurrentToken.Value = c.CurrentToken.Value + string(r)

	// check to see if we need to include the next character in the
	// current token
	if err := c.Advance(); err != nil {
		return err
	}

	if c.IsIdentifier(c.CurrentChar) {
		// BUGFIX: the recursive call's error was previously discarded;
		// propagate it so a failed Advance is not silently swallowed.
		return c.TokenizeIdentifier(c.CurrentChar)
	}

	// at this point, we have our complete token; check whether it is a
	// keyword or a user-defined identifier (done once, at the innermost
	// frame, where the full lexeme is known)
	for _, keyword := range c.Symbols.Keywords {
		if c.CurrentToken.Value == keyword {
			c.CurrentToken.Type = KEYWORD
			break
		}
	}

	return nil
}

func (c *Compterpreter) TokenizeOperator(r rune) error {
c.CurrentToken.Type = OPERATOR
c.CurrentToken.Value = c.CurrentToken.Value + string(r)
Expand Down
32 changes: 32 additions & 0 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ func (s *LexerSuite) TestIsOperator() {
}
}

// TestIsIdentifierFirstSymbol verifies that letters and underscore are
// accepted as identifier-leading characters while digits, whitespace,
// and non-ASCII symbols are rejected.
func (s *LexerSuite) TestIsIdentifierFirstSymbol() {
	conf := &Config{SrcFileName: "test/test.doc"}
	compt := NewCompterpreter(conf)

	accepted := []rune{'a', 'A', 'z', 'Z', '_'}
	rejected := []rune{'❧', '0', ' '}

	for _, r := range accepted {
		s.True(compt.IsIdentifierFirstSymbol(r))
	}
	for _, r := range rejected {
		s.False(compt.IsIdentifierFirstSymbol(r))
	}
}

func (s *LexerSuite) TestIsPunctuation() {
conf := &Config{SrcFileName: "test/test.doc"}
compt := NewCompterpreter(conf)
Expand Down Expand Up @@ -92,6 +105,25 @@ func (s *LexerSuite) TestTokenizeOperator() {
}
}

// TestTokenizeIdentifier lexes test/test_identifiers.doc and checks
// that each identifier lexeme is accumulated into CurrentToken.Value.
func (s *LexerSuite) TestTokenizeIdentifier() {
	conf := &Config{SrcFileName: "test/test_identifiers.doc"}
	compt := NewCompterpreter(conf)

	err := compt.LoadSourceCode()
	s.NoError(err)

	// advance ptr to first character
	compt.Advance()
	for _, expected := range []string{"myVariable"} {
		compt.CurrentToken = Token{}
		// BUGFIX: was compt.TokenizeOperator — a copy-paste slip from
		// TestTokenizeOperator that never exercised the code under test.
		compt.TokenizeIdentifier(compt.CurrentChar)
		if string(compt.CurrentChar) == "EOF" {
			break
		}
		s.EqualValues(compt.CurrentToken.Value, expected)
	}
}

func (s *LexerSuite) TestLex() {
conf := &Config{SrcFileName: "test/test_tokenize.doc"}
compt := NewCompterpreter(conf)
Expand Down
8 changes: 8 additions & 0 deletions symbols.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package dockerlang

import "regexp"

const (
ADDITION_OPERATOR = "+"
SUBTRACTION_OPERATOR = "†"
Expand All @@ -13,6 +15,9 @@ const (

R_PAREN_PUNCTION = "("
L_PAREN_PUNCUTATION = ")"

VARIABLE_IDENTIFIER = "VARIABLE_IDENTIFIER"
FUNCTION_IDENTIFIER = "FUNCTION_IDENTIFIER"
)

var (
Expand All @@ -27,6 +32,9 @@ var (
EXIT_OPERATOR: 1,
NOOP: 1,
}

VALID_IDENTIFIER_FIRST_SYMBOL = regexp.MustCompile("[a-zA-Z_]")
VALID_IDENTIFIER_SYMBOL = regexp.MustCompile("[a-zA-Z_\\d]")
)

// all the language-defined tokens in dockerlang
Expand Down
1 change: 1 addition & 0 deletions test/identifiers.doc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions test/test_identifiers.doc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
myVariable
4 changes: 4 additions & 0 deletions test/variable.doc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(≡ aVariable 0)

if asdf
ifIamKewl
3 changes: 2 additions & 1 deletion token.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ package dockerlang

// token type tags attached to lexed tokens by the Compterpreter
const (
	OPERATOR    = "OPERATOR"
	IDENTIFIER  = "IDENTIFIER"
	KEYWORD     = "KEYWORD"
	INT         = "INTEGER"
	PUNCTUATION = "PUNCTUATION" // parens
)
Expand Down

0 comments on commit 97460a6

Please sign in to comment.