Skip to content

Commit

Permalink
begin working on tokenizing identifiers
Browse files Browse the repository at this point in the history
Git issue references:

Gamelan music currently playing: Gendhing Rangu-Rangu

Co-authored-by: Mouse Reeve <[email protected]>
  • Loading branch information
connorwalsh committed Apr 3, 2018
1 parent 13c5cc5 commit 97460a6
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 2 deletions.
6 changes: 5 additions & 1 deletion forest.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ type Expr struct {
Globals map[string]AST
}

// def f():
// 1 + 1
// return 2 + 2

// evaluate an expression
func (e *Expr) Eval() (string, error) {
execData := &ExecutionData{
Expand Down Expand Up @@ -65,7 +69,7 @@ type Variable struct {
// Eval evaluates a variable reference by handing a VARIABLE_IDENTIFIER
// computation to the executer. The variable's value itself is resolved
// by the execution layer, not here.
//
// NOTE(review): the diff residue carried both the old VARIABLE tag and
// the new VARIABLE_IDENTIFIER tag as duplicate struct fields; only the
// post-commit VARIABLE_IDENTIFIER field is kept.
func (v *Variable) Eval() (string, error) {
	return executer.Run(
		&ExecutionData{
			ComputationType: VARIABLE_IDENTIFIER,
		},
	)
}
Expand Down
36 changes: 36 additions & 0 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ func (c *Compterpreter) GetNextToken() (Token, error) {
case c.IsNumber(c.CurrentChar):
// get full multidigit number token
err = c.TokenizeNumber(c.CurrentChar)
case c.IsIdentifierFirstSymbol(c.CurrentChar):
// is it a keyword?
// is it a function/variable identifier?
case c.IsPunctuation(c.CurrentChar):
err = c.TokenizePunctuation(c.CurrentChar)
default:
Expand Down Expand Up @@ -82,6 +85,14 @@ func (c *Compterpreter) IsNumber(r rune) bool {
return unicode.IsDigit(r)
}

// IsIdentifierFirstSymbol reports whether r is allowed to start an
// identifier, as defined by the VALID_IDENTIFIER_FIRST_SYMBOL pattern.
func (c *Compterpreter) IsIdentifierFirstSymbol(r rune) bool {
	candidate := string(r)
	return VALID_IDENTIFIER_FIRST_SYMBOL.MatchString(candidate)
}

// IsIdentifier reports whether r may appear in the body of an
// identifier (any position after the first), as defined by the
// VALID_IDENTIFIER_SYMBOL pattern.
func (c *Compterpreter) IsIdentifier(r rune) bool {
	candidate := string(r)
	return VALID_IDENTIFIER_SYMBOL.MatchString(candidate)
}

func (c *Compterpreter) IsOperator(r rune) bool {
for _, symbol := range c.Symbols.Operators {
if string(r) == symbol {
Expand Down Expand Up @@ -136,6 +147,31 @@ func (c *Compterpreter) TokenizeNumber(r rune) error {
return nil
}

// TokenizeIdentifier accumulates a multi-character identifier token
// starting at r, advancing the input one character at a time and
// recursing while the next character is a valid identifier symbol.
// Once the full lexeme is collected, it is reclassified as a KEYWORD
// if it matches one of the language's reserved words; otherwise it
// remains an IDENTIFIER. Returns any error raised while advancing
// the input.
func (c *Compterpreter) TokenizeIdentifier(r rune) error {
	c.CurrentToken.Type = IDENTIFIER
	c.CurrentToken.Value = c.CurrentToken.Value + string(r)

	// check to see if we need to include the next character in the
	// current token
	if err := c.Advance(); err != nil {
		return err
	}

	if c.IsIdentifier(c.CurrentChar) {
		// BUGFIX: the recursive call's error was previously discarded;
		// propagate it so a failed Advance is not silently swallowed.
		return c.TokenizeIdentifier(c.CurrentChar)
	}

	// at this point, we have our complete token; check whether it is a
	// keyword or a user-defined identifier (done once, at the innermost
	// frame, where the full lexeme is known)
	for _, keyword := range c.Symbols.Keywords {
		if c.CurrentToken.Value == keyword {
			c.CurrentToken.Type = KEYWORD
			break
		}
	}

	return nil
}

func (c *Compterpreter) TokenizeOperator(r rune) error {
c.CurrentToken.Type = OPERATOR
c.CurrentToken.Value = c.CurrentToken.Value + string(r)
Expand Down
32 changes: 32 additions & 0 deletions lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ func (s *LexerSuite) TestIsOperator() {
}
}

// TestIsIdentifierFirstSymbol verifies that letters and underscore are
// accepted as identifier-leading characters while digits, whitespace,
// and non-ASCII symbols are rejected.
func (s *LexerSuite) TestIsIdentifierFirstSymbol() {
	conf := &Config{SrcFileName: "test/test.doc"}
	compt := NewCompterpreter(conf)

	accepted := []rune{'a', 'A', 'z', 'Z', '_'}
	rejected := []rune{'❧', '0', ' '}

	for _, r := range accepted {
		s.True(compt.IsIdentifierFirstSymbol(r))
	}
	for _, r := range rejected {
		s.False(compt.IsIdentifierFirstSymbol(r))
	}
}

func (s *LexerSuite) TestIsPunctuation() {
conf := &Config{SrcFileName: "test/test.doc"}
compt := NewCompterpreter(conf)
Expand Down Expand Up @@ -92,6 +105,25 @@ func (s *LexerSuite) TestTokenizeOperator() {
}
}

// TestTokenizeIdentifier lexes test/test_identifiers.doc and checks
// that each identifier lexeme is accumulated into CurrentToken.Value.
func (s *LexerSuite) TestTokenizeIdentifier() {
	conf := &Config{SrcFileName: "test/test_identifiers.doc"}
	compt := NewCompterpreter(conf)

	err := compt.LoadSourceCode()
	s.NoError(err)

	// advance ptr to first character
	compt.Advance()
	for _, expected := range []string{"myVariable"} {
		compt.CurrentToken = Token{}
		// BUGFIX: was compt.TokenizeOperator — a copy-paste slip from
		// TestTokenizeOperator that never exercised the code under test.
		compt.TokenizeIdentifier(compt.CurrentChar)
		if string(compt.CurrentChar) == "EOF" {
			break
		}
		s.EqualValues(compt.CurrentToken.Value, expected)
	}
}

func (s *LexerSuite) TestLex() {
conf := &Config{SrcFileName: "test/test_tokenize.doc"}
compt := NewCompterpreter(conf)
Expand Down
8 changes: 8 additions & 0 deletions symbols.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package dockerlang

import "regexp"

const (
ADDITION_OPERATOR = "+"
SUBTRACTION_OPERATOR = "†"
Expand All @@ -13,6 +15,9 @@ const (

R_PAREN_PUNCTION = "("
L_PAREN_PUNCUTATION = ")"

VARIABLE_IDENTIFIER = "VARIABLE_IDENTIFIER"
FUNCTION_IDENTIFIER = "FUNCTION_IDENTIFIER"
)

var (
Expand All @@ -27,6 +32,9 @@ var (
EXIT_OPERATOR: 1,
NOOP: 1,
}

VALID_IDENTIFIER_FIRST_SYMBOL = regexp.MustCompile("[a-zA-Z_]")
VALID_IDENTIFIER_SYMBOL = regexp.MustCompile("[a-zA-Z_\\d]")
)

// all the language-defined tokens in dockerlang
Expand Down
1 change: 1 addition & 0 deletions test/identifiers.doc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions test/test_identifiers.doc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
myVariable
4 changes: 4 additions & 0 deletions test/variable.doc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(≡ aVariable 0)

if asdf
ifIamKewl
3 changes: 2 additions & 1 deletion token.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ package dockerlang

// token type tags attached to lexed tokens by the Compterpreter
const (
	OPERATOR    = "OPERATOR"
	IDENTIFIER  = "IDENTIFIER"
	KEYWORD     = "KEYWORD"
	INT         = "INTEGER"
	PUNCTUATION = "PUNCTUATION" // parens
)
Expand Down

0 comments on commit 97460a6

Please sign in to comment.