Skip to content

Commit

Permalink
Updated Lexer
Browse files Browse the repository at this point in the history
> added a new function "NewLexerFromYamlConfig" that can generate
a lexer based on yaml config source.
> added corresponding tests and examples demonstrating the config
fields.
> added an example "minilisp" inside the examples folder demonstrating
using the NewLexerFromYamlConfig function.
  • Loading branch information
nayas360 committed Dec 9, 2017
1 parent bef4084 commit b80f54f
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 4 deletions.
107 changes: 106 additions & 1 deletion example_lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import (

func ExampleNewLexer() {
// Create a new lexer with lisp like syntax
lexer := goply.NewLexer("(+ 10 20)")
// The stray = has no matching rule but should not cause an error
lexer := goply.NewLexer("= (+ 10 20)")
// match left parenthesis
lexer.AddRule("<lparen>", "\\(")
// match right parenthesis
Expand All @@ -24,7 +25,37 @@ func ExampleNewLexer() {
if err != nil {
panic(err)
}
// print out the tokens
for _, token := range tokens {
fmt.Printf("Got %s : %s\n", token.Type, token.Value)
}
// Output:
// Got <lparen> : (
// Got <op_plus> : +
// Got <number> : 10
// Got <number> : 20
// Got <rparen> : )
}

func ExampleNewLexerStrict() {
// Create a new lexer with lisp like syntax
lexer := goply.NewLexerStrict("(+ 10 20)")
// match left parenthesis
lexer.AddRule("<lparen>", "\\(")
// match right parenthesis
lexer.AddRule("<rparen>", "\\)")
// operator +
lexer.AddRule("<op_plus>", "\\+")
// a integer number
lexer.AddRule("<number>", "[0-9]+")
// ignore all whitespace
lexer.Ignore("\\s+")
// get the tokens
tokens, err := lexer.GetTokens()
if err != nil {
panic(err)
}
// print out the tokens
for _, token := range tokens {
fmt.Printf("Got %s : %s\n", token.Type, token.Value)
}
Expand All @@ -35,3 +66,77 @@ func ExampleNewLexer() {
// Got <number> : 20
// Got <rparen> : )
}

// ExampleNewLexerFromYamlConfig builds a lexer from an in-memory yaml config
// and prints the tokens it produces for a small source string.
func ExampleNewLexerFromYamlConfig() {
	// The yaml config source.
	// strict_mode is left out here, so it keeps its default value of true.
	// "rules" and "ignore" must be nested under the top-level "lexer" key.
	yamlSource := `
lexer:
  rules :
    - type : "<var_kw>"
      regex : "var"
    - type : "<eq>"
      regex : "="
    - type : "<integer>"
      regex : "[0-9]+"
  ignore :
    - "\\s+"
`
	source := "var = 123"
	// try to generate a lexer from the given source and yaml config
	lex, err := goply.NewLexerFromYamlConfig([]byte(yamlSource), source)
	if err != nil {
		panic(err)
	}
	// get the tokens
	tokens, err := lex.GetTokens()
	if err != nil {
		panic(err)
	}
	// print out the tokens
	for _, token := range tokens {
		fmt.Printf("Got %s : %s\n", token.Type, token.Value)
	}
	// Output:
	// Got <var_kw> : var
	// Got <eq> : =
	// Got <integer> : 123
}

// ExampleNewLexerFromYamlConfig_lenient builds a lexer from a yaml config
// that turns strict mode off, then prints the tokens for a small source.
func ExampleNewLexerFromYamlConfig_lenient() {
	// The yaml config source.
	// The strict_mode field sets the strictness of the lexer;
	// it is true by default, so it has to be disabled explicitly.
	// All fields must be nested under the top-level "lexer" key.
	yamlSource := `
lexer:
  strict_mode : false
  rules :
    - type : "<var_kw>"
      regex : "var"
    - type : "<eq>"
      regex : "="
    - type : "<integer>"
      regex : "[0-9]+"
  ignore :
    - "\\s+"
`
	source := "var = 123"
	// try to generate a lexer from the given source and yaml config
	lex, err := goply.NewLexerFromYamlConfig([]byte(yamlSource), source)
	if err != nil {
		panic(err)
	}
	// get the tokens
	tokens, err := lex.GetTokens()
	if err != nil {
		panic(err)
	}
	// print out the tokens
	for _, token := range tokens {
		fmt.Printf("Got %s : %s\n", token.Type, token.Value)
	}
	// Output:
	// Got <var_kw> : var
	// Got <eq> : =
	// Got <integer> : 123
}
15 changes: 15 additions & 0 deletions examples/minilisp/lex.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# this file is read by main.go from $GOPATH/src/github.com/nayas360/goply/examples/minilisp/lex.yml
lexer:
  rules :
    - type : "<lparen>"
      regex : "\\("
    - type : "<rparen>"
      regex : "\\)"
    - type : "<op_plus>"
      regex : "\\+"
    - type : "<op_minus>"
      regex : "-"
    - type : "<integer>"
      regex : "[0-9]+"
  ignore :
    - "\\s+"
54 changes: 54 additions & 0 deletions examples/minilisp/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package main

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"

"github.com/nayas360/goply"
)

// source is the minilisp program that this example tokenizes.
var source = `(+ 10 (- 15 5))`

// main loads the yaml lexer definition, builds a lexer for source from it
// and prints every token the lexer produces. Any failure panics.
func main() {
	// every step is fatal for this example, so all errors go through one panic site
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	// read the lexer definition
	config, err := getYamlConfig()
	must(err)

	// build the lexer from the definition and the source
	lexer, err := goply.NewLexerFromYamlConfig(config, source)
	must(err)

	// run the lexer over the whole source
	toks, err := lexer.GetTokens()
	must(err)

	// report each token as "Got <type> : <value>"
	for _, tok := range toks {
		fmt.Printf("Got %s : %s\n", tok.Type, tok.Value)
	}
}

// getYamlConfig reads the minilisp lexer definition (lex.yml) from below the
// first directory listed in the GOPATH environment variable and returns its
// raw contents.
//
// It returns an error if GOPATH is unset or its first entry is empty, if the
// path cannot be made absolute, or if the file cannot be read.
func getYamlConfig() ([]byte, error) {
	// os.Getenv yields the complete value, even when it contains "=".
	// GOPATH may list several directories separated by the platform's list
	// separator; only the first one is searched.
	gopath := strings.Split(os.Getenv("GOPATH"), string(os.PathListSeparator))[0]
	if gopath == "" {
		return nil, fmt.Errorf("GOPATH is not set, cannot locate lex.yml")
	}

	// load lexer definition from file
	ycp, err := filepath.Abs(filepath.Join(gopath, "src", "github.com", "nayas360", "goply", "examples", "minilisp", "lex.yml"))
	if err != nil {
		return nil, err
	}
	// read the config
	return ioutil.ReadFile(ycp)
}
2 changes: 1 addition & 1 deletion lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"strings"
)

// struct having related fields for representing a lexer
// Struct having related fields for representing a lexer
type Lexer struct {
ls LexerState // internal state of the lexer
lexRules map[string]*regexp.Regexp // mapping from Type names to regex Rules to be used with a token
Expand Down
3 changes: 2 additions & 1 deletion lexer_state.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package goply

// struct having fields for representing the state of a lexer
// Struct having fields for representing the state of a lexer
// it is passed as an argument to lexerErrorFunc
type LexerState struct {
SourceLength int // the length of the source string
Source string // the source string itself
Expand Down
53 changes: 53 additions & 0 deletions lexer_yaml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package goply

import (
"fmt"
"regexp"

"gopkg.in/yaml.v2"
)

// yamlRuleConfig is one entry of the "rules" list in the yaml config:
// a token type name paired with the regex source for that token.
type yamlRuleConfig struct {
	Type  string `yaml:"type"`
	Regex string `yaml:"regex"`
}

// goplyYamlConfig is the structure the yaml config is decoded into.
// Every setting lives under the top-level "lexer" key.
type goplyYamlConfig struct {
	Lexer struct {
		StrictMode bool             `yaml:"strict_mode,omitempty"` // strictness of the lexer
		Rules      []yamlRuleConfig `yaml:"rules"`                 // token rules, in config order
		Ignore     []string         `yaml:"ignore"`                // regexes passed to Lexer.Ignore
	} `yaml:"lexer"`
}

// NewLexerFromYamlConfig creates a lexer for source from a yaml config.
//
// yamlConfig is the config text itself and not a file path, which allows
// loading the config from a file as well as from memory, like the source.
// The config is decoded with yaml.UnmarshalStrict; strict_mode defaults to
// true when the config does not set it.
//
// It returns an error if the yaml could not be decoded, if a rule has an
// empty "type" or "regex" field, or if an "ignore" entry is empty.
func NewLexerFromYamlConfig(yamlConfig []byte, source string) (*Lexer, error) {
	var gyc goplyYamlConfig
	// set the default before decoding so it survives when the config omits strict_mode
	gyc.Lexer.StrictMode = true
	if err := yaml.UnmarshalStrict(yamlConfig, &gyc); err != nil {
		return nil, err
	}

	// NOTE(review): SourceLength is len(source)-1 rather than len(source);
	// presumably this matches the other Lexer constructors — confirm.
	lex := &Lexer{
		ls:             LexerState{SourceLength: len(source) - 1, Source: source},
		lexRules:       make(map[string]*regexp.Regexp),
		lexerErrorFunc: defaultLexerError,
		strictMode:     gyc.Lexer.StrictMode,
	}

	// register the token rules; a rule needs both a type and a regex
	for _, rule := range gyc.Lexer.Rules {
		if rule.Type == "" || rule.Regex == "" {
			return nil, fmt.Errorf("malformed config file, \"type\" or \"regex\" fields missing from a rule")
		}
		lex.AddRule(rule.Type, rule.Regex)
	}
	// register the patterns to be ignored
	for _, rule := range gyc.Lexer.Ignore {
		if rule == "" {
			return nil, fmt.Errorf("malformed config file, empty rule in \"ignore\"")
		}
		lex.Ignore(rule)
	}
	return lex, nil
}
36 changes: 36 additions & 0 deletions lexer_yaml_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package goply_test

import (
"fmt"
"testing"

"github.com/nayas360/goply"
)

// TestNewLexerFromYamlConfig checks that a lexer built from a yaml config
// splits a matching source string into the expected number of tokens.
func TestNewLexerFromYamlConfig(t *testing.T) {
	// "rules" and "ignore" must be nested under the top-level "lexer" key
	yamlSource := `
lexer:
  rules :
    - type : "<var_kw>"
      regex : "var"
    - type : "<eq>"
      regex : "="
    - type : "<integer>"
      regex : "[0-9]+"
  ignore :
    - "\\s+"
`
	source := "var = 123"
	lex, err := goply.NewLexerFromYamlConfig([]byte(yamlSource), source)
	if err != nil {
		// stop here: without a lexer the calls below cannot run
		t.Fatalf("could not create a new lexer from yaml config, %s", err)
	}
	tokens, err := lex.GetTokens()
	if err != nil {
		t.Fatalf("got error instead of tokens, %s", err)
	}
	fmt.Println(tokens)
	if len(tokens) != 3 {
		// len returns an int, so the verb has to be %d
		t.Errorf("expected 3 tokens got, %d", len(tokens))
	}
}
2 changes: 1 addition & 1 deletion token.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package goply

// struct having related fields for representing a token
// Struct having related fields for representing a token
type Token struct {
Type string // the Type of token
Value string // the Value of the token
Expand Down

0 comments on commit b80f54f

Please sign in to comment.