From b80f54fe7a9cb6c84d1e3762a61e5ab75e842dfb Mon Sep 17 00:00:00 2001 From: Sayan Dutta Date: Sat, 9 Dec 2017 23:37:14 +0530 Subject: [PATCH] Updated Lexer > added a new function "NewLexerFromYamlConfig" that can generate a lexer based on yaml config source. > added corresponding tests and examples demonstrating the config fields. > added an example "minilisp" inside the examples folder demonstrating using the NewLexerFromYamlConfig function. --- example_lexer_test.go | 107 +++++++++++++++++++++++++++++++++++++- examples/minilisp/lex.yml | 15 ++++++ examples/minilisp/main.go | 54 +++++++++++++++++++ lexer.go | 2 +- lexer_state.go | 3 +- lexer_yaml.go | 53 +++++++++++++++++++ lexer_yaml_test.go | 36 +++++++++++++ token.go | 2 +- 8 files changed, 268 insertions(+), 4 deletions(-) create mode 100644 examples/minilisp/lex.yml create mode 100644 examples/minilisp/main.go create mode 100644 lexer_yaml.go create mode 100644 lexer_yaml_test.go diff --git a/example_lexer_test.go b/example_lexer_test.go index bbf5d7c..5ffbdbf 100644 --- a/example_lexer_test.go +++ b/example_lexer_test.go @@ -8,7 +8,8 @@ import ( func ExampleNewLexer() { // Create a new lexer with lisp like syntax - lexer := goply.NewLexer("(+ 10 20)") + // The stray = has no matching rule but should not cause an error + lexer := goply.NewLexer("= (+ 10 20)") // match left parenthesis lexer.AddRule("", "\\(") // match right parenthesis @@ -24,7 +25,37 @@ func ExampleNewLexer() { if err != nil { panic(err) } + // print out the tokens + for _, token := range tokens { + fmt.Printf("Got %s : %s\n", token.Type, token.Value) + } + // Output: + // Got : ( + // Got : + + // Got : 10 + // Got : 20 + // Got : ) +} +func ExampleNewLexerStrict() { + // Create a new lexer with lisp like syntax + lexer := goply.NewLexerStrict("(+ 10 20)") + // match left parenthesis + lexer.AddRule("", "\\(") + // match right parenthesis + lexer.AddRule("", "\\)") + // operator + + lexer.AddRule("", "\\+") + // an integer number +
lexer.AddRule("", "[0-9]+") + // ignore all whitespace + lexer.Ignore("\\s+") + // get the tokens + tokens, err := lexer.GetTokens() + if err != nil { + panic(err) + } + // print out the tokens for _, token := range tokens { fmt.Printf("Got %s : %s\n", token.Type, token.Value) } @@ -35,3 +66,77 @@ func ExampleNewLexer() { // Got : 20 // Got : ) } + +func ExampleNewLexerFromYamlConfig() { + // The yaml config source + // strict_mode is true by default + yamlSource := ` +lexer: + rules : + - type : "" + regex : "var" + - type : "" + regex : "=" + - type : "" + regex : "[0-9]+" + ignore : + - "\\s+" +` + source := "var = 123" + // try to generate a lexer from the given source and yaml config + lex, err := goply.NewLexerFromYamlConfig([]byte(yamlSource), source) + if err != nil { + panic(err) + } + // get the tokens + tokens, err := lex.GetTokens() + if err != nil { + panic(err) + } + // print out the tokens + for _, token := range tokens { + fmt.Printf("Got %s : %s\n", token.Type, token.Value) + } + // Output: + // Got : var + // Got : = + // Got : 123 +} + +func ExampleNewLexerFromYamlConfig_lenient() { + // The yaml config source + // The strict_mode field sets the strictness of the lexer + // it is true by default + yamlSource := ` +lexer: + strict_mode : false + rules : + - type : "" + regex : "var" + - type : "" + regex : "=" + - type : "" + regex : "[0-9]+" + ignore : + - "\\s+" +` + source := "var = 123" + // try to generate a lexer from the given source and yaml config + lex, err := goply.NewLexerFromYamlConfig([]byte(yamlSource), source) + if err != nil { + panic(err) + } + // get the tokens + tokens, err := lex.GetTokens() + if err != nil { + panic(err) + } + // print out the tokens + for _, token := range tokens { + fmt.Printf("Got %s : %s\n", token.Type, token.Value) + } + // Output: + // Got : var + // Got : = + // Got : 123 +} diff --git a/examples/minilisp/lex.yml b/examples/minilisp/lex.yml new file mode 100644 index 0000000..83030a9 --- /dev/null +++
b/examples/minilisp/lex.yml @@ -0,0 +1,15 @@ +# this file is loaded by main.go from $GOPATH/src/github.com/nayas360/goply/examples/minilisp +lexer: + rules : + - type : "" + regex : "\\(" + - type : "" + regex : "\\)" + - type : "" + regex : "\\+" + - type : "" + regex : "-" + - type : "" + regex : "[0-9]+" + ignore : + - "\\s+" \ No newline at end of file diff --git a/examples/minilisp/main.go b/examples/minilisp/main.go new file mode 100644 index 0000000..a21f841 --- /dev/null +++ b/examples/minilisp/main.go @@ -0,0 +1,54 @@ +package main + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strings" + + "github.com/nayas360/goply" +) + +var source = `(+ 10 (- 15 5))` + +func main() { + yamlConfig, err := getYamlConfig() + if err != nil { + panic(err) + } + // get the lexer from the config and source + lex, err := goply.NewLexerFromYamlConfig(yamlConfig, source) + if err != nil { + panic(err) + } + // get the tokens + tokens, err := lex.GetTokens() + if err != nil { + panic(err) + } + // print the tokens + for _, token := range tokens { + fmt.Printf("Got %s : %s\n", token.Type, token.Value) + } +} + +func getYamlConfig() ([]byte, error) { + var GOP string + + for _, p := range os.Environ() { + ps := strings.SplitN(p, "=", 2) + if ps[0] == "GOPATH" { + GOP = strings.Split(ps[1], ":")[0] + } + } + + // resolve the absolute path of the lexer definition file under the first GOPATH entry + ycp, err := filepath.Abs(GOP + "/src/github.com/nayas360/goply/examples/minilisp/lex.yml") + if err != nil { + return nil, err + } + // read the config + yamlConfig, err := ioutil.ReadFile(ycp) + return yamlConfig, err +} diff --git a/lexer.go b/lexer.go index c67fa1c..c096c2b 100644 --- a/lexer.go +++ b/lexer.go @@ -6,7 +6,7 @@ import ( "strings" ) -// struct having related fields for representing a lexer +// Struct having related fields for representing a lexer type Lexer struct { ls LexerState // internal state of the lexer lexRules map[string]*regexp.Regexp // mapping from Type names to regex Rules to be used with a token diff --git a/lexer_state.go
b/lexer_state.go index 7f040e9..c7a3b06 100644 --- a/lexer_state.go +++ b/lexer_state.go @@ -1,6 +1,7 @@ package goply -// struct having fields for representing the state of a lexer +// Struct having fields for representing the state of a lexer +// it is passed as an argument to lexerErrorFunc type LexerState struct { SourceLength int // the length of the source string Source string // the source string itself diff --git a/lexer_yaml.go b/lexer_yaml.go new file mode 100644 index 0000000..dd3f66d --- /dev/null +++ b/lexer_yaml.go @@ -0,0 +1,53 @@ +package goply + +import ( + "fmt" + "regexp" + + "gopkg.in/yaml.v2" +) + +// goplyYamlConfig is the struct the yaml config is unmarshalled into +type goplyYamlConfig struct { + Lexer struct { + StrictMode bool `yaml:"strict_mode,omitempty"` + Rules []struct { + Type string `yaml:"type"` + Regex string `yaml:"regex"` + } `yaml:"rules"` + Ignore []string `yaml:"ignore"` + } `yaml:"lexer"` +} + +// NewLexerFromYamlConfig creates a lexer for source from a yaml config +// yamlConfig is the config content itself and not a file path +// this allows loading the config from a file as well as from memory, like the source +// strict mode is enabled unless the config sets strict_mode to false +// returns an error if the yaml cannot be unmarshalled or a rule or ignore entry is empty +func NewLexerFromYamlConfig(yamlConfig []byte, source string) (*Lexer, error) { + var gyc goplyYamlConfig + // preset strict mode so it stays true when the config omits strict_mode + gyc.Lexer.StrictMode = true + err := yaml.UnmarshalStrict(yamlConfig, &gyc) + if err != nil { + return nil, err + } + lex := &Lexer{ls: LexerState{SourceLength: len(source) - 1, Source: source}, + lexRules: make(map[string]*regexp.Regexp), lexerErrorFunc: defaultLexerError, strictMode: gyc.Lexer.StrictMode} + + for _, rule := range gyc.Lexer.Rules { + if rule.Type != "" && rule.Regex != "" { + lex.AddRule(rule.Type, rule.Regex) + } else { + return nil, fmt.Errorf("malformed config file, \"type\" or \"regex\" fields missing from a rule") + } + } + for _, rule := range gyc.Lexer.Ignore { + if rule != "" { + lex.Ignore(rule) + } else { + return nil,
fmt.Errorf("malformed config file, empty rule in \"ignore\"") + } + } + return lex, nil +} diff --git a/lexer_yaml_test.go b/lexer_yaml_test.go new file mode 100644 index 0000000..fea8eb9 --- /dev/null +++ b/lexer_yaml_test.go @@ -0,0 +1,36 @@ +package goply_test + +import ( + "fmt" + "testing" + + "github.com/nayas360/goply" +) + +func TestNewLexerFromYamlConfig(t *testing.T) { + yamlSource := ` +lexer: + rules : + - type : "" + regex : "var" + - type : "" + regex : "=" + - type : "" + regex : "[0-9]+" + ignore : + - "\\s+" +` + source := "var = 123" + lex, err := goply.NewLexerFromYamlConfig([]byte(yamlSource), source) + if err != nil { + t.Fatalf("could not create a new lexer from yaml config, %s", err) + } + tokens, err := lex.GetTokens() + if err != nil { + t.Fatalf("got error instead of tokens, %s", err) + } + fmt.Println(tokens) + if len(tokens) != 3 { + t.Errorf("expected 3 tokens got, %d", len(tokens)) + } +} diff --git a/token.go b/token.go index 5f00f35..4aefe24 100755 --- a/token.go +++ b/token.go @@ -1,6 +1,6 @@ package goply -// struct having related fields for representing a token +// Struct having related fields for representing a token type Token struct { Type string // the Type of token Value string // the Value of the token