-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizer.go
95 lines (81 loc) · 1.93 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
"bytes"
"fmt"
"os"
"regexp"
"text/scanner"
)
// Token is a single lexical unit produced by the Tokenizer.
type Token struct {
	// kind is the token category and value is the matched source text.
	kind, value string
	// line and column hold the tokenizer position captured when the token
	// was created.
	line, column int
}
// Tokenizer turns the bytes of one input file into a flat list of tokens.
type Tokenizer struct {
	// content is the raw input that Scan reads.
	content []byte
	// fileName is handed to the underlying scanner and used as the prefix of
	// error messages.
	fileName string
	// tokens accumulates the tokens created by addToken.
	tokens []Token
	// currentLine and currentColumn track the scan position; Scan sets them
	// to 1 and 0 before it starts reading.
	currentLine, currentColumn int
}
// keywords lists the words that isKeyword recognizes; Scan emits a matching
// word as a "KEYWORD" token instead of an "ALPHA" token.
var keywords = [...]string{
	// Axioms.
	"SubClassOf",
	"EquivalentClasses",
	"DisjointClasses",
	"SameIndividual",
	"DifferentIndividuals",
	// Class expressions.
	"ObjectIntersectionOf",
	"ObjectUnionOf",
	"ObjectComplementOf",
	"ObjectOneOf",
}
var (
	// isAlpha reports whether its argument begins with at least one ASCII
	// letter. Only the prefix is checked; anything may follow it.
	isAlpha = regexp.MustCompile(`^[a-zA-Z]+`).MatchString

	// isSeparator reports whether its argument contains '(', '|' or ')'
	// anywhere. Inside a character class '|' is a literal, not alternation.
	// NOTE(review): confirm that accepting '|' is intended.
	isSeparator = regexp.MustCompile(`[(|)]`).MatchString
)
// Scan tokenizes t.content and stores the resulting tokens in t.tokens.
//
// A run of ASCII letters becomes one "KEYWORD" token when the word is listed
// in keywords and one "ALPHA" token otherwise. ':' and every rune accepted by
// isSeparator become one-rune tokens whose kind equals their text. Spaces,
// tabs and carriage returns are skipped, and a newline advances the line
// counter and resets the column. An "EOF" token is always appended last.
//
// Any other rune is reported on stderr as "file:line:column ..." and the
// process exits with status 1.
//
// Every call starts from scratch: tokens left over from an earlier call are
// discarded.
func (t *Tokenizer) Scan() {
	var s scanner.Scanner
	s.Init(bytes.NewReader(t.content))
	s.Filename = t.fileName

	// Line and column are reset for every run, so the token list must be too;
	// otherwise a second call would append to the previous result.
	t.tokens = nil
	t.currentLine = 1
	t.currentColumn = 0

	for tok := s.Next(); tok != scanner.EOF; tok = s.Next() {
		t.currentColumn++

		switch {
		case isAlpha(string(tok)):
			startColumn := t.currentColumn
			literalInRunes := []rune{tok}
			// Peek before consuming so the rune that ends the word is left
			// for the next iteration of the outer loop.
			for isAlpha(string(s.Peek())) {
				literalInRunes = append(literalInRunes, s.Next())
				t.currentColumn++
			}
			literal := string(literalInRunes)

			kind := "ALPHA"
			if isKeyword(literal) {
				kind = "KEYWORD"
			}
			t.addToken(kind, literal)
			// addToken stamps the token with currentColumn, which by now
			// points at the word's last rune; record where the word starts.
			t.tokens[len(t.tokens)-1].column = startColumn

		case tok == ' ', tok == '\t', tok == '\r':
			// Insignificant whitespace. Tabs and carriage returns are skipped
			// so that tab-indented and CRLF-terminated input can be scanned.

		case tok == '\n':
			t.currentColumn = 0
			t.currentLine++

		case tok == ':':
			t.addToken(":", ":")

		case isSeparator(string(tok)):
			t.addToken(string(tok), string(tok))

		default:
			fmt.Fprintf(os.Stderr, "%s:%d:%d unexpected character '%s' \n", t.fileName, t.currentLine, t.currentColumn, string(tok))
			os.Exit(1)
		}
	}

	t.addToken("EOF", "EOF")
}
// isKeyword reports whether str is exactly equal to one of the entries in
// keywords. The comparison is case-sensitive.
func isKeyword(str string) bool {
	found := false
	// Stop at the first match; the remaining entries cannot change the result.
	for i := 0; i < len(keywords) && !found; i++ {
		found = keywords[i] == str
	}
	return found
}
// addToken appends a token of the given kind and value to t.tokens, stamping
// it with the tokenizer's current line and column.
func (t *Tokenizer) addToken(kind string, value string) {
	t.tokens = append(t.tokens, Token{
		kind:   kind,
		value:  value,
		line:   t.currentLine,
		column: t.currentColumn,
	})
}