-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.cpp
139 lines (131 loc) · 3.57 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
//
// Created by 23766 on 2024/10/5.
//
#include "lexer.h"
#include <utility>
Lexer::Lexer(std::string input): input(std::move(input)),position(0),readPosition(0) {
readChar();
}
// Destructor: the compiler-generated behaviour is used; no manual cleanup is performed here.
Lexer::~Lexer() = default;
// Advances the lexer by one character.
//
// `ch` becomes the character at `readPosition`, or 0 once `readPosition` has
// reached the end of `input`. Afterwards `position` is the offset that was
// just read and `readPosition` is one past it.
void Lexer::readChar() {
    const bool exhausted = readPosition >= input.length();
    ch = exhausted ? '\0' : input[readPosition];
    // position takes the old readPosition; readPosition then moves on by one.
    position = readPosition++;
}
// Scans and returns the next token from the input.
//
// Leading whitespace is skipped first. A run of letters/underscores is read
// with readIdentifier() and its type is whatever LookupIndent() returns for
// that text; a run of digits is read with readNumber() and typed as INT.
// Both readers leave `ch` on the first character after the lexeme, so those
// paths return without advancing again.
//
// Every other token is decided from the current character, using one
// character of lookahead to tell "==" from "=" and "!=" from "!". The lexer
// then advances past the token. End of input (ch == 0) produces EOF_ with an
// empty literal; any character not listed produces ILLEGAL carrying that
// character as its literal.
Token Lexer::NextToken() {
    skipWhitespace();

    // Multi-character lexemes: these never collide with the operator and
    // delimiter characters handled by the switch below.
    if (isLetter(ch)) {
        Token word;
        word.literal = readIdentifier();
        word.type = LookupIndent(word.literal);
        return word;
    }
    if (isDigit(ch)) {
        Token number;
        number.type = TokenType::INT;
        number.literal = readNumber();
        return number;
    }

    // Start with the current character as the literal; two-character
    // operators append their second character, EOF clears it.
    std::string lexeme(1, ch);
    auto kind = TokenType::ILLEGAL;

    switch (ch) {
        case '=':
            if (peekChar() == '=') {
                readChar();      // step onto the second '='
                lexeme += ch;
                kind = TokenType::EQ;
            } else {
                kind = TokenType::ASSIGN;
            }
            break;
        case '!':
            if (peekChar() == '=') {
                readChar();      // step onto the '='
                lexeme += ch;
                kind = TokenType::NOT_EQ;
            } else {
                kind = TokenType::BANG;
            }
            break;
        case '+': kind = TokenType::PLUS;      break;
        case '-': kind = TokenType::MINUS;     break;
        case '*': kind = TokenType::ASTERISK;  break;
        case '/': kind = TokenType::SLASH;     break;
        case '<': kind = TokenType::LT;        break;
        case '>': kind = TokenType::GT;        break;
        case ',': kind = TokenType::COMMA;     break;
        case ';': kind = TokenType::SEMICOLON; break;
        case '(': kind = TokenType::LPAREN;    break;
        case ')': kind = TokenType::RPAREN;    break;
        case '{': kind = TokenType::LBRACE;    break;
        case '}': kind = TokenType::RBRACE;    break;
        case 0:
            kind = TokenType::EOF_;
            lexeme.clear();      // EOF carries an empty literal
            break;
        default:
            // Unrecognised character: stays ILLEGAL with itself as literal.
            break;
    }

    // Move past the last character of the token just recognised.
    readChar();
    return {kind, std::move(lexeme)};
}
// Consumes a run of identifier characters (as accepted by isLetter) starting
// at the current position and returns that text. On return `ch` is the first
// character that is not part of the run. If the current character is not an
// identifier character, nothing is consumed and an empty string is returned.
std::string Lexer::readIdentifier() {
    const auto start = position;
    for (; isLetter(ch); readChar()) {
        // advancing is all the work; the text is sliced out afterwards
    }
    const auto length = position - start;
    // substr takes the starting offset and the number of characters to copy.
    return input.substr(start, length);
}
// Consumes a run of decimal digits (as accepted by isDigit) starting at the
// current position and returns that text. On return `ch` is the first
// non-digit character. If the current character is not a digit, nothing is
// consumed and an empty string is returned.
std::string Lexer::readNumber() {
    const auto start = position;
    for (; isDigit(ch); readChar()) {
        // advancing is all the work; the text is sliced out afterwards
    }
    const auto length = position - start;
    return input.substr(start, length);
}
// Advances past any run of spaces, tabs, line feeds and carriage returns,
// stopping with `ch` on the first character that is none of those.
void Lexer::skipWhitespace() {
    for (;;) {
        switch (ch) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                readChar();
                break;   // leaves the switch only; the loop continues
            default:
                return;
        }
    }
}
// Returns the character at `readPosition` without advancing the lexer, or 0
// when `readPosition` is at or beyond the end of the input.
char Lexer::peekChar() {
    const bool hasNext = readPosition < input.length();
    return hasNext ? input[readPosition] : '\0';
}
// helper function
// Returns true when `ch` may appear in an identifier: an ASCII letter
// ('a'-'z' or 'A'-'Z') or an underscore. Digits are not accepted.
static bool isLetter(char ch) {
    const bool lower = (ch >= 'a') && (ch <= 'z');
    const bool upper = (ch >= 'A') && (ch <= 'Z');
    const bool underscore = (ch == '_');
    return lower || upper || underscore;
}
// Returns true when `ch` is an ASCII decimal digit ('0' through '9').
static bool isDigit(char ch) {
    // Distance from '0'; digits are exactly the offsets 0..9.
    const int offset = ch - '0';
    return offset >= 0 && offset <= 9;
}