-
Notifications
You must be signed in to change notification settings - Fork 1
/
bashlex.rb
138 lines (127 loc) · 3.34 KB
/
bashlex.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
require 'strscan'
# Tokenizer for a subset of bash syntax, driven by a StringScanner and a
# table of per-mode regular-expression rules (see TOKENS).
#
# Tokens are two-element arrays: [type, text]. For rules whose type is nil
# the matched text itself is used as the type (e.g. [";", ";"]). A command
# substitution is delivered as a single [:BACKTICK, tokens] pair whose second
# element is the list of tokens collected inside it, terminated by
# [:EOF, false]. The end of input is signalled with [:EOF, false].
class Lexer
  # The StringScanner wrapping the input (not the raw String).
  attr_reader :str

  # @param str [String] shell source text to tokenize
  def initialize(str)
    @str = StringScanner.new str
  end

  # Tests whether +str+ is a prefix of any value in OPS.
  #
  # NOTE(review): OPS is not defined in the visible source; unless it is
  # provided elsewhere, calling this with a non-empty string raises
  # NameError. Confirm where OPS is expected to come from.
  #
  # @param str [String] candidate operator prefix
  # @return [false, Object] false for an empty string, otherwise the result
  #   of OPS.detect (the first matching entry, or nil)
  def op?(str)
    !str.empty? && OPS.detect { |_k, v| v.start_with?(str) }
  end

  # Lexing rules, keyed by mode. Each rule is [type, pattern, switch]:
  # * type    - token type, or nil to use the matched text as the type
  # * pattern - regexp tried at the current scan position
  # * switch  - optional; :pop returns to the previous mode, any other
  #             symbol pushes the current mode and enters the named one
  # In every mode other than :root the mode's own rules are tried first,
  # followed by the :root rules.
  TOKENS = {
    root: [
      [nil, /[;\n]/m],
      [nil, /[|]/],
      [:keyword, /\$\(\(/, :math],
      [:BACKTICK, /\$\(/, :paren],
      [:keyword, /\${#?/, :curly],
      [:BACKTICK, /`/, :backticks],
      # -- basic --
      [:SPACE, /#.*/],
      [:escape, /\\[\w\W]/],
      [:ASGNWORD, /(\b\w+)(\s*)(=)/],
      # [:operator, /[\[\]{}()=]/],
      [:here, /<<</],
      [:operator, /&&|\|\|/],
      # -- data --
      [:DQUOTE, /"/, :dquote],
      [:SQUOTE, /'(\\\\|\\[0-7]+|\\.|[^'\\])*'/],
      [:SPACE, /[ \t]+/],
      [:WORD, /[^\s{}|();\n$"\'`\\<]+/],
      [:number, /\d+(?= |\Z)/],
      [:WORD, /\$#?(\w+|.)/],
      [:WORD, /</]
    ],
    curly: [
      [:keyword, /}/, :pop],
      [:keyword, /:-/],
      [:varname, /[a-zA-Z0-9_]+/],
      [:punct, /[^}:"'`$]+/],
      [:punct, /:/],
    ],
    paren: [
      [:ENDBACKTICK, /\)/, :pop]
    ],
    math: [
      [:keyword, /\)\)/, :pop],
      [:operator, /[-+*\/%^|&]|\*\*|\|\|/],
      [:number, /\d+/]
    ],
    backticks: [
      [:ENDBACKTICK, /`/, :pop]
    ],
    dquote: [
      [:ENDDQUOTE, /"/, :pop],
      [:BACKTICK, /\$\(/, :paren],
      [:BACKTICK, /`/, :backticks],
      [:VAR, /[$][a-zA-Z0-9_]+/],
      [:WORD, /([^"$`\\]|\\.)+/],
    ],
  }

  # Words that are promoted from :WORD to a keyword token by #run when they
  # appear right after a RESTART token.
  SPECIALS = %w|
    if then else elif fi case esac for while until do done in function time { } ! [[ ]] coproc
  |

  # Token texts after which a reserved word may appear. The trailing nil
  # stands for "no previous token", i.e. the very start of the input.
  RESTART = %W[
    \n ; ( ) | & { } && ! |& do elif else if || ;; then time coproc until while
  ] + [nil]

  # Lexes the input and yields each token, promoting reserved words.
  #
  # A :WORD token becomes [word, word] when
  # * it is "in" and the two preceding tokens are a "for" keyword followed
  #   by a :WORD, or
  # * it is listed in SPECIALS and the preceding token's text is listed in
  #   RESTART (or there is no preceding token at all).
  #
  # The scanner is consumed as tokens are produced, so the input can only be
  # lexed once per Lexer instance.
  #
  # @yieldparam token [Array] a [type, text] pair
  # @return [Enumerator] when called without a block
  def run
    return enum_for(:run) unless block_given?

    prev_tokens = []
    simple_run do |token|
      token = promote_word(token, prev_tokens) if token.first == :WORD
      yield token
      # Only the last two tokens are ever inspected, so keep just those.
      prev_tokens.push token
      prev_tokens.shift if prev_tokens.size > 2
    end
  end

  # Lexes the input and yields raw tokens without reserved-word promotion.
  #
  # :SPACE matches (blanks and comments) are dropped. While the current mode
  # is :backticks or :paren, matched tokens are collected and delivered as
  # one [:BACKTICK, tokens] pair when the closing delimiter is seen. A
  # character that no rule matches is yielded as [char, char].
  #
  # @yieldparam token [Array] a [type, text] pair
  # @return [Enumerator] when called without a block
  def simple_run
    return enum_for(:simple_run) unless block_given?

    mode = :root
    backtick = []
    modestack = []
    # Cache the combined rule list per mode instead of rebuilding it for
    # every token.
    rules = Hash.new do |cache, m|
      cache[m] = m == :root ? TOKENS[:root] : TOKENS[m] + TOKENS[:root]
    end

    until @str.eos?
      text = nil
      # First rule whose pattern matches at the current position wins.
      type, _pattern, switch = rules[mode].find { |_, pattern| text = @str.scan(pattern) }

      if text.nil?
        # No rule applies: emit the single character as its own token.
        char = @str.getch
        yield [char, char]
        next
      end

      if type == :SPACE
        # Whitespace and comments produce no token.
      elsif type == :ENDBACKTICK
        backtick << [:EOF, false]
        yield [:BACKTICK, backtick]
        backtick = []
      elsif [:backticks, :paren].include?(mode)
        backtick << [(type || text), text]
      elsif type != :BACKTICK
        # The opening delimiter of a substitution is not a token by itself.
        yield [(type || text), text]
      end

      if switch == :pop
        mode = modestack.pop
      elsif switch
        modestack.push mode
        mode = switch
      end
    end
    yield [:EOF, false]
  end

  private

  # Returns the keyword form of a :WORD token when its position makes it a
  # reserved word, otherwise the token unchanged.
  def promote_word(token, prev_tokens)
    word = token.last
    if word == "in" && for_variable_before?(prev_tokens)
      ["in", "in"]
    elsif SPECIALS.include?(word) && RESTART.include?(restart_text(prev_tokens))
      [word, word]
    else
      token
    end
  end

  # True when the two preceding tokens are the "for" keyword and a :WORD.
  # Checking the size first avoids calling #first on nil near the start of
  # the input.
  def for_variable_before?(prev_tokens)
    prev_tokens.size == 2 &&
      prev_tokens[1].first == :WORD &&
      prev_tokens[0].first == "for"
  end

  # Text of the preceding token, or nil at the start of the input (nil is a
  # member of RESTART).
  def restart_text(prev_tokens)
    prev_tokens.empty? ? nil : prev_tokens.last.last
  end
end
# Demo: when this file is executed directly, lex a sample script and print
# every token followed by the line number taken from the second caller frame.
if __FILE__ == $PROGRAM_NAME
  sample = "foo | bar | baz; for i in `ls *.txt`;do echo \"Foo $i has 5 \\$\"\necho bar | grep --color=auto 'a'; done # This is a comment"
  Lexer.new(sample).run do |token|
    p token
    puts caller[1].split(':')[1]
  end
end