forked from tetratelabs/wazero
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassembler.go
200 lines (162 loc) · 8.04 KB
/
assembler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
package asm
import (
"fmt"
"math"
)
// Register identifies an architecture-specific register.
//
// The zero value is NilRegister, which means that no register is specified.
type Register byte
// NilRegister is the zero Register. It is the only register value that has the
// same meaning on every architecture: no register is specified.
const NilRegister = Register(0)
// Instruction identifies an architecture-specific instruction.
//
// It is 16 bits wide so that the large number of vector operations fits.
type Instruction uint16 // to accommodate the high cardinality of vector ops
// ConditionalRegisterState represents an architecture-specific state of the
// conditional register.
//
// The zero value is ConditionalRegisterStateUnset, which means that no
// conditional state is specified.
type ConditionalRegisterState byte
// ConditionalRegisterStateUnset is the zero ConditionalRegisterState. It is the
// only conditional state that has the same meaning on every architecture:
// no conditional state is specified.
const ConditionalRegisterStateUnset = ConditionalRegisterState(0)
// Node represents a node in the linked list of assembled operations.
//
// Node embeds fmt.Stringer, so every implementation must also provide String().
type Node interface {
fmt.Stringer
// AssignJumpTarget assigns the given target node as the destination of
// the jump instruction for this Node.
AssignJumpTarget(target Node)
// AssignDestinationConstant assigns the given constant as the destination
// of the instruction for this Node.
AssignDestinationConstant(value ConstantValue)
// AssignSourceConstant assigns the given constant as the source
// of the instruction for this Node.
AssignSourceConstant(value ConstantValue)
// OffsetInBinary returns the offset of this Node in the assembled binary.
OffsetInBinary() NodeOffsetInBinary
}
// NodeOffsetInBinary represents the offset of a Node in the final binary.
//
// It is an alias of uint64 (not a distinct type), so the two are interchangeable.
type NodeOffsetInBinary = uint64
// ConstantValue represents a constant value used in an instruction.
//
// It is an alias of int64 (not a distinct type), so the two are interchangeable.
type ConstantValue = int64
// StaticConst represents arbitrary constant bytes which are pooled and emitted by the assembler into the binary.
// These constants can be referenced by instructions.
type StaticConst struct {
// Raw holds the bytes of this constant.
Raw []byte
// OffsetInBinary is the offset of this static const in the result binary.
// It is assigned by SetOffsetInBinary.
OffsetInBinary uint64
// offsetFinalizedCallbacks holds callbacks which are called, in registration order,
// when .OffsetInBinary is finalized by the assembler implementation.
offsetFinalizedCallbacks []func(offsetOfConstInBinary uint64)
}
// NewStaticConst returns a pointer to a new StaticConst holding the given bytes.
//
// The raw slice is stored as-is (it is not copied). The offset and the callback
// list start out as their zero values.
func NewStaticConst(raw []byte) *StaticConst {
	c := new(StaticConst)
	c.Raw = raw
	return c
}
// AddOffsetFinalizedCallback registers cb at the end of the list of callbacks
// kept in offsetFinalizedCallbacks.
func (s *StaticConst) AddOffsetFinalizedCallback(cb func(offsetOfConstInBinary uint64)) {
	callbacks := s.offsetFinalizedCallbacks
	s.offsetFinalizedCallbacks = append(callbacks, cb)
}
// SetOffsetInBinary records offset as the final position of this StaticConst and
// then runs every registered callback with that offset, in registration order.
func (s *StaticConst) SetOffsetInBinary(offset uint64) {
	s.OffsetInBinary = offset
	// Snapshot the slice header so the set of callbacks to run is fixed up front.
	callbacks := s.offsetFinalizedCallbacks
	for i := 0; i < len(callbacks); i++ {
		callbacks[i](offset)
	}
}
// StaticConstPool holds a bulk of StaticConst which are yet to be emitted into the binary.
type StaticConstPool struct {
// FirstUseOffsetInBinary holds the offset of the first instruction which accesses this const pool.
// It is nil while the pool is empty.
FirstUseOffsetInBinary *NodeOffsetInBinary
// Consts holds the pooled constants in the order in which they were added.
Consts []*StaticConst
// addedConsts is used to deduplicate the consts to reduce the final size of binary.
// Note: we can use map on .Consts field and remove this field,
// but we have the separate field for deduplication in order to have deterministic assembling behavior.
addedConsts map[*StaticConst]struct{}
// PoolSizeInBytes is the current size of the pool in bytes.
PoolSizeInBytes int
}
// NewStaticConstPool returns an empty StaticConstPool whose deduplication set
// is already allocated. All other fields are left as their zero values.
func NewStaticConstPool() StaticConstPool {
	var pool StaticConstPool
	pool.addedConsts = make(map[*StaticConst]struct{})
	return pool
}
// Reset empties the *StaticConstPool so that it can be reused.
//
// Every pooled constant is removed from the deduplication set, the size and the
// first-use offset are cleared, and Consts is truncated to length zero.
func (p *StaticConstPool) Reset() {
	for i := range p.Consts {
		delete(p.addedConsts, p.Consts[i])
	}
	p.FirstUseOffsetInBinary = nil
	p.PoolSizeInBytes = 0
	// Truncate rather than reallocate so the backing array is kept for reuse.
	p.Consts = p.Consts[:0]
}
// AddConst adds a *StaticConst into the pool if it's not already added.
//
// Constants are deduplicated by pointer identity: adding the same *StaticConst
// again is a no-op. For a newly added constant, c is appended to Consts and
// PoolSizeInBytes grows by len(c.Raw). useOffset is recorded as
// FirstUseOffsetInBinary only if no first-use offset has been recorded yet.
//
// AddConst also works on a zero-value StaticConstPool: the deduplication set is
// allocated on first use.
func (p *StaticConstPool) AddConst(c *StaticConst, useOffset NodeOffsetInBinary) {
	// Looking up a key in a nil map is safe and simply reports "not found".
	if _, ok := p.addedConsts[c]; ok {
		return
	}
	// Writing to a nil map panics, so allocate the set lazily for pools that
	// were not created via NewStaticConstPool.
	if p.addedConsts == nil {
		p.addedConsts = map[*StaticConst]struct{}{}
	}
	if p.FirstUseOffsetInBinary == nil {
		p.FirstUseOffsetInBinary = &useOffset
	}
	p.Consts = append(p.Consts, c)
	p.PoolSizeInBytes += len(c.Raw)
	p.addedConsts[c] = struct{}{}
}
// AssemblerBase is the common interface for assemblers among multiple architectures.
//
// Note: some of the methods can be implemented in an arch-independent way, but not all can be
// implemented as such. However, we intentionally put such arch-dependent methods here
// in order to provide the common documentation interface.
type AssemblerBase interface {
// Reset resets the state of the Assembler implementation and marks it ready for
// the compilation of a new function.
Reset()
// Assemble produces the final binary for the assembled operations.
Assemble() ([]byte, error)
// SetJumpTargetOnNext instructs the assembler that the next node must be
// assigned to the given node's jump destination.
SetJumpTargetOnNext(node Node)
// BuildJumpTable calculates the offsets between the first instruction `initialInstructions[0]`
// and others (e.g. initialInstructions[3]), and writes the calculated offsets into the pre-allocated
// `table` StaticConst in little endian.
BuildJumpTable(table *StaticConst, initialInstructions []Node)
// AllocateNOP allocates a Node for a NOP instruction.
AllocateNOP() Node
// Add appends the given `Node` to the assembled linked list.
Add(Node)
// CompileStandAlone adds an instruction that takes no arguments.
CompileStandAlone(instruction Instruction) Node
// CompileConstToRegister adds an instruction where the source operand is `value` as a constant and
// the destination is the `destinationReg` register.
CompileConstToRegister(instruction Instruction, value ConstantValue, destinationReg Register) Node
// CompileRegisterToRegister adds an instruction where the source and destination operands are registers.
CompileRegisterToRegister(instruction Instruction, from, to Register)
// CompileMemoryToRegister adds an instruction where the source operand is the memory address specified by
// `sourceBaseReg+sourceOffsetConst` and the destination is the `destinationReg` register.
CompileMemoryToRegister(
instruction Instruction,
sourceBaseReg Register,
sourceOffsetConst ConstantValue,
destinationReg Register,
)
// CompileRegisterToMemory adds an instruction where the source operand is the `sourceRegister` register and
// the destination is the memory address specified by `destinationBaseRegister+destinationOffsetConst`.
CompileRegisterToMemory(
instruction Instruction,
sourceRegister Register,
destinationBaseRegister Register,
destinationOffsetConst ConstantValue,
)
// CompileJump adds a jump-type instruction and returns the corresponding Node in the assembled linked list.
CompileJump(jmpInstruction Instruction) Node
// CompileJumpToRegister adds a jump-type instruction whose destination is the memory address specified by
// the `reg` register.
CompileJumpToRegister(jmpInstruction Instruction, reg Register)
// CompileReadInstructionAddress adds an ADR instruction to set the absolute address of the "target instruction"
// into destinationRegister. The "target instruction" is specified by the beforeAcquisitionTargetInstruction
// argument: the target is the instruction right after the instruction of that type.
//
// For example, if `beforeAcquisitionTargetInstruction == RET` and we have the instruction sequence like
// `ADR -> X -> Y -> ... -> RET -> MOV`, then the `ADR` instruction emitted by this function sets the absolute
// address of the `MOV` instruction into the destination register.
CompileReadInstructionAddress(destinationRegister Register, beforeAcquisitionTargetInstruction Instruction)
}
// JumpTableMaximumOffset represents the limit on the size of a jump table in bytes.
//
// In practice this limit is only relevant when users try loading an extremely large WebAssembly
// binary which contains a br_table statement with approximately 4294967296 (2^32) targets.
// Realistically speaking, that kind of binary could result in more than ten gigabytes of native
// compiled code where we have to care about huge stacks whose height might exceed 32-bit range,
// and such a huge stack doesn't work with the current implementation.
const JumpTableMaximumOffset = math.MaxUint32