Skip to content

Commit 4a2bf63

Browse files
authored
Preserve pointer provenance in the Rust backend. (#870)
* Preserve pointer provenance in the Rust backend.

  Use the new `Pointer` and `Length` types in the Rust backend to emit code that uses `*mut c_void` and `usize` instead of `i32` when working with pointers and array lengths.

  To represent `PointerOrI64`, use a `MaybeUninit<u64>`, since that type can hold any `u64` and is documented to also preserve provenance.

  This change happens to get the generated Rust code close to supporting memory64; however, it isn't complete: the abi code still emits hard-coded `+ 4` offsets for loading the length of a pointer+length pair in memory.

* Fully-qualify `core` in macro expansions.

* Fix bitcasts between i32 and pointer.

* Implement Bitcast::I32ToP and PToI32 in more backends.

* Use `*mut u8` instead of `*mut c_void` for pointers.

  And switch to `add` from `byte_add`.

* Add documentation to `Bitcast` arms about provenance.
1 parent ec84c8e commit 4a2bf63

File tree

9 files changed

+273
-108
lines changed

9 files changed

+273
-108
lines changed

Cargo.lock

Lines changed: 32 additions & 32 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ clap = { version = "4.3.19", features = ["derive"] }
2929
env_logger = "0.10.0"
3030
indexmap = "2.0.0"
3131

32-
wasmparser = "0.200.0"
33-
wasm-encoder = "0.200.0"
34-
wasm-metadata = "0.200.0"
35-
wit-parser = "0.200.0"
36-
wit-component = "0.200.0"
32+
wasmparser = "0.201.0"
33+
wasm-encoder = "0.201.0"
34+
wasm-metadata = "0.201.0"
35+
wit-parser = "0.201.0"
36+
wit-component = "0.201.0"
3737

3838
wit-bindgen-core = { path = 'crates/core', version = '0.19.2' }
3939
wit-bindgen-c = { path = 'crates/c', version = '0.19.2' }

crates/c/src/lib.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2205,13 +2205,18 @@ impl Bindgen for FunctionBindgen<'_, '_> {
22052205
op
22062206
));
22072207
}
2208-
Bitcast::I32ToI64 => {
2208+
Bitcast::I32ToI64 | Bitcast::PToP64 => {
22092209
results.push(format!("(int64_t) {}", op));
22102210
}
2211-
Bitcast::I64ToI32 => {
2211+
Bitcast::I64ToI32 | Bitcast::P64ToP => {
22122212
results.push(format!("(int32_t) {}", op));
22132213
}
2214-
Bitcast::None => results.push(op.to_string()),
2214+
Bitcast::I64ToP64 | Bitcast::P64ToI64 => {
2215+
results.push(format!("{}", op));
2216+
}
2217+
Bitcast::I32ToP | Bitcast::PToI32 | Bitcast::None => {
2218+
results.push(op.to_string())
2219+
}
22152220
}
22162221
}
22172222
}
@@ -2869,11 +2874,17 @@ impl Bindgen for FunctionBindgen<'_, '_> {
28692874
}
28702875
}
28712876

2872-
Instruction::I32Load { offset } => self.load("int32_t", *offset, operands, results),
2877+
Instruction::I32Load { offset }
2878+
| Instruction::PointerLoad { offset }
2879+
| Instruction::LengthLoad { offset } => {
2880+
self.load("int32_t", *offset, operands, results)
2881+
}
28732882
Instruction::I64Load { offset } => self.load("int64_t", *offset, operands, results),
28742883
Instruction::F32Load { offset } => self.load("float", *offset, operands, results),
28752884
Instruction::F64Load { offset } => self.load("double", *offset, operands, results),
2876-
Instruction::I32Store { offset } => self.store("int32_t", *offset, operands),
2885+
Instruction::I32Store { offset }
2886+
| Instruction::PointerStore { offset }
2887+
| Instruction::LengthStore { offset } => self.store("int32_t", *offset, operands),
28772888
Instruction::I64Store { offset } => self.store("int64_t", *offset, operands),
28782889
Instruction::F32Store { offset } => self.store("float", *offset, operands),
28792890
Instruction::F64Store { offset } => self.store("double", *offset, operands),
@@ -3015,6 +3026,9 @@ fn wasm_type(ty: WasmType) -> &'static str {
30153026
WasmType::I64 => "int64_t",
30163027
WasmType::F32 => "float",
30173028
WasmType::F64 => "double",
3029+
WasmType::Pointer => "uintptr_t",
3030+
WasmType::PointerOrI64 => "int64_t",
3031+
WasmType::Length => "size_t",
30183032
}
30193033
}
30203034

crates/core/src/abi.rs

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ def_instruction! {
113113
/// it, using the specified constant offset.
114114
F64Load { offset: i32 } : [1] => [1],
115115

116+
/// Like `I32Load` or `I64Load`, but for loading pointer values.
117+
PointerLoad { offset: i32 } : [1] => [1],
118+
/// Like `I32Load` or `I64Load`, but for loading array length values.
119+
LengthLoad { offset: i32 } : [1] => [1],
120+
116121
/// Pops an `i32` address from the stack and then an `i32` value.
117122
/// Stores the value in little-endian at the pointer specified plus the
118123
/// constant `offset`.
@@ -138,6 +143,11 @@ def_instruction! {
138143
/// constant `offset`.
139144
F64Store { offset: i32 } : [2] => [0],
140145

146+
/// Like `I32Store` or `I64Store`, but for storing pointer values.
147+
PointerStore { offset: i32 } : [2] => [0],
148+
/// Like `I32Store` or `I64Store`, but for storing array length values.
149+
LengthStore { offset: i32 } : [2] => [0],
150+
141151
// Scalar lifting/lowering
142152

143153
/// Converts an interface type `char` value to a 32-bit integer
@@ -526,6 +536,24 @@ pub enum Bitcast {
526536
I64ToI32,
527537
I64ToF32,
528538

539+
// PointerOrI64<->Pointer conversions. These preserve provenance.
540+
//
541+
// These are used when pointer values are being stored in
542+
// (PToP64) and loaded out of (P64ToP) PointerOrI64 values, so they
543+
// always have to preserve provenance.
544+
P64ToP,
545+
PToP64,
546+
547+
// Pointer<->integer conversions. These do not preserve provenance.
548+
//
549+
// These are used when integer values are being stored in
550+
// (I64ToP64 and I32ToP) and loaded out of (P64ToI64 and PToI32) pointer
551+
// or PointerOrI64 values, so they never have any provenance to preserve.
552+
P64ToI64,
553+
I64ToP64,
554+
I32ToP,
555+
PToI32,
556+
529557
None,
530558
}
531559

@@ -1517,9 +1545,9 @@ impl<'a, B: Bindgen> Generator<'a, B> {
15171545
// and the length into the high address.
15181546
self.lower(ty);
15191547
self.stack.push(addr.clone());
1520-
self.emit(&Instruction::I32Store { offset: offset + 4 });
1548+
self.emit(&Instruction::LengthStore { offset: offset + 4 });
15211549
self.stack.push(addr);
1522-
self.emit(&Instruction::I32Store { offset });
1550+
self.emit(&Instruction::PointerStore { offset });
15231551
}
15241552

15251553
fn write_fields_to_memory<'b>(
@@ -1689,9 +1717,9 @@ impl<'a, B: Bindgen> Generator<'a, B> {
16891717
// Read the pointer/len and then perform the standard lifting
16901718
// processes.
16911719
self.stack.push(addr.clone());
1692-
self.emit(&Instruction::I32Load { offset });
1720+
self.emit(&Instruction::PointerLoad { offset });
16931721
self.stack.push(addr);
1694-
self.emit(&Instruction::I32Load { offset: offset + 4 });
1722+
self.emit(&Instruction::LengthLoad { offset: offset + 4 });
16951723
self.lift(ty);
16961724
}
16971725

@@ -1742,9 +1770,9 @@ impl<'a, B: Bindgen> Generator<'a, B> {
17421770
match *ty {
17431771
Type::String => {
17441772
self.stack.push(addr.clone());
1745-
self.emit(&Instruction::I32Load { offset });
1773+
self.emit(&Instruction::PointerLoad { offset });
17461774
self.stack.push(addr);
1747-
self.emit(&Instruction::I32Load { offset: offset + 4 });
1775+
self.emit(&Instruction::LengthLoad { offset: offset + 4 });
17481776
self.emit(&Instruction::GuestDeallocateString);
17491777
}
17501778

@@ -1772,9 +1800,9 @@ impl<'a, B: Bindgen> Generator<'a, B> {
17721800
self.finish_block(0);
17731801

17741802
self.stack.push(addr.clone());
1775-
self.emit(&Instruction::I32Load { offset });
1803+
self.emit(&Instruction::PointerLoad { offset });
17761804
self.stack.push(addr);
1777-
self.emit(&Instruction::I32Load { offset: offset + 4 });
1805+
self.emit(&Instruction::LengthLoad { offset: offset + 4 });
17781806
self.emit(&Instruction::GuestDeallocateList { element });
17791807
}
17801808

@@ -1862,7 +1890,12 @@ fn cast(from: WasmType, to: WasmType) -> Bitcast {
18621890
use WasmType::*;
18631891

18641892
match (from, to) {
1865-
(I32, I32) | (I64, I64) | (F32, F32) | (F64, F64) => Bitcast::None,
1893+
(I32, I32)
1894+
| (I64, I64)
1895+
| (F32, F32)
1896+
| (F64, F64)
1897+
| (Pointer, Pointer)
1898+
| (Length, Length) => Bitcast::None,
18661899

18671900
(I32, I64) => Bitcast::I32ToI64,
18681901
(F32, I32) => Bitcast::F32ToI32,
@@ -1875,7 +1908,22 @@ fn cast(from: WasmType, to: WasmType) -> Bitcast {
18751908
(F32, I64) => Bitcast::F32ToI64,
18761909
(I64, F32) => Bitcast::I64ToF32,
18771910

1878-
(F32, F64) | (F64, F32) | (F64, I32) | (I32, F64) => unreachable!(),
1911+
(I64, PointerOrI64) => Bitcast::I64ToP64,
1912+
(PointerOrI64, I64) => Bitcast::P64ToI64,
1913+
(Pointer, PointerOrI64) => Bitcast::PToP64,
1914+
(PointerOrI64, Pointer) => Bitcast::P64ToP,
1915+
1916+
(I32, Pointer) => Bitcast::I32ToP,
1917+
(Pointer, I32) => Bitcast::PToI32,
1918+
1919+
(Pointer | PointerOrI64 | Length, _)
1920+
| (_, Pointer | PointerOrI64 | Length)
1921+
| (F32, F64)
1922+
| (F64, F32)
1923+
| (F64, I32)
1924+
| (I32, F64) => {
1925+
unreachable!("Don't know how to bitcast from {:?} to {:?}", from, to);
1926+
}
18791927
}
18801928
}
18811929

0 commit comments

Comments
 (0)