Skip to content

Commit 1ac069b

Browse files
Merge branch 'trunk' into public-deserializer
2 parents f286ab3 + d8baf43 commit 1ac069b

18 files changed

+224
-101
lines changed

.github/workflows/cifuzz.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
},
2828
{
2929
"name": "Upload Crash",
30-
"uses": "actions/upload-artifact@v3",
30+
"uses": "actions/upload-artifact@v4",
3131
"if": "failure() && steps.build.outcome == 'success'",
3232
"with": {
3333
"name": "artifacts",

.github/workflows/cross_platform.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@
114114
# "mips64-unknown-linux-muslabi64",
115115
# "mips64el-unknown-linux-muslabi64",
116116
# "mipsel-unknown-linux-musl",
117-
"sparc64-unknown-linux-gnu",
117+
# Could not link to `getrandom`
118+
# "sparc64-unknown-linux-gnu",
118119
# BLOCKEDTODO(https://github.com/cross-rs/cross/issues/975): currently broken
119120
# "sparcv9-sun-solaris",
120121
"thumbv7neon-linux-androideabi",

.github/workflows/rust.yml

+2-41
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ fi",
140140
"uses": "actions-rs/toolchain@v1",
141141
"with": {
142142
"profile": "minimal",
143-
"toolchain": "stable",
143+
"toolchain": "1.85.0",
144144
"override": true,
145145
"components": "rustfmt, clippy"
146146
},
@@ -176,7 +176,7 @@ fi",
176176
"uses": "actions-rs/toolchain@v1",
177177
"with": {
178178
"profile": "minimal",
179-
"toolchain": "stable",
179+
"toolchain": "1.85.0",
180180
"override": true,
181181
},
182182
"name": "Install Rust stable"
@@ -190,45 +190,6 @@ fi",
190190
"name": "Run compatibility tests"
191191
}
192192
]
193-
},
194-
"coverage": {
195-
"name": "Code Coverage",
196-
"runs-on": "ubuntu-latest",
197-
"steps": [
198-
{
199-
"uses": "actions/checkout@v4",
200-
"name": "Checkout"
201-
},
202-
{
203-
"uses": "actions-rs/toolchain@v1",
204-
"with": {
205-
"profile": "minimal",
206-
"toolchain": "nightly",
207-
"override": true
208-
},
209-
"name": "Install Rust nightly"
210-
},
211-
{
212-
"name": "Run cargo-tarpaulin",
213-
"uses": "actions-rs/tarpaulin@v0.1",
214-
"with": {
215-
"version": "0.19.1",
216-
"args": "--all --all-features"
217-
}
218-
},
219-
{
220-
"name": "Upload to codecov.io",
221-
"uses": "codecov/codecov-action@v3"
222-
},
223-
{
224-
"name": "Archive code coverage results",
225-
"uses": "actions/upload-artifact@v3",
226-
"with": {
227-
"name": "code-coverage-report",
228-
"path": "cobertura.xml"
229-
}
230-
}
231-
]
232193
}
233194
}
234195
}

compatibility/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,4 @@ pub fn test() {
4646

4747
For examples, see the existing cases in `compatibility/src/`.
4848

49-
- [ ] Open a [pull request](https://github.com/bincode-org/bincode/pulls) with the title `Bincode 1 compatiblity: <name of your project>`
49+
- [ ] Open a [pull request](https://github.com/bincode-org/bincode/pulls) with the title `Bincode 1 compatibility: <name of your project>`

derive/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ description = "Implementation of #[derive(Encode, Decode)] for bincode"
1919
proc-macro = true
2020

2121
[dependencies]
22-
virtue = "0.0.17"
22+
virtue = "0.0.18"

derive/src/derive_struct.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ impl DeriveStruct {
1212
pub fn generate_encode(self, generator: &mut Generator) -> Result<()> {
1313
let crate_name = &self.attributes.crate_name;
1414
generator
15-
.impl_for(&format!("{}::Encode", crate_name))
15+
.impl_for(format!("{}::Encode", crate_name))
1616
.modify_generic_constraints(|generics, where_constraints| {
1717
if let Some((bounds, lit)) =
1818
(self.attributes.encode_bounds.as_ref()).or(self.attributes.bounds.as_ref())

docs/spec.md

+173-24
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,72 @@
1-
# Serialization specification
1+
# Serialization Specification
22

3-
*NOTE*: Serialization is done by `bincode_derive` by default. If you enable the `serde` flag, serialization with `serde-derive` is supported as well. `serde-derive` has the same guarantees as `bincode_derive` for now.
3+
_NOTE_: This specification is primarily defined in the context of Rust, but aims to be implementable across different programming languages.
44

5-
Related issue: <https://github.com/serde-rs/serde/issues/1756#issuecomment-689682123>
5+
## Definitions
66

7-
## Endian
7+
- **Variant**: A specific constructor or case of an enum type.
8+
- **Variant Payload**: The associated data of a specific enum variant.
9+
- **Discriminant**: A unique identifier for an enum variant, typically represented as an integer.
10+
- **Basic Types**: Primitive types that have a direct, well-defined binary representation.
811

9-
By default `bincode` will serialize values in little endian encoding. This can be overwritten in the `Config`.
12+
## Endianness
1013

11-
## Basic types
14+
By default, this serialization format uses little-endian byte order for basic numeric types. This means multi-byte values are encoded with their least significant byte first.
1215

13-
Boolean types are encoded with 1 byte for each boolean type, with `0` being `false`, `1` being true. Whilst deserializing every other value will throw an error.
16+
Endianness can be configured with the following methods, allowing for big-endian serialization when required:
1417

15-
All basic numeric types will be encoded based on the configured [IntEncoding](#intencoding).
18+
- [`with_big_endian`](https://docs.rs/bincode/2.0.0-rc/bincode/config/struct.Configuration.html#method.with_big_endian)
19+
- [`with_little_endian`](https://docs.rs/bincode/2.0.0-rc/bincode/config/struct.Configuration.html#method.with_little_endian)
1620

17-
All floating point types will take up exactly 4 (for `f32`) or 8 (for `f64`) bytes.
21+
### Byte Order Considerations
22+
23+
- Multi-byte values (integers, floats) are affected by endianness
24+
- Single-byte values (u8, i8) are not affected
25+
- Struct and collection serialization order is not changed by endianness
26+
27+
## Basic Types
28+
29+
### Boolean Encoding
30+
31+
- Encoded as a single byte
32+
- `false` is represented by `0`
33+
- `true` is represented by `1`
34+
- During deserialization, values other than 0 and 1 will result in an error [`DecodeError::InvalidBooleanValue`](https://docs.rs/bincode/2.0.0-rc/bincode/error/enum.DecodeError.html#variant.InvalidBooleanValue)
35+
36+
### Numeric Types
37+
38+
- Encoded based on the configured [IntEncoding](#intencoding)
39+
- Signed integers use 2's complement representation
40+
- Floating point types use the IEEE 754-2008 standard
41+
- `f32`: 4 bytes (binary32)
42+
- `f64`: 8 bytes (binary64)
43+
44+
#### Floating Point Special Values
45+
46+
- Subnormal numbers are preserved
47+
- Also known as denormalized numbers
48+
- Maintain their exact bit representation
49+
- `NaN` values are preserved
50+
- Both quiet and signaling `NaN` are kept as-is
51+
- Bit pattern of `NaN` is maintained exactly
52+
- No normalization or transformation of special values occurs
53+
- Serialization and deserialization do not alter the bit-level representation
54+
- Consistent with IEEE 754-2008 standard for floating-point arithmetic
55+
56+
### Character Encoding
57+
58+
- `char` is encoded as a 32-bit unsigned integer representing its Unicode Scalar Value
59+
- Valid Unicode Scalar Value range:
60+
- 0x0000 to 0xD7FF (Basic Multilingual Plane)
61+
- 0xE000 to 0x10FFFF (remainder of the Basic Multilingual Plane and the Supplementary Planes)
62+
- Surrogate code points (0xD800 to 0xDFFF) are not valid
63+
- Invalid Unicode characters can be created via unsafe code; they are handled as follows:
64+
- during serialization: data is written as-is
65+
- during deserialization: an error is raised [`DecodeError::InvalidCharEncoding`](https://docs.rs/bincode/2.0.0-rc/bincode/error/enum.DecodeError.html#variant.InvalidCharEncoding)
66+
- No additional metadata or encoding scheme beyond the raw code point value
1867

1968
All tuples have no additional bytes, and are encoded in their specified order, e.g.
69+
2070
```rust
2171
let tuple = (u32::min_value(), i32::max_value()); // 8 bytes
2272
let encoded = bincode::encode_to_vec(tuple, bincode::config::legacy()).unwrap();
@@ -27,9 +77,11 @@ assert_eq!(encoded.as_slice(), &[
2777
```
2878

2979
## IntEncoding
80+
3081
Bincode currently supports 2 different types of `IntEncoding`. With the default config, `VarintEncoding` is selected.
3182

3283
### VarintEncoding
84+
3385
Encoding an unsigned integer `u` (of any type excepting u8/i8) works as follows:
3486

3587
1. If `u < 251`, encode it as a single byte with that value.
@@ -54,7 +106,7 @@ See the documentation of [FixintEncoding](https://docs.rs/bincode/2.0.0-rc/binco
54106

55107
Enums are encoded with their variant first, optionally followed by the variant fields. The variant index is based on the `IntEncoding` during serialization.
56108

57-
Both named and unnamed fields are serialized with their values only, and therefor encode to the same value.
109+
Both named and unnamed fields are serialized with their values only, and therefore encode to the same value.
58110

59111
```rust
60112
#[derive(bincode::Encode)]
@@ -87,6 +139,7 @@ assert_eq!(encoded.as_slice(), &[
87139
```
88140

89141
### Options
142+
90143
`Option<T>` is always serialized using a single byte for the discriminant, even in `Fixint` encoding (which normally uses a `u32` for discriminant).
91144

92145
```rust
@@ -105,17 +158,29 @@ assert_eq!(encoded.as_slice(), &[
105158

106159
# Collections
107160

108-
Collections are encoded with their length value first, following by each entry of the collection. The length value is based on your `IntEncoding`.
161+
## General Collection Serialization
109162

110-
**note**: fixed array length may not have their `len` encoded. See [Arrays](#arrays)
163+
Collections are encoded with their length value first, followed by each entry of the collection. The length value is based on the configured `IntEncoding`.
111164

112-
```rust
113-
let list = vec![
114-
0u8,
115-
1u8,
116-
2u8
117-
];
165+
### Serialization Considerations
166+
167+
- Length is always serialized first
168+
- Entries are serialized in the order they are returned from the iterator implementation.
169+
- Iteration order depends on the collection type
170+
- Ordered collections (e.g., `Vec`): Iteration from lowest to highest index
171+
- Unordered collections (e.g., `HashMap`): Implementation-defined iteration order
172+
- Duplicate keys are not checked in bincode, but may result in an error when decoding a container from a list of pairs.
173+
174+
### Handling of Specific Collection Types
118175

176+
#### Linear Collections (`Vec`, Arrays, etc.)
177+
178+
- Serialized by iterating from lowest to highest index
179+
- Length prefixed
180+
- Each item serialized sequentially
181+
182+
```rust
183+
let list = vec![0u8, 1u8, 2u8];
119184
let encoded = bincode::encode_to_vec(list, bincode::config::legacy()).unwrap();
120185
assert_eq!(encoded.as_slice(), &[
121186
3, 0, 0, 0, 0, 0, 0, 0, // length of 3u64
@@ -125,29 +190,63 @@ assert_eq!(encoded.as_slice(), &[
125190
]);
126191
```
127192

128-
This also applies to e.g. `HashMap`, where each entry is a [tuple](#basic-types) of the key and value.
193+
#### Key-Value Collections (`HashMap`, etc.)
194+
195+
- Serialized as a sequence of key-value pairs
196+
- Iteration order is implementation-defined
197+
- Each entry is a tuple of (key, value)
198+
199+
### Special Collection Considerations
200+
201+
- Bincode will serialize the entries based on the iterator order.
202+
- Deserialization is deterministic but the collection implementation might not guarantee the same order as serialization.
203+
204+
**Note**: Fixed-length arrays do not have their length encoded. See [Arrays](#arrays) for details.
129205

130206
# String and &str
131207

132-
Both `String` and `&str` are treated as a `Vec<u8>`. See [Collections](#collections) for more information.
208+
## Encoding Principles
209+
210+
- Strings are encoded as UTF-8 byte sequences
211+
- No null terminator is added
212+
- No Byte Order Mark (BOM) is written
213+
- Unicode non-characters are preserved
214+
215+
### Encoding Details
216+
217+
- Length is encoded first using the configured `IntEncoding`
218+
- Raw UTF-8 bytes follow the length
219+
- Supports the full range of valid UTF-8 sequences
220+
- `U+0000` and other code points can appear freely within the string
221+
222+
### Unicode Handling
223+
224+
- During serialization, the string is encoded as a sequence of the given bytes.
225+
- Rust strings are UTF-8 encoded by default, but this is not enforced by bincode
226+
- No normalization or transformation of text
227+
- If an invalid UTF-8 sequence is encountered during decoding, a [`DecodeError::Utf8`](https://docs.rs/bincode/2.0.0-rc/bincode/error/enum.DecodeError.html#variant.Utf8) error is raised
133228

134229
```rust
135-
let str = "Hello"; // Could also be `String::new(...)`
230+
let str = "Hello 🌍"; // Mixed ASCII and Unicode
136231

137232
let encoded = bincode::encode_to_vec(str, bincode::config::legacy()).unwrap();
138233
assert_eq!(encoded.as_slice(), &[
139-
5, 0, 0, 0, 0, 0, 0, 0, // length of the string, 5 bytes
140-
b'H', b'e', b'l', b'l', b'o'
234+
10, 0, 0, 0, 0, 0, 0, 0, // length of the string, 10 bytes
235+
b'H', b'e', b'l', b'l', b'o', b' ', 0xF0, 0x9F, 0x8C, 0x8D // UTF-8 encoded string
141236
]);
142237
```
143238

239+
### Comparison with Other Types
240+
241+
- Treated similarly to `Vec<u8>` in serialization
242+
- See [Collections](#collections) for more information about length and entry encoding
243+
144244
# Arrays
145245

146246
Array length is never encoded.
147247

148248
Note that `&[T]` is encoded as a [Collection](#collections).
149249

150-
151250
```rust
152251
let arr: [u8; 5] = [10, 20, 30, 40, 50];
153252
let encoded = bincode::encode_to_vec(arr, bincode::config::legacy()).unwrap();
@@ -184,3 +283,53 @@ assert_eq!(encoded.as_slice(), &[
184283
]);
185284
```
186285

286+
## TupleEncoding
287+
288+
Tuple fields are serialized in first-to-last declaration order, with no additional metadata.
289+
290+
- No length prefix is added
291+
- Fields are encoded sequentially
292+
- No padding or alignment adjustments are made
293+
- Order of serialization is deterministic and matches the tuple's declaration order
294+
295+
## StructEncoding
296+
297+
Struct fields are serialized in first-to-last declaration order, with no metadata representing field names.
298+
299+
- No length prefix is added
300+
- Fields are encoded sequentially
301+
- No padding or alignment adjustments are made
302+
- Order of serialization is deterministic and matches the struct's field declaration order
303+
- Both named and unnamed fields are serialized identically
304+
305+
## EnumEncoding
306+
307+
Enum variants are encoded with a discriminant followed by optional variant payload.
308+
309+
### Discriminant Allocation
310+
311+
- Discriminants are automatically assigned by the derive macro in declaration order
312+
- First variant starts at 0
313+
- Subsequent variants increment by 1
314+
- Explicit discriminant indices are currently not supported
315+
- Discriminant is always represented as a `u32` during serialization. See [Discriminant Representation](#discriminant-representation) for more details.
316+
- Maintains the original enum variant semantics during encoding
317+
318+
### Variant Payload Encoding
319+
320+
- Tuple variants: Fields serialized in declaration order
321+
- Struct variants: Fields serialized in declaration order
322+
- Unit variants: No additional data encoded
323+
324+
### Discriminant Representation
325+
326+
- Always encoded as a `u32`
327+
- Encoding method depends on the configured `IntEncoding`
328+
- `VarintEncoding`: Variable-length encoding
329+
- `FixintEncoding`: Fixed 4-byte representation
330+
331+
### Handling of Variant Payloads
332+
333+
- Payload is serialized immediately after the discriminant
334+
- No additional metadata about field names or types
335+
- Payload structure matches the variant's definition

src/de/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ pub trait BorrowDecoder<'de>: Decoder {
215215
fn borrow_reader(&mut self) -> &mut Self::BR;
216216
}
217217

218-
impl<'a, T> Decoder for &'a mut T
218+
impl<T> Decoder for &mut T
219219
where
220220
T: Decoder,
221221
{
@@ -242,7 +242,7 @@ where
242242
}
243243
}
244244

245-
impl<'a, 'de, T> BorrowDecoder<'de> for &'a mut T
245+
impl<'de, T> BorrowDecoder<'de> for &mut T
246246
where
247247
T: BorrowDecoder<'de>,
248248
{

0 commit comments

Comments
 (0)