Skip to content

Commit b087e7f

Browse files
nirosys and zslayton authored
Add 1.1 binary reader support for strings and integers. (#754)
* Add reader support for ints
* Add reader support for strings
* Address feedback; remove silly size_of test, add Invalid opcode type, simplify sequence of matches, and use value_body to handle offsets
* Address feedback; implement From<FixedInt> for Int
* Update src/lazy/binary/raw/v1_1/value.rs

Co-authored-by: Zack Slayton <[email protected]>

---------

Co-authored-by: Zack Slayton <[email protected]>
1 parent 849de8e commit b087e7f

File tree

6 files changed

+170
-31
lines changed

6 files changed

+170
-31
lines changed

src/lazy/binary/raw/v1_1/immutable_buffer.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::binary::var_uint::VarUInt;
33
use crate::lazy::binary::encoded_value::EncodedValue;
44
use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1;
55
use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES};
6+
use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
67
use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt;
78
use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt;
89
use crate::result::IonFailure;
@@ -330,6 +331,14 @@ impl<'a> ImmutableBuffer<'a> {
330331
Ok(lazy_value)
331332
}
332333

334+
pub fn read_fixed_int(self, length: usize) -> ParseResult<'a, FixedInt> {
335+
let int_bytes = self
336+
.peek_n_bytes(length)
337+
.ok_or_else(|| IonError::incomplete("a FixedInt", self.offset()))?;
338+
let fixed_int = FixedInt::read(int_bytes, length, 0)?;
339+
Ok((fixed_int, self.consume(length)))
340+
}
341+
333342
/// Reads an annotations wrapper and its associated value from the buffer. The caller must confirm
334343
/// that the next byte in the buffer (`type_descriptor`) begins an annotations wrapper.
335344
fn read_annotated_value(

src/lazy/binary/raw/v1_1/reader.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,4 +200,104 @@ mod tests {
200200

201201
Ok(())
202202
}
203+
204+
#[test]
fn integers() -> IonResult<()> {
    use num_bigint::BigInt;

    // Asserts that the next item yielded by `$reader` is an int equal to `$expected`.
    macro_rules! assert_next_int {
        ($reader:ident, $expected:expr) => {
            assert_eq!(
                $reader.next()?.expect_value()?.read()?.expect_int()?,
                $expected
            )
        };
    }

    #[rustfmt::skip]
    let data: Vec<u8> = vec![
        // IVM
        0xE0, 0x01, 0x01, 0xEA,

        // Integer: 0 (opcode 0x50: zero bytes of integer data follow)
        0x50,

        // Integer: 17 (opcode 0x51: one byte follows)
        0x51, 0x11,

        // Integer: -944 (opcode 0x52: two bytes follow)
        0x52, 0x50, 0xFC,

        // Integer: 1 (opcode 0xF5: a FlexUInt length precedes the integer bytes)
        0xF5, 0x03, 0x01,

        // Integer: 147573952589676412929 (opcode 0xF5, nine bytes of integer data)
        0xF5, 0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
    ];

    let mut reader = LazyRawBinaryReader_1_1::new(&data);
    let _ivm = reader.next()?.expect_ivm()?;

    // Integers whose length is carried in the opcode itself.
    assert_next_int!(reader, 0.into());
    assert_next_int!(reader, 17.into());
    assert_next_int!(reader, (-944).into());

    // Integers whose length is given by a FlexUInt after the opcode.
    assert_next_int!(reader, 1.into());

    let big = BigInt::parse_bytes(b"147573952589676412929", 10).unwrap();
    assert_next_int!(reader, big.into());

    Ok(())
}
258+
259+
#[test]
fn strings() -> IonResult<()> {
    // Asserts that the next item yielded by `$reader` is a string equal to `$expected`.
    macro_rules! assert_next_string {
        ($reader:ident, $expected:expr) => {
            assert_eq!(
                $reader.next()?.expect_value()?.read()?.expect_string()?,
                $expected
            )
        };
    }

    #[rustfmt::skip]
    let data: Vec<u8> = vec![
        // IVM
        0xe0, 0x01, 0x01, 0xea,

        // String: "" (opcode 0x80: zero bytes of text follow)
        0x80,

        // String: "hello" (opcode 0x85: five bytes follow)
        0x85, 0x68, 0x65, 0x6c, 0x6c, 0x6f,

        // String: "fourteen bytes" (opcode 0x8E: fourteen bytes follow)
        0x8E, 0x66, 0x6F, 0x75, 0x72, 0x74, 0x65, 0x65, 0x6E, 0x20, 0x62, 0x79, 0x74, 0x65,
        0x73,

        // String: "variable length encoding" (opcode 0xF8: a length prefix precedes the text)
        0xF8, 0x31, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62, 0x6C, 0x65, 0x20, 0x6C, 0x65,
        0x6E, 0x67, 0x74, 0x68, 0x20, 0x65, 0x6E, 0x63, 0x6f, 0x64, 0x69, 0x6E, 0x67,
    ];

    let mut reader = LazyRawBinaryReader_1_1::new(&data);
    let _ivm = reader.next()?.expect_ivm()?;

    // Strings whose length is carried in the opcode itself.
    assert_next_string!(reader, "");
    assert_next_string!(reader, "hello");
    assert_next_string!(reader, "fourteen bytes");

    // String whose length follows the opcode.
    assert_next_string!(reader, "variable length encoding");

    Ok(())
}
203303
}

src/lazy/binary/raw/v1_1/type_code.rs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,16 @@ use crate::IonType;
1515
pub enum OpcodeType {
1616
EExpressionWithAddress, // 0x00-0x4F -
1717
EExpressionAddressFollows, // 0x40-0x4F -
18-
19-
Integer, // 0x50-0x58 - Integer up to 8 bytes wide.
20-
Float, // 0x5A-0x5D -
21-
Boolean, // 0x5E-0x5F -
22-
Decimal, // 0x60-0x6F -
23-
Timestamp, // 0x70-0x7F -
24-
String, // 0x80-0x80 -
25-
InlineSymbol, // 0x90-0x9F -
26-
List, // 0xA0-0xAF -
27-
SExpression, // 0xB0-0xBF -
28-
StructEmpty, // 0xC0 -
18+
Integer, // 0x50-0x58 - Integer up to 8 bytes wide
19+
Float, // 0x5A-0x5D -
20+
Boolean, // 0x5E-0x5F -
21+
Decimal, // 0x60-0x6F -
22+
Timestamp, // 0x70-0x7F -
23+
String, // 0x80-0x8F -
24+
InlineSymbol, // 0x90-0x9F -
25+
List, // 0xA0-0xAF -
26+
SExpression, // 0xB0-0xBF -
27+
StructEmpty, // 0xC0 -
2928
// reserved
3029
StructSymAddress, // 0xD2-0xDF -
3130
// reserved
@@ -40,9 +39,11 @@ pub enum OpcodeType {
4039
Nop, // 0xEC-0xED -
4140
// Reserved
4241
SystemMacroInvoke, // 0xEF -
43-
// delimited container end
44-
// delimited list start
45-
// delimited s-expression start
42+
// delimited container end
43+
// delimited list start
44+
// delimited s-expression start
45+
LargeInteger, // 0xF5 - Integer preceded by FlexUInt length
46+
Invalid, // Represents an encoded value that does not match a defined opcode.
4647
}
4748

4849
impl TryFrom<OpcodeType> for IonType {

src/lazy/binary/raw/v1_1/type_descriptor.rs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -39,24 +39,21 @@ impl Opcode {
3939
let (high_nibble, low_nibble) = (byte >> 4, byte & 0x0F);
4040
use OpcodeType::*;
4141

42-
let opcode_type = match (high_nibble, low_nibble) {
43-
(0x5, 0xE..=0xF) => Boolean,
44-
(0xE, 0x0) => IonVersionMarker,
45-
(0xE, 0xA) => NullNull,
46-
(0xE, 0xC..=0xD) => Nop,
47-
_ => Boolean, // Temporary, until everything is implemented to satisfy the LUT.
48-
};
49-
let ion_type = match opcode_type {
50-
NullNull => Some(IonType::Null),
51-
Nop => None,
52-
IonVersionMarker => None,
53-
Boolean => Some(IonType::Bool),
54-
_ => panic!("the provided ion type code is either not implemented, or invalid"),
42+
let (opcode_type, length_code, ion_type) = match (high_nibble, low_nibble) {
43+
(0x5, 0x0..=0x8) => (Integer, low_nibble, Some(IonType::Int)),
44+
(0x5, 0xE..=0xF) => (Boolean, low_nibble, Some(IonType::Bool)),
45+
(0x8, _) => (String, low_nibble, Some(IonType::String)),
46+
(0xE, 0x0) => (IonVersionMarker, low_nibble, None),
47+
(0xE, 0xA) => (NullNull, low_nibble, Some(IonType::Null)),
48+
(0xE, 0xC..=0xD) => (Nop, low_nibble, None),
49+
(0xF, 0x5) => (LargeInteger, low_nibble, Some(IonType::Int)),
50+
(0xF, 0x8) => (String, 0xFF, Some(IonType::String)), // 0xFF indicates >15 byte string.
51+
_ => (Invalid, low_nibble, None),
5552
};
5653
Opcode {
5754
ion_type,
5855
opcode_type,
59-
length_code: low_nibble,
56+
length_code,
6057
}
6158
}
6259

@@ -112,8 +109,10 @@ impl Header {
112109
use LengthType::*;
113110
match (self.ion_type_code, self.length_code) {
114111
(OpcodeType::Boolean, 0xE..=0xF) => InOpcode(0),
112+
(OpcodeType::Integer, n) => InOpcode(n),
115113
(OpcodeType::Nop, 0xC) => InOpcode(0),
116114
(OpcodeType::NullNull, 0xA) => InOpcode(0),
115+
(OpcodeType::String, 0..=15) => InOpcode(self.length_code),
117116
_ => FlexUIntFollows,
118117
}
119118
}

src/lazy/binary/raw/v1_1/value.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::{
2020
},
2121
result::IonFailure,
2222
types::SymbolId,
23-
IonResult, IonType, RawSymbolTokenRef,
23+
IonError, IonResult, IonType, RawSymbolTokenRef,
2424
};
2525

2626
#[derive(Debug, Copy, Clone)]
@@ -199,7 +199,25 @@ impl<'top> LazyRawBinaryValue_1_1<'top> {
199199

200200
/// Helper method called by [`Self::read`]. Reads the current value as an int.
201201
fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> {
202-
todo!();
202+
use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt;
203+
debug_assert!(self.encoded_value.ion_type() == IonType::Int);
204+
205+
let header = &self.encoded_value.header();
206+
let representation = header.type_code();
207+
let value = match (representation, header.length_code as usize) {
208+
(OpcodeType::Integer, 0x0) => 0.into(),
209+
(OpcodeType::Integer, n) => {
210+
// We have n bytes following that make up our integer.
211+
self.input.consume(1).read_fixed_int(n)?.0.into()
212+
}
213+
(OpcodeType::LargeInteger, 0x5) => {
214+
// We have a FlexUInt size, then big int.
215+
let value_bytes = self.value_body()?;
216+
FixedInt::read(value_bytes, value_bytes.len(), 0)?.into()
217+
}
218+
_ => unreachable!("integer encoding with illegal length_code found"),
219+
};
220+
Ok(RawValueRef::Int(value))
203221
}
204222

205223
/// Helper method called by [`Self::read`]. Reads the current value as a float.
@@ -229,7 +247,13 @@ impl<'top> LazyRawBinaryValue_1_1<'top> {
229247

230248
/// Helper method called by [`Self::read`]. Reads the current value as a string.
231249
fn read_string(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> {
232-
todo!();
250+
use crate::lazy::str_ref::StrRef;
251+
252+
debug_assert!(self.encoded_value.ion_type() == IonType::String);
253+
let raw_bytes = self.value_body()?;
254+
let text = std::str::from_utf8(raw_bytes)
255+
.map_err(|_| IonError::decoding_error("found string with invalid UTF-8 data"))?;
256+
Ok(RawValueRef::String(StrRef::from(text)))
233257
}
234258

235259
/// Helper method called by [`Self::read`]. Reads the current value as a blob.

src/lazy/encoder/binary/v1_1/fixed_int.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ impl FixedInt {
9393
}
9494
}
9595

96+
impl From<FixedInt> for Int {
97+
fn from(other: FixedInt) -> Self {
98+
other.value
99+
}
100+
}
101+
96102
#[cfg(test)]
97103
mod tests {
98104
use num_bigint::BigInt;

0 commit comments

Comments
 (0)