Skip to content

Commit 1b86dfd

Browse files
committed
feat: encode FixedSizeBinary in JSON as hex
Adds encoding support to the JSON writer for the FixedSizeBinary DataType. A test was added as well.
1 parent 91f0b17 commit 1b86dfd

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed

arrow-json/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ tokio = { version = "1.27", default-features = false, features = ["io-util"] }
5656
bytes = "1.4"
5757
criterion = { version = "0.5", default-features = false }
5858
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
59+
hex = "0.4.3"
5960

6061
[[bench]]
6162
name = "serde"

arrow-json/src/writer.rs

+31-1
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,9 @@ mod tests {
833833

834834
use serde_json::json;
835835

836-
use arrow_array::builder::{Int32Builder, Int64Builder, MapBuilder, StringBuilder};
836+
use arrow_array::builder::{
837+
FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, StringBuilder,
838+
};
837839
use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
838840
use arrow_data::ArrayData;
839841

@@ -2137,4 +2139,32 @@ mod tests {
21372139

21382140
Ok(())
21392141
}
2142+
2143+
/// Checks that a `FixedSizeBinary` column is written as a quoted, lowercase hex JSON string.
#[test]
fn test_writer_fixed_size_binary() {
    // 64 hex digits, i.e. exactly `size` (32) bytes once decoded.
    let hex_value = "a948904f2f0f479b8f8197694b30184b0d2ed1c1cd2a1ec0fb85d299a192a447";
    let size = 32;
    let schema = SchemaRef::new(Schema::new(vec![Field::new(
        "bytes",
        DataType::FixedSizeBinary(size),
        false,
    )]));

    let mut builder = FixedSizeBinaryBuilder::new(size);
    let v = hex::decode(hex_value).unwrap();
    builder.append_value(v).unwrap();
    let array = Arc::new(builder.finish()) as ArrayRef;
    let batch = RecordBatch::try_new(schema, vec![array]).unwrap();

    let mut buf = Vec::new();
    {
        // Scope the writer so its mutable borrow of `buf` ends before `buf` is read.
        let mut writer = LineDelimitedWriter::new(&mut buf);
        writer.write(&batch).unwrap();
    }

    // JSON has no binary type and a bare hex token is not a valid JSON value,
    // so the bytes must appear as a double-quoted string.
    let expected = format!("{{\"bytes\":\"{hex_value}\"}}\n");
    assert_eq!(expected, String::from_utf8(buf).unwrap());
}
21402170
}

arrow-json/src/writer/encoder.rs

+17
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ fn make_encoder_impl<'a>(
9999
(Box::new(MapEncoder::try_new(array, options)?) as _, array.nulls().cloned())
100100
}
101101

102+
DataType::FixedSizeBinary(_) => {
103+
let array = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
104+
(Box::new(FixedSizeBinaryEncoder(array.clone())) as _, array.nulls().cloned())
105+
}
106+
102107
DataType::Struct(fields) => {
103108
let array = array.as_struct();
104109
let encoders = fields.iter().zip(array.columns()).map(|(field, array)| {
@@ -443,3 +448,15 @@ impl<'a> Encoder for MapEncoder<'a> {
443448
out.push(b'}');
444449
}
445450
}
451+
452+
struct FixedSizeBinaryEncoder(FixedSizeBinaryArray);
453+
454+
impl Encoder for FixedSizeBinaryEncoder {
455+
fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
456+
let v = self.0.value(idx);
457+
for byte in v {
458+
// this write is infallible
459+
write!(out, "{byte:02x}").unwrap();
460+
}
461+
}
462+
}

0 commit comments

Comments
 (0)