Skip to content

Commit 8de9de7

Browse files
committed
fix uuid derive
1 parent 30767a6 commit 8de9de7

File tree

3 files changed

+57
-24
lines changed

3 files changed

+57
-24
lines changed

parquet_derive/src/parquet_field.rs

+51-24
Original file line numberDiff line numberDiff line change
@@ -316,25 +316,23 @@ impl Field {
316316
let logical_type = self.ty.logical_type();
317317
let repetition = self.ty.repetition();
318318
let converted_type = self.ty.converted_type();
319+
let length = self.ty.length();
320+
321+
let mut builder = quote! {
322+
ParquetType::primitive_type_builder(#field_name, #physical_type)
323+
.with_logical_type(#logical_type)
324+
.with_repetition(#repetition)
325+
};
319326

320327
if let Some(converted_type) = converted_type {
321-
quote! {
322-
fields.push(ParquetType::primitive_type_builder(#field_name, #physical_type)
323-
.with_logical_type(#logical_type)
324-
.with_repetition(#repetition)
325-
.with_converted_type(#converted_type)
326-
.build().unwrap().into()
327-
)
328-
}
329-
} else {
330-
quote! {
331-
fields.push(ParquetType::primitive_type_builder(#field_name, #physical_type)
332-
.with_logical_type(#logical_type)
333-
.with_repetition(#repetition)
334-
.build().unwrap().into()
335-
)
336-
}
328+
builder = quote! { #builder.with_converted_type(#converted_type) };
329+
}
330+
331+
if let Some(length) = length {
332+
builder = quote! { #builder.with_length(#length) };
337333
}
334+
335+
quote! { fields.push(#builder.build().unwrap().into()) }
338336
}
339337

340338
fn option_into_vals(&self) -> proc_macro2::TokenStream {
@@ -394,7 +392,7 @@ impl Field {
394392
quote! { rec.#field_name.signed_duration_since(::chrono::NaiveDate::from_ymd(1970, 1, 1)).num_days() as i32 }
395393
}
396394
Some(ThirdPartyType::Uuid) => {
397-
quote! { (&rec.#field_name.to_string()[..]).into() }
395+
quote! { rec.#field_name.as_bytes().to_vec().into() }
398396
}
399397
_ => {
400398
if self.is_a_byte_buf {
@@ -430,7 +428,7 @@ impl Field {
430428
}
431429
}
432430
Some(ThirdPartyType::Uuid) => {
433-
quote! { ::uuid::Uuid::parse_str(vals[i].data().convert()).unwrap() }
431+
quote! { ::uuid::Uuid::from_bytes(vals[i].data().try_into().unwrap()) }
434432
}
435433
_ => match &self.ty {
436434
Type::TypePath(_) => match self.ty.last_part().as_str() {
@@ -638,11 +636,40 @@ impl Type {
638636
}
639637
"f32" => BasicType::FLOAT,
640638
"f64" => BasicType::DOUBLE,
641-
"String" | "str" | "Uuid" => BasicType::BYTE_ARRAY,
639+
"String" | "str" => BasicType::BYTE_ARRAY,
640+
"Uuid" => BasicType::FIXED_LEN_BYTE_ARRAY,
642641
f => unimplemented!("{} currently is not supported", f),
643642
}
644643
}
645644

645+
/// Computes the optional length associated with this type.
///
/// Returns `Some(16)` when the last path segment of the type is `Uuid`,
/// `Some(1)` for an array whose element is a `u8` type path, and `None`
/// for everything else (including `Vec<u8>` / `&[u8]`).
///
/// NOTE(review): the result appears to be forwarded to the schema
/// builder's `with_length(..)` for fixed-length columns — confirm
/// against the caller.
fn length(&self) -> Option<i32> {
646+
let last_part = self.last_part();
647+
let leaf_type = self.leaf_type_recursive();
648+
649+
match leaf_type {
650+
// Array of `u8`: reports a constant length of 1.
// NOTE(review): the declared array size is not consulted here, so
// `[u8; N]` yields 1 for every N — looks like a placeholder; confirm.
Type::Array(ref first_type) => {
651+
if let Type::TypePath(_) = **first_type {
652+
if last_part == "u8" {
653+
return Some(1);
654+
}
655+
}
656+
}
657+
// `Vec<u8>` / slice of `u8`: explicitly no length. The fall-through
// match below would also produce `None` for "u8"; the early return
// just makes that intent explicit.
Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
658+
if let Type::TypePath(_) = **first_type {
659+
if last_part == "u8" {
660+
return None;
661+
}
662+
}
663+
}
664+
_ => (),
665+
}
666+
667+
// Not a byte container: decide purely on the type's last path segment.
match last_part.trim() {
668+
// A UUID is 16 bytes.
"Uuid" => Some(16),
669+
_ => None,
670+
}
671+
}
672+
646673
fn logical_type(&self) -> proc_macro2::TokenStream {
647674
let last_part = self.last_part();
648675
let leaf_type = self.leaf_type_recursive();
@@ -1328,8 +1355,8 @@ mod test {
13281355
let when = Field::from(&fields[0]);
13291356
assert_eq!(when.writer_snippet().to_string(),(quote!{
13301357
{
1331-
let vals : Vec<_> = records.iter().map(|rec| (&rec.unique_id.to_string()[..]).into() ).collect();
1332-
if let ColumnWriter::ByteArrayColumnWriter(ref mut typed) = column_writer.untyped() {
1358+
let vals : Vec<_> = records.iter().map(|rec| rec.unique_id.as_bytes().to_vec().into() ).collect();
1359+
if let ColumnWriter::FixedLenByteArrayColumnWriter(ref mut typed) = column_writer.untyped() {
13331360
typed.write_batch(&vals[..], None, None) ?;
13341361
} else {
13351362
panic!("Schema and struct disagree on type for {}" , stringify!{ unique_id })
@@ -1349,7 +1376,7 @@ mod test {
13491376
}
13501377
}).collect();
13511378

1352-
if let ColumnWriter::ByteArrayColumnWriter(ref mut typed) = column_writer.untyped() {
1379+
if let ColumnWriter::FixedLenByteArrayColumnWriter(ref mut typed) = column_writer.untyped() {
13531380
typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
13541381
} else {
13551382
panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_unique_id })
@@ -1371,13 +1398,13 @@ mod test {
13711398
assert_eq!(when.reader_snippet().to_string(),(quote!{
13721399
{
13731400
let mut vals = Vec::new();
1374-
if let ColumnReader::ByteArrayColumnReader(mut typed) = column_reader {
1401+
if let ColumnReader::FixedLenByteArrayColumnReader(mut typed) = column_reader {
13751402
typed.read_records(num_records, None, None, &mut vals)?;
13761403
} else {
13771404
panic!("Schema and struct disagree on type for {}", stringify!{ unique_id });
13781405
}
13791406
for (i, r) in &mut records[..num_records].iter_mut().enumerate() {
1380-
r.unique_id = ::uuid::Uuid::parse_str(vals[i].data().convert()).unwrap();
1407+
r.unique_id = ::uuid::Uuid::from_bytes(vals[i].data().try_into().unwrap());
13811408
}
13821409
}
13831410
}).to_string());

parquet_derive_test/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,4 @@ rust-version = { workspace = true }
3232
parquet = { workspace = true }
3333
parquet_derive = { path = "../parquet_derive", default-features = false }
3434
chrono = { workspace = true }
35+
uuid = { version = "1", features = ["v4"] }

parquet_derive_test/src/lib.rs

+5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ struct ACompleteRecord<'a> {
4242
pub borrowed_maybe_a_string: &'a Option<String>,
4343
pub borrowed_maybe_a_str: &'a Option<&'a str>,
4444
pub now: chrono::NaiveDateTime,
45+
pub uuid: uuid::Uuid,
4546
pub byte_vec: Vec<u8>,
4647
pub maybe_byte_vec: Option<Vec<u8>>,
4748
pub borrowed_byte_vec: &'a [u8],
@@ -61,6 +62,7 @@ struct APartiallyCompleteRecord {
6162
pub double: f64,
6263
pub now: chrono::NaiveDateTime,
6364
pub date: chrono::NaiveDate,
65+
pub uuid: uuid::Uuid,
6466
pub byte_vec: Vec<u8>,
6567
}
6668

@@ -105,6 +107,7 @@ mod tests {
105107
OPTIONAL BINARY borrowed_maybe_a_string (STRING);
106108
OPTIONAL BINARY borrowed_maybe_a_str (STRING);
107109
REQUIRED INT64 now (TIMESTAMP_MILLIS);
110+
REQUIRED FIXED_LEN_BYTE_ARRAY (16) uuid (UUID);
108111
REQUIRED BINARY byte_vec;
109112
OPTIONAL BINARY maybe_byte_vec;
110113
REQUIRED BINARY borrowed_byte_vec;
@@ -144,6 +147,7 @@ mod tests {
144147
borrowed_maybe_a_string: &maybe_a_string,
145148
borrowed_maybe_a_str: &maybe_a_str,
146149
now: chrono::Utc::now().naive_local(),
150+
uuid: uuid::Uuid::new_v4(),
147151
byte_vec: vec![0x65, 0x66, 0x67],
148152
maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
149153
borrowed_byte_vec: &borrowed_byte_vec,
@@ -179,6 +183,7 @@ mod tests {
179183
double: std::f64::NAN,
180184
now: chrono::Utc::now().naive_local(),
181185
date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
186+
uuid: uuid::Uuid::new_v4(),
182187
byte_vec: vec![0x65, 0x66, 0x67],
183188
}];
184189

0 commit comments

Comments
 (0)