Skip to content

Commit

Permalink
Move PrimitiveValueEncoder to encoding mod
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey committed Aug 21, 2024
1 parent 38ceeca commit 04c09f1
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 32 deletions.
4 changes: 2 additions & 2 deletions src/encoding/byte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

use bytes::{BufMut, BytesMut};

use crate::{error::Result, memory::EstimateMemory, writer::column::PrimitiveValueEncoder};
use crate::{error::Result, memory::EstimateMemory};
use std::io::Read;

use super::util::read_u8;
use super::{util::read_u8, PrimitiveValueEncoder};

const MAX_LITERAL_LENGTH: usize = 128;
const MIN_REPEAT_LENGTH: usize = 3;
Expand Down
3 changes: 2 additions & 1 deletion src/encoding/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ use snafu::ResultExt;
use crate::{
error::{self, Result},
memory::EstimateMemory,
writer::column::PrimitiveValueEncoder,
};

use super::PrimitiveValueEncoder;

/// Generically represent f32 and f64.
// TODO: figure out how to use num::traits::FromBytes instead of rolling our own?
pub trait Float: num::Float + std::fmt::Debug + num::traits::ToBytes {
Expand Down
24 changes: 24 additions & 0 deletions src/encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ use std::{
ops::{BitOrAssign, ShlAssign},
};

use bytes::Bytes;
use num::{traits::CheckedShl, PrimInt, Signed};

use crate::{
column::Column,
error::{InvalidColumnEncodingSnafu, Result},
memory::EstimateMemory,
proto::column_encoding::Kind as ProtoColumnKind,
};

Expand All @@ -49,6 +51,28 @@ pub mod rle_v2;
pub mod timestamp;
mod util;

/// Accumulates primitive values of type `V` as encoded bytes in an internal buffer.
///
/// Implementations usually apply a specialized run length encoding so that the
/// output compresses better.
pub trait PrimitiveValueEncoder<V: Copy>: EstimateMemory {
    /// Construct a new encoder.
    fn new() -> Self;

    /// Encode a single value.
    fn write_one(&mut self, value: V);

    /// Encode every element of `values`, in order.
    ///
    /// The default implementation forwards each element to [`Self::write_one`].
    fn write_slice(&mut self, values: &[V]) {
        values
            .iter()
            .copied()
            .for_each(|value| self.write_one(value));
    }

    /// Hand back the bytes encoded so far, leaving an empty buffer in their place.
    // TODO: Figure out how to retain the allocation instead of handing
    //       it off each time.
    fn take_inner(&mut self) -> Bytes;
}

pub fn get_unsigned_rle_reader<R: Read + Send + 'static>(
column: &Column,
reader: R,
Expand Down
4 changes: 2 additions & 2 deletions src/encoding/rle_v2/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::{io::Read, marker::PhantomData};

use bytes::BytesMut;

use crate::{error::Result, memory::EstimateMemory, writer::column::PrimitiveValueEncoder};
use crate::{error::Result, memory::EstimateMemory};

use self::{
delta::{read_delta_values, write_fixed_delta, write_varying_delta},
Expand All @@ -30,7 +30,7 @@ use self::{

use super::{
util::{calculate_percentile_bits, try_read_u8},
EncodingSign, NInt, VarintSerde,
EncodingSign, NInt, PrimitiveValueEncoder, VarintSerde,
};

pub mod delta;
Expand Down
26 changes: 2 additions & 24 deletions src/writer/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ use arrow::{
ArrowPrimitiveType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
},
};
use bytes::Bytes;

use crate::{
encoding::{
byte::ByteRleWriter, float::FloatValueEncoder, rle_v2::RleWriterV2, SignedEncoding,
byte::ByteRleWriter, float::FloatValueEncoder, rle_v2::RleWriterV2, PrimitiveValueEncoder,
SignedEncoding,
},
error::Result,
memory::EstimateMemory,
Expand All @@ -51,28 +51,6 @@ pub trait ColumnStripeEncoder: EstimateMemory {
fn finish(&mut self) -> Vec<Stream>;
}

/// Accumulates primitive values of type `V` as encoded bytes in an internal buffer.
///
/// Implementations usually apply a specialized run length encoding so that the
/// output compresses better.
pub trait PrimitiveValueEncoder<V: Copy>: EstimateMemory {
    /// Construct a new encoder.
    fn new() -> Self;

    /// Encode a single value.
    fn write_one(&mut self, value: V);

    /// Encode every element of `values`, in order.
    ///
    /// The default implementation forwards each element to [`Self::write_one`].
    fn write_slice(&mut self, values: &[V]) {
        values
            .iter()
            .copied()
            .for_each(|value| self.write_one(value));
    }

    /// Hand back the bytes encoded so far, leaving an empty buffer in their place.
    // TODO: Figure out how to retain the allocation instead of handing
    //       it off each time.
    fn take_inner(&mut self) -> Bytes;
}

// TODO: simplify these generics, probably overcomplicating things here

/// Encoder for primitive ORC types (e.g. int, float). Uses a specific [`PrimitiveValueEncoder`] to
Expand Down
8 changes: 5 additions & 3 deletions src/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ use std::fmt::Debug;
use arrow::{array::BooleanBufferBuilder, buffer::NullBuffer};
use bytes::Bytes;

use crate::{encoding::byte::ByteRleWriter, memory::EstimateMemory, proto};

use self::column::PrimitiveValueEncoder;
use crate::{
encoding::{byte::ByteRleWriter, PrimitiveValueEncoder},
memory::EstimateMemory,
proto,
};

pub mod column;
pub mod stripe;
Expand Down

0 comments on commit 04c09f1

Please sign in to comment.