Skip to content

Commit

Permalink
Add TPC-H q2
Browse files Browse the repository at this point in the history
  • Loading branch information
holicc committed Nov 8, 2024
1 parent 1ec1c10 commit d2fdb98
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 53 deletions.
141 changes: 120 additions & 21 deletions qurious/src/execution/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,62 @@ mod tests {
}

#[test]
#[ignore]
fn test_create_table() -> Result<()> {
let session = ExecuteSession::new()?;
session.sql(
let tables = vec![
"CREATE TABLE IF NOT EXISTS supplier (
s_suppkey BIGINT,
s_name VARCHAR,
s_address VARCHAR,
s_nationkey BIGINT,
s_phone VARCHAR,
s_acctbal DECIMAL(15, 2),
s_comment VARCHAR,
s_rev VARCHAR,
);",
"CREATE TABLE IF NOT EXISTS part (
p_partkey BIGINT,
p_name VARCHAR,
p_mfgr VARCHAR,
p_brand VARCHAR,
p_type VARCHAR,
p_size INTEGER,
p_container VARCHAR,
p_retailprice DECIMAL(15, 2),
p_comment VARCHAR,
p_rev VARCHAR,
);",
"CREATE TABLE IF NOT EXISTS partsupp (
ps_partkey BIGINT,
ps_suppkey BIGINT,
ps_availqty INTEGER,
ps_supplycost DECIMAL(15, 2),
ps_comment VARCHAR,
ps_rev VARCHAR,
);",
"CREATE TABLE IF NOT EXISTS customer (
c_custkey BIGINT,
c_name VARCHAR,
c_address VARCHAR,
c_nationkey BIGINT,
c_phone VARCHAR,
c_acctbal DECIMAL(15, 2),
c_mktsegment VARCHAR,
c_comment VARCHAR,
c_rev VARCHAR,
);",
"CREATE TABLE IF NOT EXISTS orders (
o_orderkey BIGINT,
o_custkey BIGINT,
o_orderstatus VARCHAR,
o_totalprice DECIMAL(15, 2),
o_orderdate DATE,
o_orderpriority VARCHAR,
o_clerk VARCHAR,
o_shippriority INTEGER,
o_comment VARCHAR,
o_rev VARCHAR,
);",
"CREATE TABLE IF NOT EXISTS lineitem (
l_orderkey BIGINT,
l_partkey BIGINT,
Expand All @@ -284,9 +337,32 @@ mod tests {
l_comment VARCHAR,
l_rev VARCHAR,
);",
)?;
"CREATE TABLE IF NOT EXISTS nation (
n_nationkey BIGINT,
n_name VARCHAR,
n_regionkey BIGINT,
n_comment VARCHAR,
n_rev VARCHAR,
);",
"CREATE TABLE IF NOT EXISTS region (
r_regionkey BIGINT,
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
);",
];

let session = ExecuteSession::new()?;
for table in tables {
session.sql(table)?;
}
// session.sql("COPY LINEITEM FROM './tests/test.tbl' ( DELIMITER '|' );")?;
session.sql("COPY LINEITEM FROM './tests/tpch/data/lineitem.tbl' ( DELIMITER '|' );")?;
// session.sql("COPY LINEITEM FROM './tests/tpch/data/lineitem.tbl' ( DELIMITER '|' );")?;
session.sql("COPY PART FROM './tests/tpch/data/part.tbl' ( DELIMITER '|' );")?;
session.sql("COPY SUPPLIER FROM './tests/tpch/data/supplier.tbl' ( DELIMITER '|' );")?;
session.sql("COPY PARTSUPP FROM './tests/tpch/data/partsupp.tbl' ( DELIMITER '|' );")?;
session.sql("COPY NATION FROM './tests/tpch/data/nation.tbl' ( DELIMITER '|' );")?;
session.sql("COPY REGION FROM './tests/tpch/data/region.tbl' ( DELIMITER '|' );")?;

// session.sql("create table t(v1 int not null, v2 int not null, v3 double not null)")?;

Expand All @@ -302,26 +378,49 @@ mod tests {
let batch = session.sql(
"
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
lineitem
part,
supplier,
partsupp,
nation,
region
where
l_shipdate <= date '1998-09-02'
group by
l_returnflag,
l_linestatus
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = 15
and p_type like '%BRASS'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
and ps_supplycost = (
select
min(ps_supplycost)
from
partsupp,
supplier,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
)
order by
l_returnflag,
l_linestatus;
s_acctbal desc,
n_name,
s_name,
p_partkey
limit 10;
",
)?;

Expand Down
98 changes: 66 additions & 32 deletions qurious/src/utils/array.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
use crate::error::{Error, Result};
use crate::{
arrow_err,
error::{Error, Result},
internal_err,
};
use arrow::{
array::{
new_null_array, Array, ArrayRef, BooleanArray, Date32Array, Date64Array, Float16Array, Float32Array,
Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, StringArray, Time32MillisecondArray,
Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
UInt64Array, UInt8Array,
new_null_array, Array, ArrayRef, AsArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
Decimal256Array, Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
StringArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
UInt16Array, UInt32Array, UInt64Array, UInt8Array,
},
datatypes::TimeUnit,
datatypes::{DataType, Decimal128Type, Decimal256Type, DecimalType, TimeUnit},
};
use std::sync::Arc;

Expand Down Expand Up @@ -106,46 +110,76 @@ macro_rules! cast_and_get_decimal {

pub fn repeat_array(ary: &ArrayRef, index: usize, size: usize) -> Result<ArrayRef> {
match ary.data_type() {
arrow::datatypes::DataType::Null => Ok(new_null_array(ary.data_type(), size)),
arrow::datatypes::DataType::Boolean => build_primary_array!(BooleanArray, ary, index, size),
arrow::datatypes::DataType::Utf8 => build_primary_array!(StringArray, ary, index, size),
arrow::datatypes::DataType::Int8 => build_primary_array!(Int8Array, ary, index, size),
arrow::datatypes::DataType::Int16 => build_primary_array!(Int16Array, ary, index, size),
arrow::datatypes::DataType::Int32 => build_primary_array!(Int32Array, ary, index, size),
arrow::datatypes::DataType::Int64 => build_primary_array!(Int64Array, ary, index, size),
arrow::datatypes::DataType::UInt8 => build_primary_array!(UInt8Array, ary, index, size),
arrow::datatypes::DataType::UInt16 => build_primary_array!(UInt16Array, ary, index, size),
arrow::datatypes::DataType::UInt32 => build_primary_array!(UInt32Array, ary, index, size),
arrow::datatypes::DataType::UInt64 => build_primary_array!(UInt64Array, ary, index, size),
arrow::datatypes::DataType::Float16 => build_primary_array!(Float16Array, ary, index, size),
arrow::datatypes::DataType::Float32 => build_primary_array!(Float32Array, ary, index, size),
arrow::datatypes::DataType::Float64 => build_primary_array!(Float64Array, ary, index, size),
arrow::datatypes::DataType::Date32 => build_primary_array!(Date32Array, ary, index, size),
arrow::datatypes::DataType::Date64 => build_primary_array!(Date64Array, ary, index, size),
arrow::datatypes::DataType::Time32(TimeUnit::Second) => {
DataType::Null => Ok(new_null_array(ary.data_type(), size)),
DataType::Boolean => build_primary_array!(BooleanArray, ary, index, size),
DataType::Utf8 => build_primary_array!(StringArray, ary, index, size),
DataType::Int8 => build_primary_array!(Int8Array, ary, index, size),
DataType::Int16 => build_primary_array!(Int16Array, ary, index, size),
DataType::Int32 => build_primary_array!(Int32Array, ary, index, size),
DataType::Int64 => build_primary_array!(Int64Array, ary, index, size),
DataType::UInt8 => build_primary_array!(UInt8Array, ary, index, size),
DataType::UInt16 => build_primary_array!(UInt16Array, ary, index, size),
DataType::UInt32 => build_primary_array!(UInt32Array, ary, index, size),
DataType::UInt64 => build_primary_array!(UInt64Array, ary, index, size),
DataType::Float16 => build_primary_array!(Float16Array, ary, index, size),
DataType::Float32 => build_primary_array!(Float32Array, ary, index, size),
DataType::Float64 => build_primary_array!(Float64Array, ary, index, size),
DataType::Date32 => build_primary_array!(Date32Array, ary, index, size),
DataType::Date64 => build_primary_array!(Date64Array, ary, index, size),
DataType::Time32(TimeUnit::Second) => {
build_primary_array!(Time32SecondArray, ary, index, size)
}
arrow::datatypes::DataType::Time32(TimeUnit::Millisecond) => {
DataType::Time32(TimeUnit::Millisecond) => {
build_primary_array!(Time32MillisecondArray, ary, index, size)
}
arrow::datatypes::DataType::Time64(TimeUnit::Microsecond) => {
DataType::Time64(TimeUnit::Microsecond) => {
build_primary_array!(Time64MicrosecondArray, ary, index, size)
}
arrow::datatypes::DataType::Time64(TimeUnit::Nanosecond) => {
DataType::Time64(TimeUnit::Nanosecond) => {
build_primary_array!(Time64NanosecondArray, ary, index, size)
}
arrow::datatypes::DataType::Timestamp(TimeUnit::Second, tz) => {
DataType::Timestamp(TimeUnit::Second, tz) => {
build_timestamp_array!(TimestampSecondArray, ary, index, size, tz.clone())
}
arrow::datatypes::DataType::Timestamp(TimeUnit::Millisecond, tz) => {
DataType::Timestamp(TimeUnit::Millisecond, tz) => {
build_timestamp_array!(TimestampMillisecondArray, ary, index, size, tz.clone())
}
arrow::datatypes::DataType::Timestamp(TimeUnit::Microsecond, tz) => {
DataType::Timestamp(TimeUnit::Microsecond, tz) => {
build_timestamp_array!(TimestampMicrosecondArray, ary, index, size, tz.clone())
}
arrow::datatypes::DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
build_timestamp_array!(TimestampNanosecondArray, ary, index, size, tz.clone())
}
_ => todo!(),
DataType::Decimal128(precision, scale) => {
let array = ary.as_primitive::<Decimal128Type>();

if array.is_null(index) {
return Ok(new_null_array(
&Decimal128Type::TYPE_CONSTRUCTOR(*precision, *scale),
size,
));
}

Decimal128Array::from_iter_values(vec![array.value(index); size])
.with_precision_and_scale(*precision, *scale)
.map(|v| Arc::new(v) as Arc<dyn Array>)
.map_err(|e| arrow_err!(e))
}
DataType::Decimal256(precision, scale) => {
let array = ary.as_primitive::<Decimal256Type>();

if array.is_null(index) {
return Ok(new_null_array(
&Decimal256Type::TYPE_CONSTRUCTOR(*precision, *scale),
size,
));
}

Decimal256Array::from_iter_values(vec![array.value(index); size])
.with_precision_and_scale(*precision, *scale)
.map(|v| Arc::new(v) as Arc<dyn Array>)
.map_err(|e| arrow_err!(e))
}
_ => internal_err!("Unsupported data type {}", ary.data_type()),
}
}
56 changes: 56 additions & 0 deletions qurious/tests/tpch/q2.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
query RTTITTTT
select
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
part,
supplier,
partsupp,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = 15
and p_type like '%BRASS'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
and ps_supplycost = (
select
min(ps_supplycost)
from
partsupp,
supplier,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
)
order by
s_acctbal desc,
n_name,
s_name,
p_partkey
limit 10;
----
9828.21 Supplier#000000647 UNITED KINGDOM 13120 Manufacturer#5 x5U7MBZmwfG9 33-258-202-4782 s the slyly even ideas poach fluffily
9508.37 Supplier#000000070 FRANCE 3563 Manufacturer#1 INWNH2w,OOWgNDq0BRCcBwOMQc6PdFDc4 16-821-608-1166 ests sleep quickly express ideas. ironic ideas haggle about the final T
9508.37 Supplier#000000070 FRANCE 17268 Manufacturer#4 INWNH2w,OOWgNDq0BRCcBwOMQc6PdFDc4 16-821-608-1166 ests sleep quickly express ideas. ironic ideas haggle about the final T
9453.01 Supplier#000000802 ROMANIA 10021 Manufacturer#5 ,6HYXb4uaHITmtMBj4Ak57Pd 29-342-882-6463 gular frets. permanently special multipliers believe blithely alongs
9453.01 Supplier#000000802 ROMANIA 13275 Manufacturer#4 ,6HYXb4uaHITmtMBj4Ak57Pd 29-342-882-6463 gular frets. permanently special multipliers believe blithely alongs
9192.1 Supplier#000000115 UNITED KINGDOM 13325 Manufacturer#1 nJ 2t0f7Ve,wL1,6WzGBJLNBUCKlsV 33-597-248-1220 es across the carefully express accounts boost caref
9032.15 Supplier#000000959 GERMANY 4958 Manufacturer#4 8grA EHBnwOZhO 17-108-642-3106 nding dependencies nag furiou
8702.02 Supplier#000000333 RUSSIA 11810 Manufacturer#3 MaVf XgwPdkiX4nfJGOis8Uu2zKiIZH 32-508-202-6136 oss the deposits cajole carefully even pinto beans. regular foxes detect alo
8615.5 Supplier#000000812 FRANCE 10551 Manufacturer#2 8qh4tezyScl5bidLAysvutB,,ZI2dn6xP 16-585-724-6633 y quickly regular deposits? quickly pending packages after the caref
8615.5 Supplier#000000812 FRANCE 13811 Manufacturer#4 8qh4tezyScl5bidLAysvutB,,ZI2dn6xP 16-585-724-6633 y quickly regular deposits? quickly pending packages after the caref

0 comments on commit d2fdb98

Please sign in to comment.