Skip to content

Commit

Permalink
Merge branch 'main' into zhidong/adaptive-perftest
Browse files Browse the repository at this point in the history
  • Loading branch information
Gun9niR authored Apr 29, 2024
2 parents 7bf69b5 + 5958b3d commit 3ff7285
Show file tree
Hide file tree
Showing 20 changed files with 1,511 additions and 449 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 8 additions & 3 deletions dev_scripts/which_queries_work.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
benchmark_name=$1
USAGE="Usage: $0 [job|tpch]"
USAGE="Usage: $0 [job|joblight|tpch]"

if [ $# -ne 1 ]; then
echo >&2 $USAGE
Expand All @@ -9,8 +9,13 @@ fi

if [[ "$benchmark_name" == "job" ]]; then
all_ids="1a,1b,1c,1d,2a,2b,2c,2d,3a,3b,3c,4a,4b,4c,5a,5b,5c,6a,6b,6c,6d,6e,6f,7a,7b,7c,8a,8b,8c,8d,9a,9b,9c,9d,10a,10b,10c,11a,11b,11c,11d,12a,12b,12c,13a,13b,13c,13d,14a,14b,14c,15a,15b,15c,15d,16a,16b,16c,16d,17a,17b,17c,17d,17e,17f,18a,18b,18c,19a,19b,19c,19d,20a,20b,20c,21a,21b,21c,22a,22b,22c,22d,23a,23b,23c,24a,24b,25a,25b,25c,26a,26b,26c,27a,27b,27c,28a,28b,28c,29a,29b,29c,30a,30b,30c,31a,31b,31c,32a,32b,33a,33b,33c"
vec_var_name="WORKING_QUERY_IDS"
elif [[ "$benchmark_name" == "joblight" ]]; then
all_ids="1a,1b,1c,1d,2a,3a,3b,3c,4a,4b,4c,5a,5b,5c,6a,6b,6c,6d,6e,7a,7b,7c,8a,8b,8c,9a,9b,10a,10b,10c,11a,11b,11c,12a,12b,12c,13a,14a,14b,14c,15a,15b,15c,16a,17a,17b,17c,18a,18b,18c,19a,19b,20a,20b,20c,21a,21b,22a,22b,22c,23a,23b,24a,24b,25a,26a,26b,27a,27b,28a"
vec_var_name="WORKING_JOBLIGHT_QUERY_IDS"
elif [[ "$benchmark_name" == "tpch" ]]; then
all_ids="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22"
vec_var_name="WORKING_JOB_QUERY_IDS"
else
echo >&2 $USAGE
exit 1
Expand All @@ -19,7 +24,7 @@ fi
successful_ids=()
IFS=','
for id in $all_ids; do
cargo run --bin optd-perftest cardtest --benchmark-name $benchmark_name --query-ids $id &>/dev/null
cargo run --bin optd-perftest cardtest $benchmark_name --query-ids $id &>/dev/null

if [ $? -eq 0 ]; then
echo >&2 $id succeeded
Expand All @@ -32,7 +37,7 @@ done
echo >&2
echo " Useful Outputs"
echo "================"
working_query_ids_vec="pub const WORKING_QUERY_IDS: &[&str] = &[\"${successful_ids[0]}\""
working_query_ids_vec="pub const ${vec_var_name}: &[&str] = &[\"${successful_ids[0]}\""
IFS=" "
for id in "${successful_ids[@]:1}"; do
working_query_ids_vec+=", \"$id\""
Expand Down
2 changes: 2 additions & 0 deletions optd-datafusion-repr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ assert_approx_eq = "1.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_with = {version = "3.7.0", features = ["json"]}
bincode = "1.3.3"
union-find = { git = "https://github.com/Gun9niR/union-find-rs.git", rev = "794821514f7daefcbb8d5f38ef04e62fc18b5665" }
test-case = "3.3"
89 changes: 86 additions & 3 deletions optd-datafusion-repr/src/cost/base_cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ mod join;
mod limit;
pub(crate) mod stats;

use crate::{plan_nodes::OptRelNodeTyp, properties::column_ref::ColumnRef};
use crate::{
plan_nodes::OptRelNodeTyp,
properties::column_ref::{BaseTableColumnRef, ColumnRef},
};
use itertools::Itertools;
use optd_core::{
cascades::{CascadesOptimizer, RelNodeContext},
Expand Down Expand Up @@ -207,7 +210,7 @@ impl<
&self,
col_ref: &ColumnRef,
) -> Option<&ColumnCombValueStats<M, D>> {
if let ColumnRef::BaseTableColumnRef { table, col_idx } = col_ref {
if let ColumnRef::BaseTableColumnRef(BaseTableColumnRef { table, col_idx }) = col_ref {
self.get_column_comb_stats(table, &[*col_idx])
} else {
None
Expand Down Expand Up @@ -314,6 +317,8 @@ mod tests {

pub const TABLE1_NAME: &str = "table1";
pub const TABLE2_NAME: &str = "table2";
pub const TABLE3_NAME: &str = "table3";
pub const TABLE4_NAME: &str = "table4";

// one column is sufficient for all filter selectivity tests
pub fn create_one_column_cost_model(per_column_stats: TestPerColumnStats) -> TestOptCostModel {
Expand All @@ -327,7 +332,7 @@ mod tests {
)
}

/// Two columns is sufficient for all join selectivity tests
/// Create a cost model with two columns, one for each table. Each column has 100 values.
pub fn create_two_table_cost_model(
tbl1_per_column_stats: TestPerColumnStats,
tbl2_per_column_stats: TestPerColumnStats,
Expand All @@ -340,6 +345,84 @@ mod tests {
)
}

/// Build a test cost model covering three tables: `TABLE1_NAME`, `TABLE2_NAME` and `TABLE3_NAME`.
///
/// Each table is registered via `TableStats::new(100, ..)` with exactly one stats entry, keyed by
/// the column combination `vec![0]` and holding the per-column stats passed in for that table.
/// NOTE(review): the `100` is presumably the table's row count -- confirm against `TableStats::new`.
pub fn create_three_table_cost_model(
    tbl1_per_column_stats: TestPerColumnStats,
    tbl2_per_column_stats: TestPerColumnStats,
    tbl3_per_column_stats: TestPerColumnStats,
) -> TestOptCostModel {
    // Pair every table name with the stats of its single column, in table order.
    let stats_by_table = vec![
        (TABLE1_NAME, tbl1_per_column_stats),
        (TABLE2_NAME, tbl2_per_column_stats),
        (TABLE3_NAME, tbl3_per_column_stats),
    ];
    let per_table_stats = stats_by_table
        .into_iter()
        .map(|(table_name, per_column_stats)| {
            let column_stats = vec![(vec![0], per_column_stats)].into_iter().collect();
            (String::from(table_name), TableStats::new(100, column_stats))
        })
        .collect();
    OptCostModel::new(per_table_stats)
}

/// Build a test cost model covering four tables: `TABLE1_NAME`, `TABLE2_NAME`, `TABLE3_NAME`
/// and `TABLE4_NAME`.
///
/// Each table is registered via `TableStats::new(100, ..)` with exactly one stats entry, keyed by
/// the column combination `vec![0]` and holding the per-column stats passed in for that table.
/// NOTE(review): the `100` is presumably the table's row count -- confirm against `TableStats::new`.
pub fn create_four_table_cost_model(
    tbl1_per_column_stats: TestPerColumnStats,
    tbl2_per_column_stats: TestPerColumnStats,
    tbl3_per_column_stats: TestPerColumnStats,
    tbl4_per_column_stats: TestPerColumnStats,
) -> TestOptCostModel {
    // Pair every table name with the stats of its single column, in table order.
    let stats_by_table = vec![
        (TABLE1_NAME, tbl1_per_column_stats),
        (TABLE2_NAME, tbl2_per_column_stats),
        (TABLE3_NAME, tbl3_per_column_stats),
        (TABLE4_NAME, tbl4_per_column_stats),
    ];
    let per_table_stats = stats_by_table
        .into_iter()
        .map(|(table_name, per_column_stats)| {
            let column_stats = vec![(vec![0], per_column_stats)].into_iter().collect();
            (String::from(table_name), TableStats::new(100, column_stats))
        })
        .collect();
    OptCostModel::new(per_table_stats)
}

/// We need custom row counts because some join algorithms rely on the row cnt
pub fn create_two_table_cost_model_custom_row_cnts(
tbl1_per_column_stats: TestPerColumnStats,
Expand Down
15 changes: 8 additions & 7 deletions optd-datafusion-repr/src/cost/base_cost/agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ use optd_core::{
use serde::{de::DeserializeOwned, Serialize};

use crate::{
cost::{
base_cost::stats::{Distribution, MostCommonValues},
base_cost::DEFAULT_NUM_DISTINCT,
cost::base_cost::{
stats::{Distribution, MostCommonValues},
DEFAULT_NUM_DISTINCT,
},
plan_nodes::{ExprList, OptRelNode, OptRelNodeTyp},
properties::column_ref::{ColumnRef, ColumnRefPropertyBuilder},
properties::column_ref::{BaseTableColumnRef, ColumnRef, ColumnRefPropertyBuilder},
};

use super::{OptCostModel, DEFAULT_UNK_SEL};
Expand Down Expand Up @@ -61,13 +61,14 @@ impl<
} else {
// Multiply the n-distinct of all the group by columns.
// TODO: improve with multi-dimensional n-distinct
let base_table_col_refs = optimizer
let group_col_refs = optimizer
.get_property_by_group::<ColumnRefPropertyBuilder>(context.group_id, 1);
base_table_col_refs
group_col_refs
.column_refs()
.iter()
.take(group_by.len())
.map(|col_ref| match col_ref {
ColumnRef::BaseTableColumnRef { table, col_idx } => {
ColumnRef::BaseTableColumnRef(BaseTableColumnRef { table, col_idx }) => {
let table_stats = self.per_table_stats_map.get(table);
let column_stats = table_stats.and_then(|table_stats| {
table_stats.column_comb_stats.get(&vec![*col_idx])
Expand Down
Loading

0 comments on commit 3ff7285

Please sign in to comment.