Skip to content

Commit

Permalink
Modify the tests of filter and agg
Browse files Browse the repository at this point in the history
  • Loading branch information
lanlou1554 committed Nov 19, 2024
1 parent 8c4191f commit 1569fc5
Show file tree
Hide file tree
Showing 13 changed files with 581 additions and 491 deletions.
100 changes: 62 additions & 38 deletions optd-cost-model/src/cost/agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::{
common::{
nodes::{ArcPredicateNode, PredicateType, ReprPredicateNode},
predicates::{attr_index_pred::AttrIndexPred, list_pred::ListPred},
types::TableId,
properties::attr_ref::{AttrRef, BaseTableAttrRef},
types::GroupId,
},
cost_model::CostModelImpl,
stats::DEFAULT_NUM_DISTINCT,
Expand All @@ -13,6 +14,7 @@ use crate::{
impl<S: CostModelStorageManager> CostModelImpl<S> {
pub async fn get_agg_row_cnt(
&self,
group_id: GroupId,
group_by: ArcPredicateNode,
) -> CostModelResult<EstimatedStatistic> {
let group_by = ListPred::from_pred_node(group_by).unwrap();
Expand All @@ -32,12 +34,9 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
"Expected AttributeRef predicate".to_string(),
)
})?;
let is_derived = todo!();
if is_derived {
row_cnt *= DEFAULT_NUM_DISTINCT;
} else {
let table_id = todo!();
let attr_idx = attr_ref.attr_index();
if let AttrRef::BaseTableAttrRef(BaseTableAttrRef { table_id, attr_idx }) =
self.memo.get_attribute_ref(group_id, attr_ref.attr_index())
{
// TODO: Only query ndistinct instead of all kinds of stats.
let stats_option =
self.get_attribute_comb_stats(table_id, &[attr_idx]).await?;
Expand All @@ -50,6 +49,9 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
}
};
row_cnt *= ndistinct;
} else {
// TODO: Handle derived attributes.
row_cnt *= DEFAULT_NUM_DISTINCT;
}
}
_ => {
Expand All @@ -65,7 +67,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {

#[cfg(test)]
mod tests {
use std::collections::HashMap;
use std::{collections::HashMap, ops::Deref};

use crate::{
common::{
Expand All @@ -75,48 +77,59 @@ mod tests {
values::Value,
},
cost_model::tests::{
attr_index, cnst, create_mock_cost_model, empty_list, empty_per_attr_stats, list,
TestPerAttributeStats,
attr_index, cnst, create_mock_cost_model, create_mock_cost_model_with_attr_types,
empty_list, empty_per_attr_stats, list, TestPerAttributeStats, TEST_ATTR1_BASE_INDEX,
TEST_ATTR2_BASE_INDEX, TEST_ATTR3_BASE_INDEX, TEST_GROUP1_ID, TEST_TABLE1_ID,
},
stats::{utilities::simple_map::SimpleMap, MostCommonValues, DEFAULT_NUM_DISTINCT},
EstimatedStatistic,
};

#[tokio::test]
async fn test_agg_no_stats() {
let table_id = TableId(0);
let cost_model = create_mock_cost_model(vec![table_id], vec![], vec![None]);
let cost_model = create_mock_cost_model_with_attr_types(
vec![TEST_TABLE1_ID],
vec![],
vec![HashMap::from([
(TEST_ATTR1_BASE_INDEX, ConstantType::Int32),
(TEST_ATTR2_BASE_INDEX, ConstantType::Int32),
])],
vec![None],
);

// Group by empty list should return 1.
let group_bys = empty_list();
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic(1.0)
);

// Group by single column should return the default value since there are no stats.
let group_bys = list(vec![attr_index(0)]);
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic(DEFAULT_NUM_DISTINCT as f64)
);

// Group by two columns should return the default value squared since there are no stats.
let group_bys = list(vec![attr_index(0), attr_index(1)]);
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic((DEFAULT_NUM_DISTINCT * DEFAULT_NUM_DISTINCT) as f64)
);
}

#[tokio::test]
async fn test_agg_with_stats() {
let table_id = TableId(0);
let group_id = GroupId(0);
let attr1_base_idx = 0;
let attr2_base_idx = 1;
let attr3_base_idx = 2;

let attr1_ndistinct = 12;
let attr2_ndistinct = 645;
let attr1_stats = TestPerAttributeStats::new(
Expand All @@ -132,47 +145,58 @@ mod tests {
0.0,
);

let cost_model = create_mock_cost_model(
vec![table_id],
let cost_model = create_mock_cost_model_with_attr_types(
vec![TEST_TABLE1_ID],
vec![HashMap::from([
(TEST_ATTR1_BASE_INDEX, attr1_stats),
(TEST_ATTR2_BASE_INDEX, attr2_stats),
])],
vec![HashMap::from([
(attr1_base_idx, attr1_stats),
(attr2_base_idx, attr2_stats),
(TEST_ATTR1_BASE_INDEX, ConstantType::Int32),
(TEST_ATTR2_BASE_INDEX, ConstantType::Int32),
(TEST_ATTR3_BASE_INDEX, ConstantType::Int32),
])],
vec![None],
// attr_infos,
);

// Group by empty list should return 1.
let group_bys = empty_list();
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic(1.0)
);

// Group by single column should return the n-distinct of the column.
let group_bys = list(vec![attr_index(attr1_base_idx)]); // TODO: Fix this
let group_bys = list(vec![attr_index(0)]);
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic(attr1_ndistinct as f64)
);

// Group by two columns should return the product of the n-distinct of the columns.
let group_bys = list(vec![attr_index(attr1_base_idx), attr_index(attr2_base_idx)]); // TODO: Fix this
let group_bys = list(vec![attr_index(0), attr_index(1)]);
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic((attr1_ndistinct * attr2_ndistinct) as f64)
);

// Group by multiple columns should return the product of the n-distinct of the columns. If one of the columns
// does not have stats, it should use the default value instead.
let group_bys = list(vec![
// TODO: Fix this
attr_index(attr1_base_idx),
attr_index(attr2_base_idx),
attr_index(attr3_base_idx),
]);
let group_bys = list(vec![attr_index(0), attr_index(1), attr_index(2)]);
assert_eq!(
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
cost_model
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
.await
.unwrap(),
EstimatedStatistic((attr1_ndistinct * attr2_ndistinct * DEFAULT_NUM_DISTINCT) as f64)
);
}
Expand Down
13 changes: 12 additions & 1 deletion optd-cost-model/src/cost/filter/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
/// Also, get_attribute_equality_selectivity is a subroutine when computing range
/// selectivity, which is another reason for separating these into two functions
/// is_eq means whether it's == or !=
///
/// Currently, we only support calculating the equality selectivity for an existing attribute,
/// not a derived attribute.
/// TODO: Support derived attributes.
pub(crate) async fn get_attribute_equality_selectivity(
&self,
table_id: TableId,
attr_base_index: u64,
value: &Value,
is_eq: bool,
) -> CostModelResult<f64> {
// TODO: The attribute could be a derived attribute
let ret_sel = {
if let Some(attribute_stats) = self
.get_attribute_comb_stats(table_id, &[attr_base_index])
Expand Down Expand Up @@ -89,6 +92,10 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
}

/// Compute the frequency of values in an attribute that are less than the given value.
///
/// Currently, we only support calculating this frequency for an existing attribute,
/// not a derived attribute.
/// TODO: Support derived attributes.
async fn get_attribute_lt_value_freq(
&self,
attribute_stats: &AttributeCombValueStats,
Expand Down Expand Up @@ -116,6 +123,10 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
/// Range predicates are handled entirely differently from equality predicates so this is its
/// own function. If it is unable to find the statistics, it returns DEFAULT_INEQ_SEL.
/// The selectivity is computed as quantile of the right bound minus quantile of the left bound.
///
/// Currently, we only support calculating the range selectivity for an existing attribute,
/// not a derived attribute.
/// TODO: Support derived attributes.
pub(crate) async fn get_attribute_range_selectivity(
&self,
table_id: TableId,
Expand Down
Loading

0 comments on commit 1569fc5

Please sign in to comment.