Skip to content

Commit

Permalink
Improve filter tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lanlou1554 committed Nov 17, 2024
1 parent 2c9240f commit d6e1825
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 98 deletions.
143 changes: 63 additions & 80 deletions optd-cost-model/src/cost/filter/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_eq_constint_in_mcv() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
vec![Some(Value::Int32(1))],
0.3,
)])),
0,
0.0,
None,
Expand Down Expand Up @@ -170,12 +170,11 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_eq_constint_not_in_mcv() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 20);
mcvs_counts.insert(vec![Some(Value::Int32(3))], 44);
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(1))], 0.2),
(vec![Some(Value::Int32(3))], 0.44),
])),
5,
0.0,
None,
Expand Down Expand Up @@ -206,11 +205,11 @@ mod tests {
/// I only have one test for NEQ since I'll assume that it uses the same underlying logic as EQ
#[tokio::test]
async fn test_attr_ref_neq_constint_in_mcv() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
vec![Some(Value::Int32(1))],
0.3,
)])),
0,
0.0,
None,
Expand Down Expand Up @@ -240,10 +239,8 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_leq_constint_no_mcvs_in_range() {
let mut mcvs_counts = HashMap::new();
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
10,
0.0,
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
Expand Down Expand Up @@ -280,14 +277,13 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_leq_constint_with_mcvs_in_range_not_at_border() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
mcvs_counts.insert(vec![Some(Value::Int32(17))], 8);
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(6))], 0.05),
(vec![Some(Value::Int32(10))], 0.1),
(vec![Some(Value::Int32(17))], 0.08),
(vec![Some(Value::Int32(25))], 0.07),
])),
10,
0.0,
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
Expand Down Expand Up @@ -324,14 +320,13 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_leq_constint_with_mcv_at_border() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
mcvs_counts.insert(vec![Some(Value::Int32(15))], 8);
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(6))], 0.05),
(vec![Some(Value::Int32(10))], 0.1),
(vec![Some(Value::Int32(15))], 0.08),
(vec![Some(Value::Int32(25))], 0.07),
])),
10,
0.0,
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
Expand Down Expand Up @@ -368,10 +363,8 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_lt_constint_no_mcvs_in_range() {
let mut mcvs_counts = HashMap::new();
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
10,
0.0,
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
Expand Down Expand Up @@ -408,14 +401,13 @@ mod tests {

#[tokio::test]
async fn test_attr_ef_lt_constint_with_mcvs_in_range_not_at_border() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
mcvs_counts.insert(vec![Some(Value::Int32(17))], 8);
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(6))], 0.05),
(vec![Some(Value::Int32(10))], 0.1),
(vec![Some(Value::Int32(17))], 0.08),
(vec![Some(Value::Int32(25))], 0.07),
])),
11, /* there are 4 MCVs which together add up to 0.3. With 11 total ndistinct, each
* remaining value has freq 0.1 */
0.0,
Expand Down Expand Up @@ -453,14 +445,13 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_lt_constint_with_mcv_at_border() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
mcvs_counts.insert(vec![Some(Value::Int32(15))], 8);
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(6))], 0.05),
(vec![Some(Value::Int32(10))], 0.1),
(vec![Some(Value::Int32(15))], 0.08),
(vec![Some(Value::Int32(25))], 0.07),
])),
11, /* there are 4 MCVs which together add up to 0.3. With 11 total ndistinct, each
* remaining value has freq 0.1 */
0.0,
Expand Down Expand Up @@ -500,10 +491,8 @@ mod tests {
/// The only interesting thing to test is that if there are nulls, those aren't included in GT
#[tokio::test]
async fn test_attr_ref_gt_constint() {
let mut mcvs_counts = HashMap::new();
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
10,
0.0,
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
Expand Down Expand Up @@ -540,10 +529,8 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_geq_constint() {
let mut mcvs_counts = HashMap::new();
let mcvs_total_count = 100;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
10,
0.0,
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
Expand Down Expand Up @@ -581,13 +568,12 @@ mod tests {

#[tokio::test]
async fn test_and() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
mcvs_counts.insert(vec![Some(Value::Int32(5))], 5);
mcvs_counts.insert(vec![Some(Value::Int32(8))], 2);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(1))], 0.3),
(vec![Some(Value::Int32(5))], 0.5),
(vec![Some(Value::Int32(8))], 0.2),
])),
0,
0.0,
None,
Expand Down Expand Up @@ -629,13 +615,12 @@ mod tests {

#[tokio::test]
async fn test_or() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
mcvs_counts.insert(vec![Some(Value::Int32(5))], 5);
mcvs_counts.insert(vec![Some(Value::Int32(8))], 2);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(1))], 0.3),
(vec![Some(Value::Int32(5))], 0.5),
(vec![Some(Value::Int32(8))], 0.2),
])),
0,
0.0,
None,
Expand Down Expand Up @@ -677,11 +662,11 @@ mod tests {

#[tokio::test]
async fn test_not() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
vec![Some(Value::Int32(1))],
0.3,
)])),
0,
0.0,
None,
Expand Down Expand Up @@ -710,11 +695,11 @@ mod tests {

#[tokio::test]
async fn test_attr_ref_eq_cast_value() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
vec![Some(Value::Int32(1))],
0.3,
)])),
0,
0.0,
None,
Expand Down Expand Up @@ -753,11 +738,11 @@ mod tests {

#[tokio::test]
async fn test_cast_attr_ref_eq_value() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
vec![Some(Value::Int32(1))],
0.3,
)])),
0,
0.1,
None,
Expand Down Expand Up @@ -812,10 +797,8 @@ mod tests {
/// pretty good signal that the Cast was left as is.
#[tokio::test]
async fn test_cast_attr_ref_eq_attr_ref() {
let mut mcvs_counts = HashMap::new();
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
0,
0.0,
None,
Expand Down
14 changes: 8 additions & 6 deletions optd-cost-model/src/cost/filter/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,19 @@ mod tests {
use crate::{
common::{types::TableId, values::Value},
cost_model::tests::*,
stats::{utilities::counter::Counter, MostCommonValues},
stats::{
utilities::{counter::Counter, simple_map::SimpleMap},
MostCommonValues,
},
};

#[tokio::test]
async fn test_in_list() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::Int32(1))], 8);
mcvs_counts.insert(vec![Some(Value::Int32(2))], 2);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::Int32(1))], 0.8),
(vec![Some(Value::Int32(2))], 0.2),
])),
2,
0.0,
None,
Expand Down
13 changes: 6 additions & 7 deletions optd-cost-model/src/cost/filter/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,18 @@ mod tests {
common::{types::TableId, values::Value},
cost_model::tests::*,
stats::{
utilities::counter::Counter, MostCommonValues, FIXED_CHAR_SEL_FACTOR,
FULL_WILDCARD_SEL_FACTOR,
utilities::{counter::Counter, simple_map::SimpleMap},
MostCommonValues, FIXED_CHAR_SEL_FACTOR, FULL_WILDCARD_SEL_FACTOR,
},
};

#[tokio::test]
async fn test_like_no_nulls() {
let mut mcvs_counts = HashMap::new();
mcvs_counts.insert(vec![Some(Value::String("abcd".into()))], 1);
mcvs_counts.insert(vec![Some(Value::String("abc".into()))], 1);
let mcvs_total_count = 10;
let per_attribute_stats = TestPerAttributeStats::new(
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
(vec![Some(Value::String("abcd".into()))], 0.1),
(vec![Some(Value::String("abc".into()))], 0.1),
])),
2,
0.0,
None,
Expand Down
12 changes: 11 additions & 1 deletion optd-cost-model/src/stats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub type AttributeCombValue = Vec<Option<Value>>;
#[serde(tag = "type")]
// Most-common-values statistics keyed by `AttributeCombValue`; each variant is a
// different backing representation, and the `impl` methods dispatch on it.
pub enum MostCommonValues {
/// Backed by a `Counter`; frequencies are obtained through `Counter::frequencies()`.
Counter(Counter<AttributeCombValue>),
/// Backed by a `SimpleMap`; frequencies are read directly from its `m` field.
/// NOTE(review): appears intended for hand-specified frequencies (the filter tests
/// build it from explicit `(value, frequency)` pairs) — confirm before using elsewhere.
SimpleFrequency(SimpleMap<AttributeCombValue>),
// Add more types here...
}

Expand All @@ -47,12 +48,14 @@ impl MostCommonValues {
pub fn freq(&self, value: &AttributeCombValue) -> Option<f64> {
match self {
MostCommonValues::Counter(counter) => counter.frequencies().get(value).copied(),
MostCommonValues::SimpleFrequency(simple_map) => simple_map.m.get(value).copied(),
}
}

pub fn total_freq(&self) -> f64 {
match self {
MostCommonValues::Counter(counter) => counter.frequencies().values().sum(),
MostCommonValues::SimpleFrequency(simple_map) => simple_map.m.values().sum(),
}
}

Expand All @@ -64,13 +67,20 @@ impl MostCommonValues {
.filter(|(val, _)| pred(val))
.map(|(_, freq)| freq)
.sum(),
MostCommonValues::SimpleFrequency(simple_map) => simple_map
.m
.iter()
.filter(|(val, _)| pred(val))
.map(|(_, freq)| freq)
.sum(),
}
}

// returns the # of entries (i.e. value + freq) in the most common values structure
pub fn cnt(&self) -> usize {
match self {
MostCommonValues::Counter(counter) => counter.frequencies().len(),
MostCommonValues::SimpleFrequency(simple_map) => simple_map.m.len(),
}
}
}
Expand All @@ -80,7 +90,7 @@ impl MostCommonValues {
#[serde(tag = "type")]
pub enum Distribution {
TDigest(TDigest<Value>),
SimpleDistribution(SimpleMap),
SimpleDistribution(SimpleMap<Value>),
// Add more types here...
}

Expand Down
Loading

0 comments on commit d6e1825

Please sign in to comment.