-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathIntHistogramTest.java
177 lines (148 loc) · 5.56 KB
/
IntHistogramTest.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
package simpledb;
import org.junit.Test;
import org.junit.Assert;
import simpledb.Predicate.Op;
public class IntHistogramTest {
/**
* Test to confirm that the IntHistogram implementation is constant-space
* (or, at least, reasonably small space; O(log(n)) might still work if
* your constants are good).
*/
@Test public void orderOfGrowthTest() {
// Don't bother with a timeout on this test.
// Printing debugging statements takes >> time than some inefficient algorithms.
IntHistogram h = new IntHistogram(10000, 0, 100);
// Feed the histogram more integers than would fit into our
// 128mb allocated heap (4-byte integers)
// If this fails, someone's storing every value...
for (int c = 0; c < 33554432; c++) {
h.addValue((c * 23) % 101); // Pseudo-random number; at least get a distribution
}
// Try printing out all of the values; make sure "estimateSelectivity()"
// cause any problems
double selectivity = 0.0;
for (int c = 0; c < 101; c++) {
selectivity += h.estimateSelectivity(Op.EQUALS, c);
}
// All the selectivities should add up to 1, by definition.
// Allow considerable leeway for rounding error, though
// (Java double's are good to 15 or so significant figures)
Assert.assertTrue(selectivity > 0.99);
}
/**
* Test with a minimum and a maximum that are both negative numbers.
*/
@Test public void negativeRangeTest() {
IntHistogram h = new IntHistogram(10, -60, -10);
// All of the values here are negative.
// Also, there are more of them than there are bins.
for (int c = -60; c <= -10; c++) {
h.addValue(c);
h.estimateSelectivity(Op.EQUALS, c);
}
// Even with just 10 bins and 50 values,
// the selectivity for this particular value should be at most 0.2.
Assert.assertTrue(h.estimateSelectivity(Op.EQUALS, -33) < 0.3);
// And it really shouldn't be 0.
// Though, it could easily be as low as 0.02, seeing as that's
// the fraction of elements that actually are equal to -33.
Assert.assertTrue(h.estimateSelectivity(Op.EQUALS, -33) > 0.001);
}
/**
* Make sure that equality binning does something reasonable.
*/
@Test public void opEqualsTest() {
IntHistogram h = new IntHistogram(10, 1, 10);
// Set some values
h.addValue(3);
h.addValue(3);
h.addValue(3);
// This really should return "1.0"; but,
// be conservative in case of alternate implementations
Assert.assertTrue(h.estimateSelectivity(Op.EQUALS, 3) > 0.8);
Assert.assertTrue(h.estimateSelectivity(Op.EQUALS, 8) < 0.001);
}
/**
* Make sure that GREATER_THAN binning does something reasonable.
*/
@Test public void opGreaterThanTest() {
IntHistogram h = new IntHistogram(10, 1, 10);
// Set some values
h.addValue(3);
h.addValue(3);
h.addValue(3);
h.addValue(1);
h.addValue(10);
// Be conservative in case of alternate implementations
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN, -1) > 0.999);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN, 2) > 0.6);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN, 4) < 0.4);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN, 12) < 0.001);
}
/**
* Make sure that LESS_THAN binning does something reasonable.
*/
@Test public void opLessThanTest() {
IntHistogram h = new IntHistogram(10, 1, 10);
// Set some values
h.addValue(3);
h.addValue(3);
h.addValue(3);
h.addValue(1);
h.addValue(10);
// Be conservative in case of alternate implementations
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN, -1) < 0.001);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN, 2) < 0.4);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN, 4) > 0.6);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN, 12) > 0.999);
}
/**
* Make sure that GREATER_THAN_OR_EQ binning does something reasonable.
*/
@Test public void opGreaterThanOrEqualsTest() {
IntHistogram h = new IntHistogram(10, 1, 10);
// Set some values
h.addValue(3);
h.addValue(3);
h.addValue(3);
h.addValue(1);
h.addValue(10);
// Be conservative in case of alternate implementations
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN_OR_EQ, -1) > 0.999);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN_OR_EQ, 2) > 0.6);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN_OR_EQ, 3) > 0.45);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN_OR_EQ, 4) < 0.5);
Assert.assertTrue(h.estimateSelectivity(Op.GREATER_THAN_OR_EQ, 12) < 0.001);
}
/**
* Make sure that LESS_THAN_OR_EQ binning does something reasonable.
*/
@Test public void opLessThanOrEqualsTest() {
IntHistogram h = new IntHistogram(10, 1, 10);
// Set some values
h.addValue(3);
h.addValue(3);
h.addValue(3);
h.addValue(1);
h.addValue(10);
// Be conservative in case of alternate implementations
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN_OR_EQ, -1) < 0.001);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN_OR_EQ, 2) < 0.4);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN_OR_EQ, 3) > 0.45);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN_OR_EQ, 4) > 0.6);
Assert.assertTrue(h.estimateSelectivity(Op.LESS_THAN_OR_EQ, 12) > 0.999);
}
/**
* Make sure that equality binning does something reasonable.
*/
@Test public void opNotEqualsTest() {
IntHistogram h = new IntHistogram(10, 1, 10);
// Set some values
h.addValue(3);
h.addValue(3);
h.addValue(3);
// Be conservative in case of alternate implementations
Assert.assertTrue(h.estimateSelectivity(Op.NOT_EQUALS, 3) < 0.001);
Assert.assertTrue(h.estimateSelectivity(Op.NOT_EQUALS, 8) > 0.01);
}
}