27
27
28
28
namespace datasketches {
29
29
30
+ /**
31
+ * Sorted view for quantiles sketches (REQ, KLL and Quantiles)
32
+ */
30
33
template <
31
34
typename T,
32
35
typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
33
36
typename Allocator
34
37
>
35
38
class quantiles_sorted_view {
36
39
public:
40
+ /// Entry type
37
41
using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t >, std::pair<const T*, uint64_t >>::type;
38
42
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
39
43
using Container = std::vector<Entry, AllocEntry>;
40
44
45
+ /// @private
41
46
quantiles_sorted_view (uint32_t num, const Comparator& comparator, const Allocator& allocator);
42
47
48
+ /// @private
43
49
template <typename Iterator>
44
50
void add (Iterator begin, Iterator end, uint64_t weight);
45
51
52
+ /// @private
46
53
void convert_to_cummulative ();
47
54
48
55
class const_iterator ;
56
+
57
+ /**
58
+ * Iterator pointing to the first entry in the view.
59
+ * If the view is empty, the returned iterator must not be dereferenced or incremented.
60
+ * @return iterator pointing to the first entry
61
+ */
49
62
const_iterator begin () const ;
63
+
64
+ /**
65
+ * Iterator pointing to the past-the-end entry in the view.
66
+ * The past-the-end entry is the hypothetical entry that would follow the last entry.
67
+ * It does not point to any entry, and must not be dereferenced or incremented.
68
+ * @return iterator pointing to the past-the-end entry
69
+ */
50
70
const_iterator end () const ;
51
71
72
+ /// @return size of the view
52
73
size_t size () const ;
53
74
75
+ /**
76
+ * Returns an approximation to the normalized rank of the given item.
77
+ *
78
+ * <p>If the view is empty this throws std::runtime_error.
79
+ *
80
+ * @param item to be ranked
81
+ * @param inclusive if true the weight of the given item is included into the rank.
82
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
83
+ * according to the Comparator.
84
+ *
85
+ * @return an approximate normalized rank of the given item (0 to 1 inclusive)
86
+ */
54
87
double get_rank (const T& item, bool inclusive = true ) const ;
55
88
89
+ /**
90
+ * Quantile return type.
91
+ * This is to return quantiles either by value (for arithmetic types) or by const reference (for all other types)
92
+ */
56
93
using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
94
+
95
+ /**
96
+ * Returns an item from the sketch that is the best approximation to an item
97
+ * from the original stream with the given normalized rank.
98
+ *
99
+ * <p>If the view is empty this throws std::runtime_error.
100
+ *
101
+ * @param rank of an item in the hypothetical sorted stream.
102
+ * @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
103
+ *
104
+ * @return approximate quantile associated with the given normalized rank
105
+ */
57
106
quantile_return_type get_quantile (double rank, bool inclusive = true ) const ;
58
107
59
108
using vector_double = std::vector<double , typename std::allocator_traits<Allocator>::template rebind_alloc<double >>;
109
+
110
+ /**
111
+ * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
112
+ * cumulative analog of the PMF, of the input stream given a set of split points (items).
113
+ *
114
+ * <p>If the view is empty this throws std::runtime_error.
115
+ *
116
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
117
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
118
+ *
119
+ * @param size the number of split points in the array
120
+ *
121
+ * @param inclusive if true the rank of an item includes its own weight, and therefore
122
+ * if the sketch contains items equal to a split point, then in CDF such items are
123
+ * included into the interval to the left of split point. Otherwise they are included into
124
+ * the interval to the right of split point.
125
+ *
126
+ * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
127
+ * of the input stream given the split_points. The value at array position j of the returned
128
+ * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
129
+ * array. This can be viewed as array of ranks of the given split points plus one more value
130
+ * that is always 1.
131
+ */
60
132
vector_double get_CDF (const T* split_points, uint32_t size, bool inclusive = true ) const ;
133
+
134
+ /**
135
+ * Returns an approximation to the Probability Mass Function (PMF) of the input stream
136
+ * given a set of split points (items).
137
+ *
138
+ * <p>If the view is empty this throws std::runtime_error.
139
+ *
140
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
141
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
142
+ *
143
+ * @param size the number of split points in the array
144
+ *
145
+ * @param inclusive if true the rank of an item includes its own weight, and therefore
146
+ * if the sketch contains items equal to a split point, then in PMF such items are
147
+ * included into the interval to the left of split point. Otherwise they are included into the interval
148
+ * to the right of split point.
149
+ *
150
+ * @return an array of m+1 doubles each of which is an approximation
151
+ * to the fraction of the input stream items (the mass) that fall into one of those intervals.
152
+ */
61
153
vector_double get_PMF (const T* split_points, uint32_t size, bool inclusive = true ) const ;
62
154
63
155
private:
@@ -122,8 +214,6 @@ class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_vi
122
214
using Base = typename quantiles_sorted_view<T, C, A>::Container::const_iterator;
123
215
using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t >>::type;
124
216
125
- const_iterator (const Base& it, const Base& begin): Base(it), begin(begin) {}
126
-
127
217
template <typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int >::type = 0 >
128
218
const value_type operator *() const { return Base::operator *(); }
129
219
@@ -147,6 +237,9 @@ class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_vi
147
237
148
238
private:
149
239
Base begin;
240
+
241
+ friend class quantiles_sorted_view <T, C, A>;
242
+ const_iterator (const Base& it, const Base& begin): Base(it), begin(begin) {}
150
243
};
151
244
152
245
} /* namespace datasketches */
0 commit comments