@@ -47,6 +47,17 @@ macro_rules! cast_scalar_f64 {
47
47
} ;
48
48
}
49
49
50
// Cast a non-null [`ScalarValue::UInt64`] to a [`u64`], or panic.
//
// The argument is matched by reference, so it may be either a
// `ScalarValue` place expression or a reference to one. Any other
// variant, including `ScalarValue::UInt64(None)`, panics and the
// offending value is included in the panic message.
macro_rules! cast_scalar_u64 {
    ($value:expr) => {
        match &$value {
            // Copy the integer out of the borrowed scalar.
            ScalarValue::UInt64(Some(v)) => *v,
            v => panic!("invalid type {:?}", v),
        }
    };
}
60
+
50
61
/// This trait is implemented for each type a [`TDigest`] can operate on,
51
62
/// allowing it to support both numerical Rust types (obtained from
52
63
/// `PrimitiveArray` instances), and [`ScalarValue`] instances.
@@ -142,7 +153,7 @@ pub struct TDigest {
142
153
centroids : Vec < Centroid > ,
143
154
max_size : usize ,
144
155
sum : f64 ,
145
- count : f64 ,
156
+ count : u64 ,
146
157
max : f64 ,
147
158
min : f64 ,
148
159
}
@@ -153,7 +164,7 @@ impl TDigest {
153
164
centroids : Vec :: new ( ) ,
154
165
max_size,
155
166
sum : 0_f64 ,
156
- count : 0_f64 ,
167
+ count : 0 ,
157
168
max : f64:: NAN ,
158
169
min : f64:: NAN ,
159
170
}
@@ -164,14 +175,14 @@ impl TDigest {
164
175
centroids : vec ! [ centroid. clone( ) ] ,
165
176
max_size,
166
177
sum : centroid. mean * centroid. weight ,
167
- count : 1_f64 ,
178
+ count : 1 ,
168
179
max : centroid. mean ,
169
180
min : centroid. mean ,
170
181
}
171
182
}
172
183
173
184
#[ inline]
174
- pub fn count ( & self ) -> f64 {
185
+ pub fn count ( & self ) -> u64 {
175
186
self . count
176
187
}
177
188
@@ -203,16 +214,16 @@ impl Default for TDigest {
203
214
centroids : Vec :: new ( ) ,
204
215
max_size : 100 ,
205
216
sum : 0_f64 ,
206
- count : 0_f64 ,
217
+ count : 0 ,
207
218
max : f64:: NAN ,
208
219
min : f64:: NAN ,
209
220
}
210
221
}
211
222
}
212
223
213
224
impl TDigest {
214
- fn k_to_q ( k : f64 , d : f64 ) -> f64 {
215
- let k_div_d = k / d;
225
+ fn k_to_q ( k : u64 , d : usize ) -> f64 {
226
+ let k_div_d = k as f64 / d as f64 ;
216
227
if k_div_d >= 0.5 {
217
228
let base = 1.0 - k_div_d;
218
229
1.0 - 2.0 * base * base
@@ -244,12 +255,12 @@ impl TDigest {
244
255
}
245
256
246
257
let mut result = TDigest :: new ( self . max_size ( ) ) ;
247
- result. count = self . count ( ) + ( sorted_values. len ( ) as f64 ) ;
258
+ result. count = self . count ( ) + sorted_values. len ( ) as u64 ;
248
259
249
260
let maybe_min = * sorted_values. first ( ) . unwrap ( ) ;
250
261
let maybe_max = * sorted_values. last ( ) . unwrap ( ) ;
251
262
252
- if self . count ( ) > 0.0 {
263
+ if self . count ( ) > 0 {
253
264
result. min = self . min . min ( maybe_min) ;
254
265
result. max = self . max . max ( maybe_max) ;
255
266
} else {
@@ -259,10 +270,10 @@ impl TDigest {
259
270
260
271
let mut compressed: Vec < Centroid > = Vec :: with_capacity ( self . max_size ) ;
261
272
262
- let mut k_limit: f64 = 1.0 ;
273
+ let mut k_limit: u64 = 1 ;
263
274
let mut q_limit_times_count =
264
- Self :: k_to_q ( k_limit, self . max_size as f64 ) * result. count ( ) ;
265
- k_limit += 1.0 ;
275
+ Self :: k_to_q ( k_limit, self . max_size ) * result. count ( ) as f64 ;
276
+ k_limit += 1 ;
266
277
267
278
let mut iter_centroids = self . centroids . iter ( ) . peekable ( ) ;
268
279
let mut iter_sorted_values = sorted_values. iter ( ) . peekable ( ) ;
@@ -309,8 +320,8 @@ impl TDigest {
309
320
310
321
compressed. push ( curr. clone ( ) ) ;
311
322
q_limit_times_count =
312
- Self :: k_to_q ( k_limit, self . max_size as f64 ) * result. count ( ) ;
313
- k_limit += 1.0 ;
323
+ Self :: k_to_q ( k_limit, self . max_size ) * result. count ( ) as f64 ;
324
+ k_limit += 1 ;
314
325
curr = next;
315
326
}
316
327
}
@@ -381,16 +392,16 @@ impl TDigest {
381
392
let mut centroids: Vec < Centroid > = Vec :: with_capacity ( n_centroids) ;
382
393
let mut starts: Vec < usize > = Vec :: with_capacity ( digests. len ( ) ) ;
383
394
384
- let mut count: f64 = 0. 0;
395
+ let mut count = 0 ;
385
396
let mut min = f64:: INFINITY ;
386
397
let mut max = f64:: NEG_INFINITY ;
387
398
388
399
let mut start: usize = 0 ;
389
400
for digest in digests. iter ( ) {
390
401
starts. push ( start) ;
391
402
392
- let curr_count: f64 = digest. count ( ) ;
393
- if curr_count > 0.0 {
403
+ let curr_count = digest. count ( ) ;
404
+ if curr_count > 0 {
394
405
min = min. min ( digest. min ) ;
395
406
max = max. max ( digest. max ) ;
396
407
count += curr_count;
@@ -424,8 +435,8 @@ impl TDigest {
424
435
let mut result = TDigest :: new ( max_size) ;
425
436
let mut compressed: Vec < Centroid > = Vec :: with_capacity ( max_size) ;
426
437
427
- let mut k_limit: f64 = 1.0 ;
428
- let mut q_limit_times_count = Self :: k_to_q ( k_limit, max_size as f64 ) * ( count) ;
438
+ let mut k_limit = 1 ;
439
+ let mut q_limit_times_count = Self :: k_to_q ( k_limit, max_size) * count as f64 ;
429
440
430
441
let mut iter_centroids = centroids. iter_mut ( ) ;
431
442
let mut curr = iter_centroids. next ( ) . unwrap ( ) ;
@@ -444,8 +455,8 @@ impl TDigest {
444
455
sums_to_merge = 0_f64 ;
445
456
weights_to_merge = 0_f64 ;
446
457
compressed. push ( curr. clone ( ) ) ;
447
- q_limit_times_count = Self :: k_to_q ( k_limit, max_size as f64 ) * ( count) ;
448
- k_limit += 1.0 ;
458
+ q_limit_times_count = Self :: k_to_q ( k_limit, max_size) * count as f64 ;
459
+ k_limit += 1 ;
449
460
curr = centroid;
450
461
}
451
462
}
@@ -468,8 +479,7 @@ impl TDigest {
468
479
return 0.0 ;
469
480
}
470
481
471
- let count_ = self . count ;
472
- let rank = q * count_;
482
+ let rank = q * self . count as f64 ;
473
483
474
484
let mut pos: usize ;
475
485
let mut t;
@@ -479,7 +489,7 @@ impl TDigest {
479
489
}
480
490
481
491
pos = 0 ;
482
- t = count_ ;
492
+ t = self . count as f64 ;
483
493
484
494
for ( k, centroid) in self . centroids . iter ( ) . enumerate ( ) . rev ( ) {
485
495
t -= centroid. weight ( ) ;
@@ -581,7 +591,7 @@ impl TDigest {
581
591
vec ! [
582
592
ScalarValue :: UInt64 ( Some ( self . max_size as u64 ) ) ,
583
593
ScalarValue :: Float64 ( Some ( self . sum) ) ,
584
- ScalarValue :: Float64 ( Some ( self . count) ) ,
594
+ ScalarValue :: UInt64 ( Some ( self . count) ) ,
585
595
ScalarValue :: Float64 ( Some ( self . max) ) ,
586
596
ScalarValue :: Float64 ( Some ( self . min) ) ,
587
597
ScalarValue :: List ( arr) ,
@@ -627,7 +637,7 @@ impl TDigest {
627
637
Self {
628
638
max_size,
629
639
sum : cast_scalar_f64 ! ( state[ 1 ] ) ,
630
- count : cast_scalar_f64 ! ( & state[ 2 ] ) ,
640
+ count : cast_scalar_u64 ! ( & state[ 2 ] ) ,
631
641
max,
632
642
min,
633
643
centroids,
0 commit comments