Skip to content

Commit 49df02b

Browse files
authored
chore: align log format with upstream implementation (#5)
Align output of histogram percentile distribution with upstream Java implementation.
2 parents f1b50a2 + 760aac6 commit 49df02b

File tree

8 files changed

+272
-221
lines changed

8 files changed

+272
-221
lines changed

Package.resolved

Lines changed: 0 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,5 @@
11
{
22
"pins" : [
3-
{
4-
"identity" : "spimanifest",
5-
"kind" : "remoteSourceControl",
6-
"location" : "https://github.com/SwiftPackageIndex/SPIManifest.git",
7-
"state" : {
8-
"revision" : "268fab2006be5c11411994bc76f429d9971a840a",
9-
"version" : "0.15.0"
10-
}
11-
},
123
{
134
"identity" : "swift-docc-plugin",
145
"kind" : "remoteSourceControl",
@@ -35,24 +26,6 @@
3526
"revision" : "0a5bc04095a675662cf24757cc0640aa2204253b",
3627
"version" : "1.0.2"
3728
}
38-
},
39-
{
40-
"identity" : "texttable",
41-
"kind" : "remoteSourceControl",
42-
"location" : "https://github.com/ordo-one/TextTable",
43-
"state" : {
44-
"revision" : "a27a07300cf4ae322e0079ca0a475c5583dd575f",
45-
"version" : "0.0.2"
46-
}
47-
},
48-
{
49-
"identity" : "yams",
50-
"kind" : "remoteSourceControl",
51-
"location" : "https://github.com/jpsim/Yams.git",
52-
"state" : {
53-
"revision" : "4889c132978bc6ad3e80f680011ec3dd4fead90c",
54-
"version" : "5.0.4"
55-
}
5629
}
5730
],
5831
"version" : 2

Package.swift

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,7 @@ let package = Package(
2222
dependencies: [
2323
// Dependencies declare other packages that this package depends on.
2424
.package(url: "https://github.com/apple/swift-numerics", from: "1.0.0"),
25-
.package(url: "https://github.com/ordo-one/TextTable", .upToNextMajor(from: "0.0.1")),
2625
.package(url: "https://github.com/apple/swift-docc-plugin", from: "1.1.0"),
27-
.package(url: "https://github.com/SwiftPackageIndex/SPIManifest.git", from: "0.12.0"),
2826
],
2927
targets: [
3028
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
@@ -33,7 +31,6 @@ let package = Package(
3331
name: "Histogram",
3432
dependencies: [
3533
.product(name: "Numerics", package: "swift-numerics"),
36-
.product(name: "TextTable", package: "TextTable"),
3734
]),
3835
.executableTarget(
3936
name: "HistogramExample",

Sources/Histogram/Histogram.swift

Lines changed: 85 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,10 @@
88
// http://www.apache.org/licenses/LICENSE-2.0
99
//
1010

11+
// swiftlint:disable file_length type_body_length line_length identifier_name
12+
13+
import Foundation
1114
import Numerics
12-
import TextTable
1315

1416
/**
1517
* Number of significant digits for values recorded in histogram.
@@ -49,7 +51,6 @@ public enum HistogramOutputFormat {
4951
* they are encountered. Note that recording calls that cause auto-resizing may take longer to execute, as resizing
5052
* incurs allocation and copying of internal data structures.
5153
*/
52-
5354
public struct Histogram<Count: FixedWidthInteger> {
5455
/// The lowest value that can be discerned (distinguished from 0) by the histogram.
5556
public let lowestDiscernibleValue: UInt64
@@ -69,17 +70,13 @@ public struct Histogram<Count: FixedWidthInteger> {
6970
// Biggest value that can fit in bucket 0
7071
let subBucketMask: UInt64
7172

72-
@usableFromInline
73-
var maxValue: UInt64 = 0
73+
@usableFromInline var maxValue: UInt64 = 0
7474

75-
@usableFromInline
76-
var minNonZeroValue: UInt64 = .max
75+
@usableFromInline var minNonZeroValue: UInt64 = .max
7776

78-
@usableFromInline
79-
var counts: [Count]
77+
@usableFromInline var counts: [Count]
8078

81-
@usableFromInline
82-
var _totalCount: UInt64 = 0
79+
@usableFromInline var _totalCount: UInt64 = 0
8380

8481
/// Total count of all recorded values in the histogram
8582
public var totalCount: UInt64 { _totalCount }
@@ -88,7 +85,7 @@ public struct Histogram<Count: FixedWidthInteger> {
8885
public let numberOfSignificantValueDigits: SignificantDigits
8986

9087
/// Control whether or not the histogram can auto-resize and auto-adjust its ``highestTrackableValue``.
91-
public var autoResize: Bool = false
88+
public var autoResize = false
9289

9390
let subBucketHalfCountMagnitude: UInt8
9491

@@ -167,7 +164,7 @@ public struct Histogram<Count: FixedWidthInteger> {
167164
// fits in 62 bits is debatable, and it makes it harder to work through the logic.
168165
// Sums larger than 64 are totally broken as leadingZeroCountBase would go negative.
169166
precondition(unitMagnitude + subBucketHalfCountMagnitude <= 61,
170-
"Invalid arguments: Cannot represent numberOfSignificantValueDigits worth of values beyond lowestDiscernibleValue")
167+
"Invalid arguments: Cannot represent numberOfSignificantValueDigits worth of values beyond lowestDiscernibleValue")
171168

172169
// Establish leadingZeroCountBase, used in bucketIndexForValue() fast path:
173170
// subtract the bits that would be used by the largest value in bucket 0.
@@ -226,8 +223,12 @@ public struct Histogram<Count: FixedWidthInteger> {
226223
return false
227224
}
228225

229-
if index >= counts.count && autoResize {
230-
resize(newHighestTrackableValue: value)
226+
if index >= counts.count {
227+
if autoResize {
228+
resize(newHighestTrackableValue: value)
229+
} else {
230+
return false
231+
}
231232
}
232233

233234
incrementCountForIndex(index, by: count)
@@ -472,7 +473,7 @@ public struct Histogram<Count: FixedWidthInteger> {
472473
* - Returns: The mean value (in value units) of the histogram data.
473474
*/
474475
public var mean: Double {
475-
if (totalCount == 0) {
476+
if totalCount == 0 {
476477
return 0.0
477478
}
478479
var totalValue: Double = 0
@@ -488,7 +489,7 @@ public struct Histogram<Count: FixedWidthInteger> {
488489
* - Returns: The standard deviation (in value units) of the histogram data.
489490
*/
490491
public var stdDeviation: Double {
491-
if (totalCount == 0) {
492+
if totalCount == 0 {
492493
return 0.0
493494
}
494495

@@ -515,7 +516,7 @@ public struct Histogram<Count: FixedWidthInteger> {
515516
/**
516517
* Represents a value point iterated through in a Histogram, with associated stats.
517518
*/
518-
public struct IterationValue {
519+
public struct IterationValue: Equatable {
519520
/**
520521
* The actual value level that was iterated to by the iterator.
521522
*/
@@ -537,6 +538,15 @@ public struct Histogram<Count: FixedWidthInteger> {
537538
*/
538539
public let percentile: Double
539540

541+
/**
542+
* The percentile level that the iterator returning this ``IterationValue`` had iterated to.
543+
* Generally, `percentileLevelIteratedTo` will be equal to or smaller than `percentile`,
544+
* but the same value point can contain multiple iteration levels for some iterators. E.g. a
545+
* percentile iterator can stop multiple times in the exact same value point (if the count at
546+
* that value covers a range of multiple percentiles in the requested percentile iteration points).
547+
*/
548+
public let percentileLevelIteratedTo: Double
549+
540550
/**
541551
* The count of recorded values in the histogram that were added to the ``totalCountToThisValue`` as a result
542552
* of this iteration step. Since multiple iteration steps may occur with overlapping equivalent value ranges,
@@ -577,7 +587,7 @@ public struct Histogram<Count: FixedWidthInteger> {
577587

578588
var countAtThisValue: Count = 0
579589

580-
private var freshSubBucket: Bool = true
590+
private var freshSubBucket = true
581591

582592
init(histogram: Histogram) {
583593
self.histogram = histogram
@@ -600,16 +610,19 @@ public struct Histogram<Count: FixedWidthInteger> {
600610
}
601611
}
602612

603-
mutating func makeIterationValueAndUpdatePrev(value: UInt64? = nil) -> IterationValue {
613+
mutating func makeIterationValueAndUpdatePrev(value: UInt64? = nil, percentileIteratedTo: Double? = nil) -> IterationValue {
604614
let valueIteratedTo = value ?? self.valueIteratedTo
605615

606616
defer {
607617
prevValueIteratedTo = valueIteratedTo
608618
totalCountToPrevIndex = totalCountToCurrentIndex
609619
}
610620

611-
return IterationValue(value: valueIteratedTo, prevValue: prevValueIteratedTo, count: countAtThisValue,
612-
percentile: (100.0 * Double(totalCountToCurrentIndex)) / Double(arrayTotalCount),
621+
let percentile = (100.0 * Double(totalCountToCurrentIndex)) / Double(arrayTotalCount)
622+
623+
return IterationValue(
624+
value: valueIteratedTo, prevValue: prevValueIteratedTo, count: countAtThisValue,
625+
percentile: percentile, percentileLevelIteratedTo: percentileIteratedTo ?? percentile,
613626
countAddedInThisIterationStep: totalCountToCurrentIndex - totalCountToPrevIndex,
614627
totalCountToThisValue: totalCountToCurrentIndex, totalValueToThisValue: totalValueToCurrentIndex)
615628
}
@@ -665,7 +678,7 @@ public struct Histogram<Count: FixedWidthInteger> {
665678
defer {
666679
incrementIterationLevel()
667680
}
668-
return impl.makeIterationValueAndUpdatePrev()
681+
return impl.makeIterationValueAndUpdatePrev(percentileIteratedTo: percentileLevelToIterateTo)
669682
}
670683
impl.incrementSubBucket()
671684
}
@@ -1012,69 +1025,66 @@ public struct Histogram<Count: FixedWidthInteger> {
10121025
outputValueUnitScalingRatio: Double,
10131026
percentileTicksPerHalfDistance ticks: Int = 5,
10141027
format: HistogramOutputFormat = .plainText) {
1028+
// Small helper that left-pads a string with spaces to a given width; for some reason "%10s"/"%10@" doesn't work in String.init(format:).
1029+
func padded(_ s: String, to: Int) -> String {
1030+
if s.count < to {
1031+
return String(repeating: " ", count: to - s.count) + s
1032+
}
1033+
return s
1034+
}
10151035

10161036
if format == .csv {
1017-
return outputPercentileDistributionCsv(to: &stream, outputValueUnitScalingRatio: outputValueUnitScalingRatio, percentileTicksPerHalfDistance: ticks)
1037+
stream.write("\"Value\",\"Percentile\",\"TotalCount\",\"1/(1-Percentile)\"\n")
1038+
} else {
1039+
stream.write("\(padded("Value", to: 12)) \(padded("Percentile", to: 14)) \(padded("TotalCount", to: 10)) \(padded("1/(1-Percentile)", to: 14))\n\n")
10181040
}
10191041

1020-
let table = TextTable<IterationValue> {
1021-
let lastLine = ($0.percentile == 100.0)
1042+
let percentileFormatString = format == .csv ?
1043+
"%.\(numberOfSignificantValueDigits.rawValue)f,%.12f,%d,%.2f\n" :
1044+
"%12.\(numberOfSignificantValueDigits.rawValue)f %2.12f %10d %14.2f\n"
10221045

1023-
return [
1024-
Column("Value" <- "%.\(self.numberOfSignificantValueDigits.rawValue)f".format(Double($0.value) / outputValueUnitScalingRatio), width: 12, align: .right),
1025-
Column("Percentile" <- "%.12f".format($0.percentile / 100.0), width: 14, align: .right),
1026-
Column("TotalCount" <- $0.totalCountToThisValue, width: 10, align: .right),
1027-
Column("1/(1-Percentile)" <- (lastLine ? "" : "%.2f".format(1.0 / (1.0 - ($0.percentile / 100.0)))), align: .right)
1028-
]
1029-
}
1030-
1031-
let data = [IterationValue](percentiles(ticksPerHalfDistance: ticks))
1032-
stream.write(table.string(for: data) ?? "unable to render percentile table")
1033-
1034-
// Calculate and output mean and std. deviation.
1035-
// Note: mean/std. deviation numbers are very often completely irrelevant when
1036-
// data is extremely non-normal in distribution (e.g. in cases of strong multi-modal
1037-
// response time distribution associated with GC pauses). However, reporting these numbers
1038-
// can be very useful for contrasting with the detailed percentile distribution
1039-
// reported by outputPercentileDistribution(). It is not at all surprising to find
1040-
// percentile distributions where results fall many tens or even hundreds of standard
1041-
// deviations away from the mean - such results simply indicate that the data sampled
1042-
// exhibits a very non-normal distribution, highlighting situations for which the std.
1043-
// deviation metric is a useless indicator.
1044-
1045-
let mean = self.mean / outputValueUnitScalingRatio
1046-
let stdDeviation = self.stdDeviation / outputValueUnitScalingRatio
1047-
1048-
stream.write(("#[Mean = %12.\(numberOfSignificantValueDigits.rawValue)f," +
1049-
" StdDeviation = %12.\(numberOfSignificantValueDigits.rawValue)f]\n").format(mean, stdDeviation))
1050-
stream.write(("#[Max = %12.\(numberOfSignificantValueDigits.rawValue)f," +
1051-
" Total count = %12d]\n").format(Double(max) / outputValueUnitScalingRatio, totalCount))
1052-
stream.write("#[Buckets = %12d, SubBuckets = %12d]\n".format(bucketCount, subBucketCount))
1053-
}
1054-
1055-
private func outputPercentileDistributionCsv<Stream: TextOutputStream>(
1056-
to stream: inout Stream,
1057-
outputValueUnitScalingRatio: Double,
1058-
percentileTicksPerHalfDistance ticks: Int = 5) {
1059-
stream.write("\"Value\",\"Percentile\",\"TotalCount\",\"1/(1-Percentile)\"\n")
1060-
1061-
let percentileFormatString = "%.\(numberOfSignificantValueDigits)f,%.12f,%d,%.2f\n"
1062-
let lastLinePercentileFormatString = "%.\(numberOfSignificantValueDigits)f,%.12f,%d,Infinity\n"
1046+
let lastLinePercentileFormatString = format == .csv ?
1047+
"%.\(numberOfSignificantValueDigits.rawValue)f,%.12f,%d,Infinity\n" :
1048+
"%12.\(numberOfSignificantValueDigits.rawValue)f %2.12f %10d\n"
10631049

10641050
for iv in percentiles(ticksPerHalfDistance: ticks) {
1065-
if iv.percentile != 100.0 {
1066-
stream.write(percentileFormatString.format(
1051+
if iv.percentileLevelIteratedTo != 100.0 {
1052+
stream.write(String(
1053+
format: percentileFormatString,
10671054
Double(iv.value) / outputValueUnitScalingRatio,
1068-
iv.percentile / 100.0,
1055+
iv.percentileLevelIteratedTo / 100.0,
10691056
iv.totalCountToThisValue,
1070-
1.0 / (1.0 - (iv.percentile / 100.0))))
1057+
1.0 / (1.0 - (iv.percentileLevelIteratedTo / 100.0))))
10711058
} else {
1072-
stream.write(lastLinePercentileFormatString.format(
1059+
stream.write(String(
1060+
format: lastLinePercentileFormatString,
10731061
Double(iv.value) / outputValueUnitScalingRatio,
1074-
iv.percentile / 100.0,
1062+
iv.percentileLevelIteratedTo / 100.0,
10751063
iv.totalCountToThisValue))
10761064
}
10771065
}
1066+
1067+
if format != .csv {
1068+
// Calculate and output mean and std. deviation.
1069+
// Note: mean/std. deviation numbers are very often completely irrelevant when
1070+
// data is extremely non-normal in distribution (e.g. in cases of strong multi-modal
1071+
// response time distribution associated with GC pauses). However, reporting these numbers
1072+
// can be very useful for contrasting with the detailed percentile distribution
1073+
// reported by outputPercentileDistribution(). It is not at all surprising to find
1074+
// percentile distributions where results fall many tens or even hundreds of standard
1075+
// deviations away from the mean - such results simply indicate that the data sampled
1076+
// exhibits a very non-normal distribution, highlighting situations for which the std.
1077+
// deviation metric is a useless indicator.
1078+
//
1079+
1080+
let mean = self.mean / outputValueUnitScalingRatio
1081+
let stdDeviation = self.stdDeviation / outputValueUnitScalingRatio
1082+
stream.write(String(format: "#[Mean = %12.\(numberOfSignificantValueDigits.rawValue)f," +
1083+
" StdDeviation = %12.\(numberOfSignificantValueDigits.rawValue)f]\n", mean, stdDeviation))
1084+
stream.write(String(format: "#[Max = %12.\(numberOfSignificantValueDigits.rawValue)f," +
1085+
" Total count = %12d]\n", Double(max) / outputValueUnitScalingRatio, totalCount))
1086+
stream.write(String(format: "#[Buckets = %12d, SubBuckets = %12d]\n", bucketCount, subBucketCount))
1087+
}
10781088
}
10791089

10801090
// MARK: Structure querying support.
@@ -1257,8 +1267,8 @@ public struct Histogram<Count: FixedWidthInteger> {
12571267
private static func bucketsNeededToCoverValue(_ value: UInt64, subBucketCount: Int, unitMagnitude: UInt8) -> Int {
12581268
var smallestUntrackableValue = UInt64(subBucketCount) << unitMagnitude
12591269
var bucketsNeeded = 1
1260-
while (smallestUntrackableValue <= value) {
1261-
if (smallestUntrackableValue > UInt64.max / 2) {
1270+
while smallestUntrackableValue <= value {
1271+
if smallestUntrackableValue > UInt64.max / 2 {
12621272
return bucketsNeeded + 1
12631273
}
12641274
smallestUntrackableValue <<= 1
@@ -1301,6 +1311,7 @@ extension Histogram: Equatable {
13011311
// resizing.
13021312
if lhs.counts.count == rhs.counts.count {
13031313
for i in 0..<lhs.counts.count {
1314+
// swiftlint:disable:next for_where
13041315
if lhs.counts[i] != rhs.counts[i] {
13051316
return false
13061317
}
@@ -1309,6 +1320,7 @@ extension Histogram: Equatable {
13091320
// Comparing the values is valid here because we have already confirmed the histograms have the same total
13101321
// count. It would not be correct otherwise.
13111322
for iv in lhs.recordedValues() {
1323+
// swiftlint:disable:next for_where
13121324
if rhs.countForValue(iv.value) != iv.count {
13131325
return false
13141326
}

0 commit comments

Comments
 (0)