From bcbea9d37beb8ab9e32de7b92a263dd7a0bf3c13 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Wed, 13 Jan 2021 16:03:03 -0800 Subject: [PATCH 1/3] An update to the JSON distribution schema This distribution reformatting has the following benefits: - The min and max of a given bin are clearly stated. - Permits statistics with bins of different sizes or sparse bins. - The 'numBins', 'binSize', 'min', and 'max' stats are no longer explicitly (and redundantly) stated. They may be derived easily if needed. --- simstats.schema.json | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/simstats.schema.json b/simstats.schema.json index 45be31c..a80f934 100644 --- a/simstats.schema.json +++ b/simstats.schema.json @@ -26,6 +26,18 @@ "required": ["scaleFactor"] }, + "bin": { + "type": "object", + "title": "A Distribution bin", + "description": "An object describing a single bin within a distribution.", + "properties" : { + "min": { "type": "number" }, + "max": { "type": "number" }, + "value": { "type": "integer", "minimum": 0} + }, + "required": ["min","max","value"] + }, + "statistic" : { "type": "object", "title": "Statistic", @@ -66,35 +78,17 @@ "distribution": { "allOf": [ {"$ref": "#/definitions/statistic"} ], "title": "Distribution", - "description": "A distribution of statistic values. This is an aggregated statistic which is a summary of many sampled simulator events. The `value` property holds the number of instances in each bin (i.e., this is a histogram). Bins can be defined either with a set size by using `binSize` and `numBins`, or `bins` can be defined with an array. The value should be an array with a length equal to `numBins` or the length of `bins` - 1. If defined as an array, the `bins` property has the following form: `[bin0 start, bin0 end, bin1 end, bin2 end, ... binN end]`. `count` is the total number of things added to the histogram. 
`min` and `max` are the minimum and maximum of the samples added to the distribution. `sumSquared` is the sum of each sample squared.", + "description": "A distribution of statistic values. This is an aggregated statistic which is a summary of many sampled simulator events. The `value` property is an array of bins, with each bin recording the count of samples between a minimum and maximum range (inclusive). This may be viewed as a histogram, though there are no requirements for continuous ranges or equal sizing of bins. The array is ordered in an ascending manner, from the bin with the smallest minimum to the bin with the largest minimum. In cases where the maximum value of a bin is equal to the minimum value of the next bin, any samples of that value will be counted in the following bin (i.e., the bin whose stated range minimum is equal to the sample's value).", "properties": { "type": { "const": "Distribution" }, "value": { "type": "array", - "items": { "type": "integer", "minimum": 0 } - }, - "bins": { - "type": "array", - "items": { "type": "number" } + "items": { "$ref": "#/definitions/bin" } }, - "binSize": { "type": "number", "minimum": 0 }, - "numBins": { "type": "integer", "minimum": 1 }, - "count": { "type": "integer", "minimum": 0 }, - "min": { "type": "integer", "minimum": 0 }, - "max": { "type": "integer", "minimum": 0 }, "sumSquared": { "type": "integer", "minimum": 0 } }, - "oneOf": [ - { - "required": ["value", "bins"] - }, - { - "required": ["value", "binSize", "numBins"] - } - ], - - "$comment": "The `oneOf` above requires either `value` and `bins` or `value`, `binSize`, and `numBins` to allow either description of a distribution." 
+ "required": ["value"] }, "accumulator": { From 5ae4aebc1f54b241367bc875904499c02cebaba2 Mon Sep 17 00:00:00 2001 From: BobbyRBruce <43390222+BobbyRBruce@users.noreply.github.com> Date: Mon, 18 Jan 2021 11:35:44 -0800 Subject: [PATCH 2/3] Use 'count' instead of 'value' in bins Co-authored-by: Jason Lowe-Power --- simstats.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simstats.schema.json b/simstats.schema.json index a80f934..11febb1 100644 --- a/simstats.schema.json +++ b/simstats.schema.json @@ -33,7 +33,7 @@ "properties" : { "min": { "type": "number" }, "max": { "type": "number" }, - "value": { "type": "integer", "minimum": 0} + "count": { "type": "integer", "minimum": 0} }, "required": ["min","max","value"] }, From 2c0d9ccd24cfdf808dfdbbe1f45d6068a7b4158e Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Mon, 18 Jan 2021 13:10:23 -0800 Subject: [PATCH 3/3] Updated simstats.schema.json * Added new properties of a Distribution * Allowed the value of a distribution to be an array of integers. This allows for a simple distribution with a fixed bin size, starting from zero. 
--- simstats.schema.json | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/simstats.schema.json b/simstats.schema.json index 11febb1..e1c9dd0 100644 --- a/simstats.schema.json +++ b/simstats.schema.json @@ -33,10 +33,10 @@ "properties" : { "min": { "type": "number" }, "max": { "type": "number" }, - "count": { "type": "integer", "minimum": 0} + "count": { "type": "integer", "minimum": 0 } }, - "required": ["min","max","value"] - }, + "required": ["min","max","count"] + }, "statistic" : { "type": "object", @@ -84,9 +84,21 @@ "type": { "const": "Distribution" }, "value": { "type": "array", - "items": { "$ref": "#/definitions/bin" } + "items": { + "oneOf": [ + {"$ref": "#/definitions/bin"}, + {"type": "integer", "minimum": 0} + ] + } }, - "sumSquared": { "type": "integer", "minimum": 0 } + "binSize": { "type": "number"}, + "sum": { "type": "number" }, + "minVal": { "type": "number" }, + "maxVal": { "type": "number" }, + "sumSquared": { "type": "integer", "minimum": 0 }, + "underflow": { "type": "integer", "minimum": 0 }, + "overflow": { "type": "integer", "minimum": 0 }, + "logs": { "type": "number"} }, "required": ["value"] },