From a37e5dbac2f8fd68e3de7b99b4e70b746c93aa02 Mon Sep 17 00:00:00 2001 From: Yao Yue Date: Tue, 9 Jul 2024 14:35:22 -0700 Subject: [PATCH] Release 1.1.0 * add methods to load dense or sparse counts * remove accidental dependency update * fix typo * version bump * fix incomplete doc message * add github action to publish to npm * adjust token permission to write * fix permission --- .github/workflows/npm-publish.yml | 34 ++++++++++++++++++ package.json | 2 +- src/index.js | 58 ++++++++++++++++++++++--------- src/index.test.js | 31 ++++++++++++++--- 4 files changed, 103 insertions(+), 22 deletions(-) create mode 100644 .github/workflows/npm-publish.yml diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml new file mode 100644 index 0000000..5db0f15 --- /dev/null +++ b/.github/workflows/npm-publish.yml @@ -0,0 +1,34 @@ +name: npm-publish +on: + push: + branches: + - main # Change this to your default branch +jobs: + npm-publish: + name: npm-publish + runs-on: ubuntu-latest + permissions: + contents: write + actions: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Run tests + uses: actions/setup-node@v3 + with: + node-version: '20' + - run: npm ci + - run: npm test + - name: Publish if version has been updated + uses: pascalgn/npm-publish-action@1.3.9 + with: # All of these inputs are optional + tag_name: 'v%s' + tag_message: 'v%s' + create_tag: 'true' + commit_pattern: "^Release (\\S+)" + workspace: '.' 
+ publish_command: 'yarn' + publish_args: '--non-interactive' + env: # More info about the environment variables in the README + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Leave this as is, it's automatically generated + NPM_AUTH_TOKEN: ${{ secrets.NPM_AUTH_TOKEN }} # You need to set this in your repo settings diff --git a/package.json b/package.json index a5e5c69..82c3511 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "type": "module", "name": "h2-histogram", - "version": "1.0.2", + "version": "1.1.0", "description": "The H2Histogram provides a histogram that is conceptually similar to HdrHistogram, but noticeably faster due to the use of base-2 buckets and efficient bit operations.", "main": "src/index.js", "scripts": { diff --git a/src/index.js b/src/index.js index f89cf94..3b61c84 100644 --- a/src/index.js +++ b/src/index.js @@ -4,12 +4,12 @@ export class H2Encoding { /** * H2Encoding encodes values from the integer range [0, 2^n) into base-2 logarithmic * bins with a controllable relative error bound. - * + * * The number of bins must be less than 2^32, and the largest encodable value must * be less than 2^53. - * + * * The histogram is designed to encode integer values only. - * + * * @param {object} options * @param {number} options.a - The `a` parameter controls the width of bins on * the low end of the value range. Each bin is 2^a wide, so the absolute error on @@ -54,13 +54,13 @@ export class H2Encoding { static params({ relativeError, minimumUnit = 1, maxValue = 2 ** 53 - 1 }) { assert(relativeError > 0 && relativeError <= 1, () => `expected relative error to be in (0, 1], got ${relativeError}`); // Since we use bit shifts to handle the parameters, we need `a` >= 0, so the minimum - // unit must be a positive number greater than 1. + // unit must be a positive number greater than 1. // There's no conceptual issue with smaller numbers, but they are hard to support with bit math. 
assert(minimumUnit >= 1, () => `expected minimumUnit > 1, got ${minimumUnit}`); // Mandate that maxValue is an integer in order to avoid issues with floating-point // rounding, eg. Math.log2(1.0000000000000002 + 1) === 1 assert(maxValue >= 1, () => `expected maxValue >= 1, got ${maxValue}`); - assertSafeInteger(maxValue); + assertSafeInteger(maxValue); const a = Math.floor(Math.log2(minimumUnit)); let b = -Math.floor(Math.log2(relativeError)); // since `2^n` is the first unrepresentable value, @@ -137,7 +137,7 @@ export class H2Encoding { const binsBelowCutoff = u32(1 << (c - a)); if (code < binsBelowCutoff) { return u32(code << a); - } + } // The number of bins in 0..code that are above the cutoff point const n = code - binsBelowCutoff; @@ -202,7 +202,7 @@ export class H2Encoding { return 2 ** this.a; } - /** + /** * Relative error on the high end of the histogram, above the cutoff */ relativeError() { @@ -271,6 +271,32 @@ export class H2HistogramBuilder { this.counts[bin] += count; } + /** + * Import `counts` as represented in a dense Histogram + * @param {number[] | Float64Array} counts + */ + loadDenseCounts(counts) { + for (let i = 0; i < counts.length; i++) { + const index = i; + const count = counts[i]; + this.incrementBin(index, count); + } + } + + /** + * Import `bins` and `counts` as represented in a sparse Histogram + * @param {number[] | Uint32Array} bins + * @param {number[] | Float64Array} counts + */ + loadSparseCounts(bins, counts) { + assert(bins.length === counts.length, () => `bins.length (${bins.length}) must equal counts.length (${counts.length})`); + for (let i = 0; i < bins.length; i++) { + const index = bins[i]; + const count = counts[i]; + this.incrementBin(index, count); + } + } + build() { // Sparsify by storing only the nonzero bins const bins = []; @@ -399,15 +425,15 @@ export class H2Histogram { /** * Returns the largest index for which `pred` returns true, plus one. * If the predicate does not return true for any index, returns 0. 
- * The predicate function `pred` is required to be monotonic, ie. + * The predicate function `pred` is required to be monotonic, ie. * to return `true` for all inputs below some cutoff, and `false` * for all inputs above that cutoff. - * + * * This implementation is adapted from https://orlp.net/blog/bitwise-binary-search/ - * + * * That post contains optimized versions of this function, but here I opted for the * clearest implementation, at a slight performance cost. - * + * * @param {number} n * @param {(index: number) => boolean} pred */ @@ -457,7 +483,7 @@ function u32(x) { /** * A miniature implementation of H2 histogram encoding for values <= 2^32-1. * Returns the bin index of the bin containing `value`. - * + * * @param {number} value * @param {number} a * @param {number} b @@ -474,7 +500,7 @@ export function encode32(value, a, b) { * A miniature implementation of H2 histogram decoding for values <= 2^32-1. * Returns an object { lower, upper } representing the inclusive bounds * [lower, upper] for the `index`-th bin. - * + * * @param {number} index * @param {number} a * @param {number} b @@ -502,7 +528,7 @@ export function decode32(index, a, b) { /** * Common assertions on the input arguments to encode32 and decode32. - * + * * @param {number} x - code or value * @param {number} a - histogram `a` parameter * @param {number} b - histogram `b` parameter @@ -516,9 +542,9 @@ function assertValid32(x, a, b) { } /** - * + * * @param {boolean} condition - * @param {string | (() => string) } [message] - error message as a string or zero-argument function, + * @param {string | (() => string) } [message] - error message as a string or zero-argument function, * to allow deferring the evaluation of an expensive message until the time an error occurs. 
*/ function assert(condition, message) { diff --git a/src/index.test.js b/src/index.test.js index b7efbbd..73a4add 100644 --- a/src/index.test.js +++ b/src/index.test.js @@ -1,6 +1,6 @@ import * as fc from 'fast-check'; -import { describe, expect, it, test } from 'vitest'; -import { H2Encoding, H2Histogram, H2HistogramBuilder, decode32, encode32 } from './index.js'; +import { describe, expect, test } from 'vitest'; +import { H2Encoding, H2HistogramBuilder, decode32, encode32 } from './index.js'; describe('H2Encoding', () => { test('H2Encoding.params', () => { @@ -21,7 +21,7 @@ describe('H2Encoding', () => { expect(enc.maxValue()).toBeGreaterThanOrEqual(maxValue); }) ); - }); + }); test('H2Encoding.encode', () => { let enc = new H2Encoding({ a: 1, b: 2, n: 6 }); @@ -104,7 +104,7 @@ describe('H2Encoding', () => { // c = a + b + 1 out of bounds (above 31) expect(() => new H2Encoding({ a: 20, b: 20, n: 53 })).toThrow(); - + // n out of bounds expect(() => new H2Encoding({ a: 0, b: 0, n: 54 })).toThrow(); }); @@ -146,7 +146,7 @@ test('H2Histogram', () => { expect(hist.quantile(0.25)).toBeGreaterThan(1e5); expect(hist.quantile(0.75)).toBeGreaterThan(2e5); expect(hist.quantile(1)).toBeGreaterThan(3e5); - } + }; { /** @@ -212,3 +212,24 @@ test('H2Histogram', () => { ); } }); + +test('loadCounts', () => { + { + const enc = new H2Encoding({ a: 0, b: 2, n: 6 }); + + const builder_dense = new H2HistogramBuilder(enc); + builder_dense.loadDenseCounts([0, 0, 2, 3, 0, 0, 6, 7, 8, 0, 0, 0, 12, 0, 14, 0]); + const histo_dense = builder_dense.build(); + + expect(histo_dense.numObservations).toBe(52); + + const builder_sparse = new H2HistogramBuilder(enc); + builder_sparse.loadSparseCounts([2, 3, 6, 7, 8, 12, 14], [2, 3, 6, 7, 8, 12, 14]); + const histo_sparse = builder_sparse.build(); + + expect(histo_dense.numObservations).toEqual(histo_sparse.numObservations); + for (let v = 0; v < 63; v++) { + expect(histo_dense.cumulativeCount(v)).toEqual(histo_sparse.cumulativeCount(v)); + } + } 
+});