From 4cf4d73e865a4bccdcbdf49cd479720e497b3233 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Tue, 7 Nov 2023 09:12:26 -0800 Subject: [PATCH 1/2] fix overload precedence (#1915) --- src/transforms/dodge.d.ts | 4 ++-- src/transforms/normalize.d.ts | 4 ++-- src/transforms/stack.d.ts | 12 ++++++------ src/transforms/window.d.ts | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/transforms/dodge.d.ts b/src/transforms/dodge.d.ts index 3a6476784f..185b58916a 100644 --- a/src/transforms/dodge.d.ts +++ b/src/transforms/dodge.d.ts @@ -59,8 +59,8 @@ export interface DodgeYOptions extends DodgeOptions { * * If *dodgeOptions* is a string, it is shorthand for the dodge **anchor**. */ -export function dodgeX<T>(dodgeOptions?: DodgeXOptions | DodgeXOptions["anchor"], options?: T): Initialized<T>; export function dodgeX<T>(options?: T & DodgeXOptions): Initialized<T>; +export function dodgeX<T>(dodgeOptions?: DodgeXOptions | DodgeXOptions["anchor"], options?: T): Initialized<T>; /** * Given an **x** position channel, derives a new **y** position channel that @@ -71,5 +71,5 @@ export function dodgeX<T>(options?: T & DodgeXOptions): Initialized<T>; * * If *dodgeOptions* is a string, it is shorthand for the dodge **anchor**. */ -export function dodgeY<T>(dodgeOptions?: DodgeYOptions | DodgeYOptions["anchor"], options?: T): Initialized<T>; export function dodgeY<T>(options?: T & DodgeYOptions): Initialized<T>; +export function dodgeY<T>(dodgeOptions?: DodgeYOptions | DodgeYOptions["anchor"], options?: T): Initialized<T>; diff --git a/src/transforms/normalize.d.ts b/src/transforms/normalize.d.ts index e4a4273337..d0f5917bb7 100644 --- a/src/transforms/normalize.d.ts +++ b/src/transforms/normalize.d.ts @@ -56,8 +56,8 @@ export interface NormalizeOptions { * is used, the derived series values would be [*x₀* / *x₀*, *x₁* / *x₀*, *x₂* / * *x₀*, …] as in an index chart. 
*/ -export function normalizeX<T>(basis?: NormalizeBasis, options?: T): Transformed<T>; export function normalizeX<T>(options?: T & NormalizeOptions): Transformed<T>; +export function normalizeX<T>(basis?: NormalizeBasis, options?: T): Transformed<T>; /** * Groups data into series using the first channel of **z**, **fill**, or @@ -67,8 +67,8 @@ export function normalizeX<T>(options?: T & NormalizeOptions): Transformed<T>; * is used, the derived series values would be [*y₀* / *y₀*, *y₁* / *y₀*, *y₂* / * *y₀*, …] as in an index chart. */ -export function normalizeY<T>(basis?: NormalizeBasis, options?: T): Transformed<T>; export function normalizeY<T>(options?: T & NormalizeOptions): Transformed<T>; +export function normalizeY<T>(basis?: NormalizeBasis, options?: T): Transformed<T>; /** * Given a normalize *basis*, returns a corresponding map implementation for use diff --git a/src/transforms/stack.d.ts b/src/transforms/stack.d.ts index 528fe5007f..1cd8d7a13d 100644 --- a/src/transforms/stack.d.ts +++ b/src/transforms/stack.d.ts @@ -119,24 +119,24 @@ export interface StackOptions { * a label. If not specified, the input channel **x** defaults to the constant * one. */ -export function stackX<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; export function stackX<T>(options?: T & StackOptions): Transformed<T>; +export function stackX<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; /** * Like **stackX**, but returns the starting position **x1** as the **x** * channel, for example to position a dot on the left-hand side of each element * of a stack. */ -export function stackX1<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; export function stackX1<T>(options?: T & StackOptions): Transformed<T>; +export function stackX1<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; /** * Like **stackX**, but returns the starting position **x2** as the **x** * channel, for example to position a dot on the right-hand side of each element * of a stack. 
*/ -export function stackX2<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; export function stackX2<T>(options?: T & StackOptions): Transformed<T>; +export function stackX2<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; /** * Transforms a length channel **y** into starting and ending position channels @@ -147,20 +147,20 @@ export function stackX2<T>(options?: T & StackOptions): Transformed<T>; * midpoint between **y1** and **y2**, for example to place a label. If not * specified, the input channel **y** defaults to the constant one. */ -export function stackY<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; export function stackY<T>(options?: T & StackOptions): Transformed<T>; +export function stackY<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; /** * Like **stackY**, but returns the starting position **y1** as the **y** * channel, for example to position a dot at the bottom of each element of a * stack. */ -export function stackY1<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; export function stackY1<T>(options?: T & StackOptions): Transformed<T>; +export function stackY1<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; /** * Like **stackY**, but returns the ending position **y2** as the **y** channel, * for example to position a dot at the top of each element of a stack. */ -export function stackY2<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; export function stackY2<T>(options?: T & StackOptions): Transformed<T>; +export function stackY2<T>(stackOptions?: StackOptions, options?: T): Transformed<T>; diff --git a/src/transforms/window.d.ts b/src/transforms/window.d.ts index 36c959a089..2ba0d312d0 100644 --- a/src/transforms/window.d.ts +++ b/src/transforms/window.d.ts @@ -108,8 +108,8 @@ export interface WindowOptions { * * If *windowOptions* is a number, it is shorthand for the window size **k**. 
*/ -export function windowX<T>(windowOptions?: WindowOptions | WindowOptions["k"], options?: T): Transformed<T>; export function windowX<T>(options?: T & WindowOptions): Transformed<T>; +export function windowX<T>(windowOptions?: WindowOptions | WindowOptions["k"], options?: T): Transformed<T>; /** * Groups data into series using the first channel of *z*, *fill*, or *stroke* @@ -123,8 +123,8 @@ export function windowX<T>(options?: T & WindowOptions): Transformed<T>; * * If *windowOptions* is a number, it is shorthand for the window size **k**. */ -export function windowY<T>(windowOptions?: WindowOptions | WindowOptions["k"], options?: T): Transformed<T>; export function windowY<T>(options?: T & WindowOptions): Transformed<T>; +export function windowY<T>(windowOptions?: WindowOptions | WindowOptions["k"], options?: T): Transformed<T>; /** * Given the specified window *options*, returns a corresponding map From c6c1bcd242838b5e310cd3d5972a67d34fbd73c5 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Tue, 7 Nov 2023 09:16:05 -0800 Subject: [PATCH 2/2] x and y reducers for group and hexbin (#1916) * x and y reducers for group * x and y reducers for hexbin --- docs/transforms/group.md | 2 + docs/transforms/hexbin.md | 17 +- src/transforms/group.d.ts | 36 ++- src/transforms/group.js | 46 +++- src/transforms/hexbin.d.ts | 3 +- src/transforms/hexbin.js | 30 +- test/output/hexbinFillX.svg | 273 +++++++++++++++++++ test/output/mobyDickLetterFrequencyFillX.svg | 151 ++++++++++ test/plots/hexbin.ts | 11 + test/plots/moby-dick-letter-frequency.ts | 14 +- 10 files changed, 554 insertions(+), 29 deletions(-) create mode 100644 test/output/hexbinFillX.svg create mode 100644 test/output/mobyDickLetterFrequencyFillX.svg diff --git a/docs/transforms/group.md b/docs/transforms/group.md index a49ef69b06..f2d1cea94d 100644 --- a/docs/transforms/group.md +++ b/docs/transforms/group.md @@ -366,6 +366,8 @@ The following named reducers are supported: * *deviation* - the standard deviation * *variance* - the variance per [Welford’s 
algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * *identity* - the array of values +* *x* - the group’s *x* value (when grouping on *x*) +* *y* - the group’s *y* value (when grouping on *y*) In addition, a reducer may be specified as: diff --git a/docs/transforms/hexbin.md b/docs/transforms/hexbin.md index 2e3031a4f4..938f9c8671 100644 --- a/docs/transforms/hexbin.md +++ b/docs/transforms/hexbin.md @@ -174,9 +174,9 @@ Plot.plot({ The *options* must specify the **x** and **y** channels. The **binWidth** option (default 20) defines the distance between centers of neighboring hexagons in pixels. If any of **z**, **fill**, or **stroke** is a channel, the first of these channels will be used to subdivide bins. -The *outputs* options are similar to the [bin transform](./bin.md); each output channel receives as input, for each hexagon, the subset of the data which has been matched to its center. The outputs object specifies the aggregation method for each output channel. +The *outputs* options are similar to the [bin transform](./bin.md); for each hexagon, an output channel value is derived by reducing the corresponding binned input channel values. The *outputs* object specifies the reducer for each output channel. 
-The following aggregation methods are supported: +The following named reducers are supported: * *first* - the first value, in input order * *last* - the last value, in input order @@ -195,13 +195,22 @@ The following aggregation methods are supported: * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * *mode* - the value with the most occurrences * *identity* - the array of values -* a function to be passed the array of values for each bin and the extent of the bin +* *x* - the hexagon’s *x* center +* *y* - the hexagon’s *y* center + +In addition, a reducer may be specified as: + +* a function to be passed the array of values for each bin and the center of the bin * an object with a *reduceIndex* method +In the last case, the **reduceIndex** method is repeatedly passed three arguments: the index for each bin (an array of integers), the input channel’s array of values, and the center of the bin (an object {data, x, y}); it must then return the corresponding aggregate value for the bin. + +Most reducers require binding the output channel to an input channel; for example, if you want the **y** output channel to be a *sum* (not merely a count), there should be a corresponding **y** input channel specifying which values to sum. If there is not, *sum* will be equivalent to *count*. + ## hexbin(*outputs*, *options*) {#hexbin} ```js Plot.dot(olympians, Plot.hexbin({fill: "count"}, {x: "weight", y: "height"})) ``` -Bins (hexagonally) on **x** and **y**. Also groups on the first channel of **z**, **fill**, or **stroke**, if any. +Bins hexagonally on **x** and **y**. Also groups on the first channel of **z**, **fill**, or **stroke**, if any. 
diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index 8e00329b09..1982c76e0e 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -38,8 +38,42 @@ export interface GroupOutputOptions { z?: ChannelValue; } +/** + * How to reduce grouped values; one of: + * + * - a generic reducer name, such as *count* or *first* + * - *x* - the group’s **x** value (when grouping on **x**) + * - *y* - the group’s **y** value (when grouping on **y**) + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method + * + * When a reducer function or implementation is used with the group transform, + * it is passed the group extent {x, y} as an additional argument. + */ +export type GroupReducer = Reducer | GroupReducerFunction | GroupReducerImplementation | "x" | "y"; + +/** + * A shorthand functional group reducer implementation: given an array of input + * channel *values*, and the current group’s *extent*, returns the corresponding + * reduced output value. + */ +export type GroupReducerFunction<S = any, T = S> = (values: S[], extent: {x: any; y: any}) => T; + +/** A group reducer implementation. */ +export interface GroupReducerImplementation<S = any, T = S> { + /** + * Given an *index* representing the contents of the current group, the input + * channel’s array of *values*, and the current group’s *extent*, returns the + * corresponding reduced output value. If no input channel is supplied (e.g., + * as with the *count* reducer) then *values* may be undefined. + */ + reduceIndex(index: number[], values: S[], extent: {x: any; y: any}): T; + // TODO scope + // TODO label +} + /** Output channels (and options) for the group transform. 
*/ -export type GroupOutputs = ChannelReducers | GroupOutputOptions; +export type GroupOutputs = ChannelReducers<GroupReducer> | GroupOutputOptions<GroupReducer>; /** * Groups on the first channel of **z**, **fill**, or **stroke**, if any, and diff --git a/src/transforms/group.js b/src/transforms/group.js index 6024b588ba..4330c886a1 100644 --- a/src/transforms/group.js +++ b/src/transforms/group.js @@ -76,10 +76,10 @@ function groupn( inputs = {} // input channels and options ) { // Compute the outputs. - outputs = maybeOutputs(outputs, inputs); - reduceData = maybeReduce(reduceData, identity); - sort = sort == null ? undefined : maybeOutput("sort", sort, inputs); - filter = filter == null ? undefined : maybeEvaluator("filter", filter, inputs); + outputs = maybeGroupOutputs(outputs, inputs); + reduceData = maybeGroupReduce(reduceData, identity); + sort = sort == null ? undefined : maybeGroupOutput("sort", sort, inputs); + filter = filter == null ? undefined : maybeGroupEvaluator("filter", filter, inputs); // Produce x and y output channels as appropriate. 
const [GX, setGX] = maybeColumn(x); @@ -287,6 +287,32 @@ function invalidReduce(reduce) { throw new Error(`invalid reduce: ${reduce}`); } +export function maybeGroupOutputs(outputs, inputs) { + return maybeOutputs(outputs, inputs, maybeGroupOutput); +} + +function maybeGroupOutput(name, reduce, inputs) { + return maybeOutput(name, reduce, inputs, maybeGroupEvaluator); +} + +function maybeGroupEvaluator(name, reduce, inputs) { + return maybeEvaluator(name, reduce, inputs, maybeGroupReduce); +} + +function maybeGroupReduce(reduce, value) { + return maybeReduce(reduce, value, maybeGroupReduceFallback); +} + +function maybeGroupReduceFallback(reduce) { + switch (`${reduce}`.toLowerCase()) { + case "x": + return reduceX; + case "y": + return reduceY; + } + throw new Error(`invalid group reduce: ${reduce}`); +} + export function maybeSubgroup(outputs, inputs) { for (const name in inputs) { const value = inputs[name]; @@ -399,6 +425,18 @@ function reduceProportion(value, scope) { : {scope, reduceIndex: (I, V, basis = 1) => sum(I, (i) => V[i]) / basis}; } +const reduceX = { + reduceIndex(I, X, {x}) { + return x; + } +}; + +const reduceY = { + reduceIndex(I, X, {y}) { + return y; + } +}; + export function find(test) { if (typeof test !== "function") throw new Error(`invalid test function: ${test}`); return { diff --git a/src/transforms/hexbin.d.ts b/src/transforms/hexbin.d.ts index 6f6adedff3..4a6dad2cc2 100644 --- a/src/transforms/hexbin.d.ts +++ b/src/transforms/hexbin.d.ts @@ -1,5 +1,6 @@ import type {ChannelReducers, ChannelValue} from "../channel.js"; import type {Initialized} from "./basic.js"; +import type {GroupReducer} from "./group.js"; /** Options for the hexbin transform. */ export interface HexbinOptions { @@ -43,4 +44,4 @@ export interface HexbinOptions { * * To draw empty hexagons, see the hexgrid mark. 
*/ -export function hexbin<T>(outputs?: ChannelReducers, options?: T & HexbinOptions): Initialized<T>; +export function hexbin<T>(outputs?: ChannelReducers<GroupReducer>, options?: T & HexbinOptions): Initialized<T>; diff --git a/src/transforms/hexbin.js b/src/transforms/hexbin.js index 5952951352..54e409d0aa 100644 --- a/src/transforms/hexbin.js +++ b/src/transforms/hexbin.js @@ -2,7 +2,7 @@ import {map, number, valueof} from "../options.js"; import {applyPosition} from "../projection.js"; import {sqrt3} from "../symbol.js"; import {initializer} from "./basic.js"; -import {hasOutput, maybeGroup, maybeOutputs, maybeSubgroup} from "./group.js"; +import {hasOutput, maybeGroup, maybeGroupOutputs, maybeSubgroup} from "./group.js"; // We don’t want the hexagons to align with the edges of the plot frame, as that // would cause extreme x-values (the upper bound of the default x-scale domain) @@ -16,9 +16,8 @@ export function hexbin(outputs = {fill: "count"}, {binWidth, ...options} = {}) { const {z} = options; // TODO filter e.g. to show empty hexbins? - // TODO disallow x, x1, x2, y, y1, y2 reducers? binWidth = binWidth === undefined ? 20 : number(binWidth); - outputs = maybeOutputs(outputs, options); + outputs = maybeGroupOutputs(outputs, options); // A fill output means a fill channel; declaring the channel here instead of // waiting for the initializer allows the mark constructor to determine that @@ -65,15 +64,15 @@ export function hexbin(outputs = {fill: "count"}, {binWidth, ...options} = {}) { const binFacet = []; for (const o of outputs) o.scope("facet", facet); for (const [f, I] of maybeGroup(facet, G)) { - for (const bin of hbin(I, X, Y, binWidth)) { + for (const {index: b, extent} of hbin(data, I, X, Y, binWidth)) { binFacet.push(++i); - BX.push(bin.x); - BY.push(bin.y); - if (Z) GZ.push(G === Z ? f : Z[bin[0]]); - if (F) GF.push(G === F ? f : F[bin[0]]); - if (S) GS.push(G === S ? f : S[bin[0]]); - if (Q) GQ.push(G === Q ? 
f : Q[bin[0]]); - for (const o of outputs) o.reduce(bin); + BX.push(extent.x); + BY.push(extent.y); + if (Z) GZ.push(G === Z ? f : Z[b[0]]); + if (F) GF.push(G === F ? f : F[b[0]]); + if (S) GS.push(G === S ? f : S[b[0]]); + if (Q) GQ.push(G === Q ? f : Q[b[0]]); + for (const o of outputs) o.reduce(b, extent); } } binFacets.push(binFacet); @@ -106,7 +105,7 @@ export function hexbin(outputs = {fill: "count"}, {binWidth, ...options} = {}) { }); } -function hbin(I, X, Y, dx) { +function hbin(data, I, X, Y, dx) { const dy = dx * (1.5 / sqrt3); const bins = new Map(); for (const i of I) { @@ -127,11 +126,10 @@ function hbin(I, X, Y, dx) { const key = `${pi},${pj}`; let bin = bins.get(key); if (bin === undefined) { - bins.set(key, (bin = [])); - bin.x = (pi + (pj & 1) / 2) * dx + ox; - bin.y = pj * dy + oy; + bin = {index: [], extent: {data, x: (pi + (pj & 1) / 2) * dx + ox, y: pj * dy + oy}}; + bins.set(key, bin); } - bin.push(i); + bin.index.push(i); } return bins.values(); } diff --git a/test/output/hexbinFillX.svg b/test/output/hexbinFillX.svg new file mode 100644 index 0000000000..3c83296f8b --- /dev/null +++ b/test/output/hexbinFillX.svg @@ -0,0 +1,273 @@ + + + + + + + + + + + + + + + + + + + 34 + 36 + 38 + 40 + 42 + 44 + 46 + 48 + 50 + 52 + 54 + 56 + 58 + + + ↑ culmen_length_mm + + + + + + + + + + + + + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + + + culmen_depth_mm → + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/output/mobyDickLetterFrequencyFillX.svg b/test/output/mobyDickLetterFrequencyFillX.svg new file mode 100644 index 0000000000..6e843da67b --- /dev/null +++ 
b/test/output/mobyDickLetterFrequencyFillX.svg @@ -0,0 +1,151 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 100 + 200 + 300 + 400 + 500 + 600 + 700 + 800 + 900 + 1,000 + 1,100 + 1,200 + + + ↑ Frequency + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + A + B + C + D + E + F + G + H + I + J + K + L + M + N + O + P + Q + R + S + T + U + V + W + X + Y + Z + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/plots/hexbin.ts b/test/plots/hexbin.ts index 760e260a49..dd8fa67568 100644 --- a/test/plots/hexbin.ts +++ b/test/plots/hexbin.ts @@ -11,3 +11,14 @@ export async function hexbin() { ] }); } + +export async function hexbinFillX() { + const penguins = await d3.csv("data/penguins.csv", d3.autoType); + return Plot.plot({ + marks: [ + Plot.hexgrid(), + Plot.frame(), + Plot.dot(penguins, Plot.hexbin({r: "count", fill: "x"}, {x: "culmen_depth_mm", y: "culmen_length_mm"})) + ] + }); +} diff --git a/test/plots/moby-dick-letter-frequency.ts b/test/plots/moby-dick-letter-frequency.ts index fd5f5ca8af..708f754024 100644 --- a/test/plots/moby-dick-letter-frequency.ts +++ b/test/plots/moby-dick-letter-frequency.ts @@ -5,9 +5,17 @@ export async function mobyDickLetterFrequency() { const mobydick = await d3.text("data/moby-dick-chapter-1.txt"); const letters = [...mobydick].filter((c) => /[a-z]/i.test(c)).map((c) => c.toUpperCase()); return Plot.plot({ - y: { - grid: true - }, + y: {grid: true}, marks: [Plot.barY(letters, Plot.groupX({y: "count"})), Plot.ruleY([0])] }); } + +export async function mobyDickLetterFrequencyFillX() { + const mobydick = await d3.text("data/moby-dick-chapter-1.txt"); + const letters = [...mobydick].filter((c) => /[a-z]/i.test(c)).map((c) => c.toUpperCase()); + return Plot.plot({ + y: {grid: true}, + color: {scheme: "spectral"}, + marks: [Plot.barY(letters, Plot.groupX({y: "count", fill: "x"})), Plot.ruleY([0])] + }); +}