Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make line plot 0/0 proportions have hollow circle #1686

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ import {
mapValues,
map,
keys,
get,
set,
} from 'lodash';
import BinWidthControl from '@veupathdb/components/lib/components/plotControls/BinWidthControl';
import LabelledGroup from '@veupathdb/components/lib/components/widgets/LabelledGroup';
Expand Down Expand Up @@ -564,6 +566,7 @@ function LineplotViz(props: VisualizationProps<Options>) {
'yAxisVariable'
);

// data
const data = usePromise(
useCallback(async (): Promise<LinePlotDataWithCoverage | undefined> => {
if (
Expand Down Expand Up @@ -665,7 +668,8 @@ function LineplotViz(props: VisualizationProps<Options>) {
showMissingFacet,
facetVocabulary,
facetVariable,
neutralPaletteProps.colorPalette
neutralPaletteProps.colorPalette,
params.config.valueSpec === 'proportion'
);
}, [
studyId,
Expand Down Expand Up @@ -760,6 +764,7 @@ function LineplotViz(props: VisualizationProps<Options>) {
);

// custom legend list
// Will have to change colors separately here
const legendItems: LegendItemsProps[] = useMemo(() => {
const allData = data.value?.dataSetProcess;
const palette = neutralPaletteProps.colorPalette ?? ColorPaletteDefault;
Expand All @@ -779,7 +784,9 @@ function LineplotViz(props: VisualizationProps<Options>) {
marker: 'line',
// set marker colors appropriately
markerColor:
dataItem?.name === 'No data' ? '#E8E8E8' : palette[index], // set first color for no overlay variable selected
dataItem?.name === 'No data'
? '#E8E8E8'
: dataItem.marker?.color ?? palette[index], // set first color for no overlay variable selected
// simplifying the check with the presence of data: be carefule of y:[null] case in Scatter plot
hasData: !isFaceted(allData)
? dataItem.y != null &&
Expand Down Expand Up @@ -1835,7 +1842,8 @@ export function lineplotResponseToData(
showMissingFacet: boolean = false,
facetVocabulary: string[] = [],
facetVariable?: Variable,
colorPaletteOverride?: string[]
colorPaletteOverride?: string[],
dependentIsProportion?: boolean
): LinePlotDataWithCoverage {
const modeValue: LinePlotDataSeries['mode'] = 'lines+markers';

Expand All @@ -1852,6 +1860,7 @@ export function lineplotResponseToData(
: '__NO_FACET__'
);

// processedData
const processedData = mapValues(facetGroupedResponseData, (group) => {
const {
dataSetProcess,
Expand Down Expand Up @@ -1880,7 +1889,8 @@ export function lineplotResponseToData(
response.lineplot.config.binSpec,
response.lineplot.config.binSlider,
overlayVariable,
colorPaletteOverride
colorPaletteOverride,
dependentIsProportion
);

return {
Expand Down Expand Up @@ -1949,10 +1959,15 @@ type ArrayTypes = PickByType<
* input: { x: [1,2,3,4,5], y: [6,1,null,9,11], foo: ['a','b','c','d','e'] }
* output: { x: [1,2,3,3,3,4,5], y: [ 6,1,null,0,null,9,11, foo: ['a','b','c','c','c','d','e'] ] }
*/
// nullZeroHack
// Update this to get rid of marker.symbol array logic
// because adding whole new 0/0 series alleviates the issue
function nullZeroHack(
dataSetProcess: LinePlotDataSeries[],
dependentValueType: string
): LinePlotDataSeries[] {
console.log('In nullZeroHack()');
console.log({ dataSetProcess });
// make no attempt to process date values
if (dependentValueType === 'date') return dataSetProcess;

Expand All @@ -1963,7 +1978,9 @@ function nullZeroHack(
.filter((_): _ is keyof LinePlotDataSeries => true)
.filter((key): key is keyof ArrayTypes => Array.isArray(series[key]));

const otherArrayKeys = arrayKeys.filter((key) => key !== 'y');
const otherArrayKeys = arrayKeys.filter((key) => key !== 'y') as string[];
if (Array.isArray(series.marker?.symbol))
otherArrayKeys.push('marker.symbol');

// coersce type of y knowing that we're not dealing with dates (as string[])
const y = series.y as (number | null)[];
Expand Down Expand Up @@ -1992,19 +2009,22 @@ function nullZeroHack(
newY.push(current);
}

// need to handle markers of 0/0 values here
otherArrayKeys.forEach(
// e.g. x, binLabel etc
(key) => {
// initialize empty array if needed
if (accum[key] == null) accum[key] = [];
if (get(accum, key) == null) set(accum, key, []);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interested to know why the change here. Style or substance, or both?

Ah - I get it now. It's path-based access, e.g. x['a']['b'] is get(x, 'a.b') - needed for marker.symbol

// get the value of, e.g. x[i]
const value = series[key]![index];
const value = get(series, key)[index];
// figure out if we're going to push one or three identical values
const oneOrThree = current == null ? 3 : 1;
// and do it
[...Array(oneOrThree)].forEach(() =>
(accum[key] as (number | null | string)[]).push(value)
(get(accum, key) as (number | null | string)[]).push(value)
);
if (key === 'marker.symbol')
accum['marker']!['color'] = series.marker!.color;
}
);

Expand Down Expand Up @@ -2112,6 +2132,7 @@ function getRequestParams(
}

// making plotly input data
// processInputData
function processInputData(
responseLineplotData: LineplotResponse['lineplot']['data'],
categoricalMode: boolean,
Expand All @@ -2126,15 +2147,26 @@ function processInputData(
binSpec?: BinSpec,
binWidthSlider?: BinWidthSlider,
overlayVariable?: Variable,
colorPaletteOverride?: string[]
colorPaletteOverride?: string[],
dependentIsProportion?: boolean
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative to passing yet another argument might be to create at io-ts type for the numeratorN/denominatorN structure, so:

      binSampleSize: union([
        array(
          type({
            N: number,
          })
        ),
        array(
          type({
            numeratorN: number,
            denominatorN: number,
          })
        ),
      ]),

would become

export type ProportionSampleSizes = TypeOf<typeof ProportionSampleSizes>;
const ProportionSampleSizes = type({
            numeratorN: number,
            denominatorN: number,
          });

...

      binSampleSize: union([
        array(
          type({
            N: number,
          })
        ),
        array(ProportionSampleSizes),
      ]),

Then in the markerSymbol function you could replace the dependentIsProportion with something like

el.binSampleSize.every((bss) => ProportionSampleSizes.is(bss))

I don't know if that makes it any more (or less) readable... (and it probably doesn't fix all the TS issues)

It would be nice to avoid the any and the hasOwnProperty check on just the first array element. But I may just be introducing more TS spaghetti to compensate...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might wrap back around to the numeratorN stuff, though there are more pressing TS issues, like the many anys, as you mention.

Copy link
Member

@bobular bobular Mar 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we might need to explicitly "tag" the LineplotResponse series produced by processZeroOverZeroData with an "isZeroSeries" property?

Introspecting to figure out which symbol to use is tricky. I'm noticing a el.binSampleSize[0] is undefined error when the zeroSeries is empty, though that may be avoidable with a length check - I haven't tried that.

Here's how to reproduce:
LLINEUP -> x=observation date, y=Plasmodium by RDT, num=Yes, denom=all
(basically there are no 0/0s)

) {
const { zeroSeriesAdded, zeroSplitLineplotData } = processZeroOverZeroData(
responseLineplotData,
hasMissingData,
dependentIsProportion
);

console.log({ responseLineplotData });
console.log({ dependentIsProportion });
console.log({ zeroSplitLineplotData });

// set fillAreaValue for densityplot
const fillAreaValue: LinePlotDataSeries['fill'] =
vizType === 'densityplot' ? 'toself' : undefined;

// catch the case when the back end has returned valid but completely empty data
if (
responseLineplotData.every(
zeroSplitLineplotData.every(
(data) => data.seriesX?.length === 0 && data.seriesY?.length === 0
)
) {
Expand All @@ -2143,13 +2175,24 @@ function processInputData(
};
}

console.log({ zeroSeriesAdded });

// function to return color or gray where needed if showMissingness == true
const markerColor = (index: number) => {
const palette = colorPaletteOverride ?? ColorPaletteDefault;
if (showMissingness && index === responseLineplotData.length - 1) {
if (showMissingness && index === zeroSplitLineplotData.length - 1) {
return gray;
} else {
return palette[index] ?? 'black'; // TO DO: decide on overflow behaviour
console.log({ index });
console.log({
newIndex: index % Math.floor(zeroSplitLineplotData.length / 2),
});
return (
(!zeroSeriesAdded
? palette[index]
: palette[index % Math.floor(zeroSplitLineplotData.length / 2)]) ??
'black'
); // TO DO: decide on overflow behaviour
}
};

Expand All @@ -2160,14 +2203,28 @@ function processInputData(
return (
showMissingness &&
!hasMissingData &&
index === responseLineplotData.length - 2
index === zeroSplitLineplotData.length - 2
);
};

const markerSymbol = (index: number): string =>
showMissingness && index === responseLineplotData.length - 1
? 'x'
: 'circle';
// Update this to just read zeroSeriesAdded and infer symbol from that
const markerSymbol = (
index: number,
el: LineplotResponse['lineplot']['data'][number]
): string | string[] => {
const symbol =
showMissingness && index === zeroSplitLineplotData.length - 1
? 'x'
: dependentIsProportion &&
el.binSampleSize &&
el.binSampleSize[0].hasOwnProperty('numeratorN')
? el.binSampleSize.map((obj: any) =>
obj.numeratorN || obj.denominatorN ? 'circle' : 'circle-open'
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly, the markers still look like they are connected with lines when they're close together.
Even if we set the data[].mode: 'markers' instead of lines+markers they look like little London Underground logos
image

The line is coming from the error bars, even though the values are null for the offending 0/0 points. Heres data[].mode: 'none'

image

It will be a pain to create a separate series with no error bars for the special zero points.

Here's what it looks like when switching off error bars via the UI (and putting lines+markers back on)

image

: 'circle';
console.log({ symbol });
return symbol;
};

const binWidthSliderData =
binSpec != null && binWidthSlider != null
Expand Down Expand Up @@ -2205,7 +2262,7 @@ function processInputData(

let dataSetProcess: LinePlotDataSeries[] = [];

responseLineplotData.some(function (el, index) {
zeroSplitLineplotData.some(function (el, index) {
if (el.seriesX && el.seriesY) {
if (el.seriesX.length !== el.seriesY.length) {
throw new Error(
Expand Down Expand Up @@ -2271,12 +2328,14 @@ function processInputData(
opacity: 0.7,
marker: {
color: markerColor(index),
symbol: markerSymbol(index),
// Can make this an array to specify symbols for specific points
symbol: markerSymbol(index, el) as any,
},
// this needs to be here for the case of markers with line or lineplot.
line: { color: markerColor(index), shape: 'linear' },
// for connecting points regardless of missing data
connectgaps: true,
// note: removing this may cause other issues
// connectgaps: true,
});

return breakAfterThisSeries(index);
Expand All @@ -2301,10 +2360,13 @@ function processInputData(
.filter((val): val is number | string => val != null)
);

console.log({ dataSetProcess });
const zeroHackedSeries = nullZeroHack(dataSetProcess, dependentValueType);
console.log({ zeroHackedSeries });

return {
dataSetProcess: {
// Let's not show no data: nullZeroHack is not used
series: dataSetProcess,
series: zeroHackedSeries,
...binWidthSliderData,
},
xMin: min(xValues),
Expand All @@ -2320,6 +2382,8 @@ function processInputData(
* Utility functions for processInputData()
*/

// May need to add multiple new 0/0 series here
// reorderResponseLineplotData
function reorderResponseLineplotData(
data: LinePlotDataResponse['lineplot']['data'],
categoricalMode: boolean,
Expand Down Expand Up @@ -2398,6 +2462,93 @@ function reorderResponseLineplotData(
}
}

type ArrayTypesGeneral = Omit<
PickByType<LineplotResponse['lineplot']['data'][number], any[]>,
'facetVariableDetails'
>;

function processZeroOverZeroData(
lineplotData: LineplotResponse['lineplot']['data'],
hasMissingData: boolean,
dependentIsProportion?: boolean
) {
if (!dependentIsProportion)
return { zeroSeriesAdded: false, zeroSplitLineplotData: lineplotData };

const newNonZeroSeries: Array<
LineplotResponse['lineplot']['data'][number]
> = [];
const newZeroSeries: Array<LineplotResponse['lineplot']['data'][number]> = [];

const stopIndex = hasMissingData
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to worry about the early stopping before the final element in the lineplot response array that may correspond to the "no data for overlay/facet".

In processInputData, the zeroSplitLineplotData that this function returns is passed through the (somewhat opaque) .some(do_stuff; return breakAfterThisSeries(index)) which will ignore the final element if the user hasn't asked to show the "missing data".

? lineplotData.length - 1
: lineplotData.length;

for (let seriesIndex = 0; seriesIndex < stopIndex; seriesIndex++) {
const series = lineplotData[seriesIndex];

// which are the arrays in the series object?
// (assumption: the lengths of all arrays are all the same)
const arrayKeys = Object.keys(series)
.filter(
(_): _ is keyof LineplotResponse['lineplot']['data'][number] => true
)
.filter(
(key): key is keyof ArrayTypesGeneral =>
Array.isArray(series[key]) && key !== 'facetVariableDetails'
);
const binSampleSize = series.binSampleSize as {
numeratorN: number;
denominatorN: number;
}[];

console.log({ arrayKeys });

const makeEmptySeries = () => ({
...series,
...arrayKeys.reduce((newObj, arrayKey) => {
newObj[arrayKey] = [];
return newObj;
}, {} as Pick<LineplotResponse['lineplot']['data'][number], typeof arrayKeys[number]>),
});

const nonZeroSeries = makeEmptySeries();
const zeroSeries = makeEmptySeries();

for (
let dataPointIndex = 0;
dataPointIndex < binSampleSize.length;
dataPointIndex++
) {
let destinationSeries: typeof nonZeroSeries;

if (
binSampleSize[dataPointIndex].numeratorN ||
binSampleSize[dataPointIndex].denominatorN
) {
destinationSeries = nonZeroSeries;
} else {
destinationSeries = zeroSeries;
}

arrayKeys.forEach((key) => {
const array = series[key]!;
const value = array[dataPointIndex]!;
destinationSeries[key]!.push(value as any);
});

if (dataPointIndex === 0) console.log({ nonZeroSeries, zeroSeries });
}

newNonZeroSeries.push(nonZeroSeries);
newZeroSeries.push(zeroSeries);
}

const newLineplotData = [...newNonZeroSeries, ...newZeroSeries];
if (hasMissingData) newLineplotData.push(...lineplotData.slice(-1));
return { zeroSeriesAdded: true, zeroSplitLineplotData: newLineplotData };
}

/**
* determine if we are dealing with a categorical variable
*/
Expand Down