Skip to content

Commit 3de7fac

Browse files
authored
Merge pull request #3745 from soerenreichardt/prevent-negative-histogram-values
Prevent computing histogram when using log scaler
2 parents 9f019e3 + f30be1e commit 3de7fac

File tree

14 files changed

+92
-40
lines changed

14 files changed

+92
-40
lines changed

algo-common/src/main/java/org/neo4j/gds/result/AbstractCentralityResultBuilder.java

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,40 +20,32 @@
2020
package org.neo4j.gds.result;
2121

2222
import org.HdrHistogram.DoubleHistogram;
23-
import org.jetbrains.annotations.Nullable;
23+
import org.jetbrains.annotations.NotNull;
2424
import org.neo4j.gds.compat.MapUtil;
2525
import org.neo4j.gds.core.concurrency.Pools;
2626
import org.neo4j.gds.core.utils.ProgressTimer;
2727
import org.neo4j.gds.core.utils.statistics.CentralityStatistics;
28+
import org.neo4j.gds.scaling.ScalarScaler;
2829
import org.neo4j.internal.kernel.api.procs.ProcedureCallContext;
2930

31+
import java.util.HashMap;
3032
import java.util.Map;
3133
import java.util.Optional;
3234
import java.util.function.LongToDoubleFunction;
3335

3436
public abstract class AbstractCentralityResultBuilder<WRITE_RESULT> extends AbstractResultBuilder<WRITE_RESULT> {
3537

36-
private final int concurrency;
37-
protected boolean buildHistogram;
38+
private static final String HISTOGRAM_ERROR_KEY = "Error";
3839

39-
protected long postProcessingMillis = -1L;
40-
protected Optional<DoubleHistogram> maybeCentralityHistogram = Optional.empty();
40+
private final int concurrency;
41+
private final boolean buildHistogram;
42+
private final Map<String, Object> histogramError;
4143

42-
protected @Nullable Map<String, Object> centralityHistogramOrNull() {
43-
return maybeCentralityHistogram.map(histogram -> MapUtil.map(
44-
"min", histogram.getMinValue(),
45-
"mean", histogram.getMean(),
46-
"max", histogram.getMaxValue(),
47-
"p50", histogram.getValueAtPercentile(50),
48-
"p75", histogram.getValueAtPercentile(75),
49-
"p90", histogram.getValueAtPercentile(90),
50-
"p95", histogram.getValueAtPercentile(95),
51-
"p99", histogram.getValueAtPercentile(99),
52-
"p999", histogram.getValueAtPercentile(99.9)
53-
)).orElse(null);
54-
}
44+
private LongToDoubleFunction centralityFunction;
45+
private ScalarScaler.Variant scaler;
5546

56-
protected LongToDoubleFunction centralityFunction = null;
47+
protected long postProcessingMillis = -1L;
48+
protected Map<String, Object> centralityHistogram;
5749

5850
protected AbstractCentralityResultBuilder(
5951
ProcedureCallContext callContext,
@@ -63,6 +55,7 @@ protected AbstractCentralityResultBuilder(
6355
.outputFields()
6456
.anyMatch(s -> s.equalsIgnoreCase("centralityDistribution"));
6557
this.concurrency = concurrency;
58+
this.histogramError = new HashMap<>();
6659
}
6760

6861
protected abstract WRITE_RESULT buildResult();
@@ -72,17 +65,57 @@ public AbstractCentralityResultBuilder<WRITE_RESULT> withCentralityFunction(Long
7265
return this;
7366
}
7467

68+
public AbstractCentralityResultBuilder<WRITE_RESULT> withScalerVariant(ScalarScaler.Variant scaler) {
69+
this.scaler = scaler;
70+
return this;
71+
}
72+
7573
@Override
7674
public WRITE_RESULT build() {
77-
final ProgressTimer timer = ProgressTimer.start();
75+
var timer = ProgressTimer.start();
76+
var maybeCentralityHistogram = computeCentralityHistogram();
77+
this.centralityHistogram = centralityHistogramResult(maybeCentralityHistogram);
7878

79-
if (buildHistogram && centralityFunction != null) {
80-
maybeCentralityHistogram = Optional.of(CentralityStatistics.histogram(nodeCount, centralityFunction,Pools.DEFAULT, concurrency));
81-
}
8279
timer.stop();
8380
this.postProcessingMillis = timer.getDuration();
8481

8582
return buildResult();
8683
}
8784

85+
@NotNull
86+
private Optional<DoubleHistogram> computeCentralityHistogram() {
87+
var logScaler = scaler == ScalarScaler.Variant.LOG;
88+
if (buildHistogram && centralityFunction != null) {
89+
if (logScaler) {
90+
logScalerHistogramError();
91+
} else {
92+
return Optional.of(CentralityStatistics.histogram(
93+
nodeCount,
94+
centralityFunction,
95+
Pools.DEFAULT,
96+
concurrency
97+
));
98+
}
99+
}
100+
return Optional.empty();
101+
}
102+
103+
private Map<String, Object> centralityHistogramResult(Optional<DoubleHistogram> maybeHistogram) {
104+
return maybeHistogram.map(histogram -> MapUtil.map(
105+
"min", histogram.getMinValue(),
106+
"mean", histogram.getMean(),
107+
"max", histogram.getMaxValue(),
108+
"p50", histogram.getValueAtPercentile(50),
109+
"p75", histogram.getValueAtPercentile(75),
110+
"p90", histogram.getValueAtPercentile(90),
111+
"p95", histogram.getValueAtPercentile(95),
112+
"p99", histogram.getValueAtPercentile(99),
113+
"p999", histogram.getValueAtPercentile(99.9)
114+
)).orElse(histogramError);
115+
}
116+
117+
private void logScalerHistogramError() {
118+
this.histogramError.put(HISTOGRAM_ERROR_KEY, "Unable to create histogram when using scaler of type " + ScalarScaler.Variant.LOG);
119+
}
120+
88121
}

alpha/alpha-algo/src/main/java/org/neo4j/gds/results/CentralityScore.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
package org.neo4j.gds.results;
2121

2222
import org.jetbrains.annotations.Nullable;
23-
import org.neo4j.gds.result.AbstractCentralityResultBuilder;
2423
import org.neo4j.gds.config.WritePropertyConfig;
24+
import org.neo4j.gds.result.AbstractCentralityResultBuilder;
2525
import org.neo4j.internal.kernel.api.procs.ProcedureCallContext;
2626

2727
import java.util.Map;
@@ -73,7 +73,7 @@ public CentralityScore.Stats buildResult() {
7373
computeMillis,
7474
writeMillis,
7575
config instanceof WritePropertyConfig ? ((WritePropertyConfig) config).writeProperty() : "",
76-
centralityHistogramOrNull()
76+
centralityHistogram
7777
);
7878
}
7979
}

alpha/alpha-algo/src/main/java/org/neo4j/gds/results/PageRankScore.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
package org.neo4j.gds.results;
2121

2222
import org.jetbrains.annotations.Nullable;
23-
import org.neo4j.gds.result.AbstractCentralityResultBuilder;
2423
import org.neo4j.gds.config.WritePropertyConfig;
24+
import org.neo4j.gds.result.AbstractCentralityResultBuilder;
2525
import org.neo4j.internal.kernel.api.procs.ProcedureCallContext;
2626

2727
import java.util.Map;
@@ -92,7 +92,7 @@ public Stats buildResult() {
9292
writeMillis,
9393
dampingFactor,
9494
config instanceof WritePropertyConfig ? ((WritePropertyConfig) config).writeProperty() : "",
95-
centralityHistogramOrNull()
95+
centralityHistogram
9696
);
9797
}
9898
}

proc/centrality/src/main/java/org/neo4j/gds/betweenness/BetweennessCentralityMutateProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ public MutateResult buildResult() {
137137
computeMillis,
138138
postProcessingMillis,
139139
mutateMillis,
140-
centralityHistogramOrNull(),
140+
centralityHistogram,
141141
sumCentrality,
142142
minCentrality,
143143
maxCentrality,

proc/centrality/src/main/java/org/neo4j/gds/betweenness/BetweennessCentralityStatsProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ protected Builder(ProcedureCallContext callContext, int concurrency) {
128128
@Override
129129
public StatsResult buildResult() {
130130
return new StatsResult(
131-
centralityHistogramOrNull(),
131+
centralityHistogram,
132132
sumCentrality,
133133
minCentrality,
134134
maxCentrality,

proc/centrality/src/main/java/org/neo4j/gds/betweenness/BetweennessCentralityWriteProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public WriteResult buildResult() {
132132
computeMillis,
133133
postProcessingMillis,
134134
writeMillis,
135-
centralityHistogramOrNull(),
135+
centralityHistogram,
136136
sumCentrality,
137137
minCentrality,
138138
maxCentrality,

proc/centrality/src/main/java/org/neo4j/gds/degree/DegreeCentralityMutateProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ public DegreeCentralityMutateProc.MutateResult buildResult() {
127127
computeMillis,
128128
postProcessingMillis,
129129
mutateMillis,
130-
centralityHistogramOrNull(),
130+
centralityHistogram,
131131
config.toMap()
132132
);
133133
}

proc/centrality/src/main/java/org/neo4j/gds/degree/DegreeCentralityStatsProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ protected Builder(ProcedureCallContext callContext, int concurrency) {
112112
@Override
113113
public DegreeCentralityStatsProc.StatsResult buildResult() {
114114
return new DegreeCentralityStatsProc.StatsResult(
115-
centralityHistogramOrNull(),
115+
centralityHistogram,
116116
createMillis,
117117
computeMillis,
118118
postProcessingMillis,

proc/centrality/src/main/java/org/neo4j/gds/degree/DegreeCentralityWriteProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ protected WriteResult buildResult() {
123123
computeMillis,
124124
postProcessingMillis,
125125
writeMillis,
126-
centralityHistogramOrNull(),
126+
centralityHistogram,
127127
config.toMap()
128128
);
129129
}

proc/centrality/src/main/java/org/neo4j/gds/pagerank/PageRankMutateProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public MutateResult buildResult() {
138138
return new MutateResult(
139139
ranIterations,
140140
didConverge,
141-
centralityHistogramOrNull(),
141+
centralityHistogram,
142142
createMillis,
143143
computeMillis,
144144
postProcessingMillis,

proc/centrality/src/main/java/org/neo4j/gds/pagerank/PageRankProc.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
*/
2020
package org.neo4j.gds.pagerank;
2121

22-
import org.neo4j.gds.result.AbstractCentralityResultBuilder;
2322
import org.neo4j.gds.AlgoBaseProc;
2423
import org.neo4j.gds.api.NodeProperties;
24+
import org.neo4j.gds.result.AbstractCentralityResultBuilder;
2525
import org.neo4j.internal.kernel.api.procs.ProcedureCallContext;
2626
import org.neo4j.logging.Log;
2727

@@ -47,7 +47,8 @@ static <PROC_RESULT, CONFIG extends PageRankConfig> PageRankResultBuilder<PROC_R
4747
procResultBuilder
4848
.withDidConverge(!computeResult.isGraphEmpty() && result.didConverge())
4949
.withRanIterations(!computeResult.isGraphEmpty() ? result.iterations() : 0)
50-
.withCentralityFunction(!computeResult.isGraphEmpty() ? computeResult.result().scores()::get : null);
50+
.withCentralityFunction(!computeResult.isGraphEmpty() ? computeResult.result().scores()::get : null)
51+
.withScalerVariant(computeResult.config().scaler());
5152

5253
return procResultBuilder;
5354
}

proc/centrality/src/main/java/org/neo4j/gds/pagerank/PageRankStatsProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public StatsResult buildResult() {
124124
return new StatsResult(
125125
ranIterations,
126126
didConverge,
127-
centralityHistogramOrNull(),
127+
centralityHistogram,
128128
createMillis,
129129
computeMillis,
130130
postProcessingMillis,

proc/centrality/src/main/java/org/neo4j/gds/pagerank/PageRankWriteProc.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public WriteResult buildResult() {
139139
return new WriteResult(
140140
ranIterations,
141141
didConverge,
142-
centralityHistogramOrNull(),
142+
centralityHistogram,
143143
createMillis,
144144
computeMillis,
145145
postProcessingMillis,

proc/centrality/src/test/java/org/neo4j/gds/pagerank/PageRankWriteProcTest.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@
2323
import org.junit.jupiter.api.TestFactory;
2424
import org.junit.jupiter.params.ParameterizedTest;
2525
import org.junit.jupiter.params.provider.MethodSource;
26+
import org.neo4j.gds.AlgoBaseProc;
27+
import org.neo4j.gds.GdsCypher.ModeBuildStage;
2628
import org.neo4j.gds.WritePropertyConfigProcTest;
2729
import org.neo4j.gds.compat.MapUtil;
2830
import org.neo4j.gds.core.CypherMapWrapper;
29-
import org.neo4j.gds.AlgoBaseProc;
30-
import org.neo4j.gds.GdsCypher.ModeBuildStage;
31+
import org.neo4j.gds.scaling.ScalarScaler;
3132

3233
import java.util.Collection;
3334
import java.util.List;
@@ -119,6 +120,23 @@ void testWriteYields(ModeBuildStage queryBuilder, String testCaseName) {
119120
)));
120121
}
121122

123+
@ParameterizedTest(name = "{1}")
124+
@MethodSource("org.neo4j.gds.pagerank.PageRankProcTest#graphVariations")
125+
void shouldNotComputeCentralityDistributionOnLogScaler(ModeBuildStage queryBuilder, String testCaseName) {
126+
var writeProp = "writeProp";
127+
var query = queryBuilder
128+
.writeMode()
129+
.addParameter("scaler", ScalarScaler.Variant.LOG)
130+
.addParameter("writeProperty", writeProp)
131+
.yields("centralityDistribution");
132+
133+
assertCypherResult(query, List.of(
134+
Map.of(
135+
"centralityDistribution", Map.of("Error", "Unable to create histogram when using scaler of type LOG")
136+
)
137+
));
138+
}
139+
122140
@Override
123141
public PageRankWriteConfig createConfig(CypherMapWrapper mapWrapper) {
124142
return PageRankWriteConfig.of(

0 commit comments

Comments
 (0)