From 88220a91a1a43253a5a9a46d152faf55d909ff18 Mon Sep 17 00:00:00 2001 From: Alwine Balfanz <100916390+alwba@users.noreply.github.com> Date: Tue, 9 Aug 2022 20:32:52 +0200 Subject: [PATCH 1/2] [#1571] add degree variance operator --- ! | 19 +++ .../metric/DegreeVarianceEvolution.java | 81 ++++++++++ .../functions/ExtractAllTimePointsReduce.java | 49 ++++++ .../functions/GroupDegreeTreesToVariance.java | 116 ++++++++++++++ .../TransformDeltaToAbsoluteDegreeTree.java | 55 +++++++ .../metric/DegreeVarianceEvolutionTest.java | 146 ++++++++++++++++++ 6 files changed, 466 insertions(+) create mode 100644 ! create mode 100644 gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java create mode 100644 gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java create mode 100644 gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java create mode 100644 gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java create mode 100644 gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java diff --git a/! b/! new file mode 100644 index 000000000000..f098a8e00ed2 --- /dev/null +++ b/! @@ -0,0 +1,19 @@ +[#1559] add unit-tests + +# Please enter the commit message for your changes. Lines starting +# with '#' will be ignored, and an empty message aborts the commit. +# +# Date: Tue Aug 2 11:06:54 2022 +0200 +# +# interactive rebase in progress; onto 5f1b5d5a004 +# Last commands done (3 commands done): +# reword dc25f24377f add help functions +# reword 22c9104636f add unit-tests +# No commands remaining. +# You are currently editing a commit while rebasing branch '#1559_min_max_avg_degree_evolution' on '5f1b5d5a004'. 
+# +# Changes to be committed: +# new file: gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/AvgDegreeEvolutionTest.java +# new file: gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/MaxDegreeEvolutionTest.java +# new file: gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/MinDegreeEvolutionTest.java +# diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java new file mode 100644 index 000000000000..e5505e33f67e --- /dev/null +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java @@ -0,0 +1,81 @@ +/* + * Copyright © 2014 - 2021 Leipzig University (Database Research Group) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.gradoop.temporal.model.impl.operators.metric; + +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.tuple.Tuple1; +import org.apache.flink.api.java.tuple.Tuple2; +import org.gradoop.common.model.impl.id.GradoopId; +import org.gradoop.flink.model.api.operators.UnaryBaseGraphToValueOperator; +import org.gradoop.flink.model.impl.operators.sampling.functions.VertexDegree; +import org.gradoop.temporal.model.api.TimeDimension; +import org.gradoop.temporal.model.impl.TemporalGraph; +import org.gradoop.temporal.model.impl.operators.metric.functions.*; +import org.gradoop.temporal.model.impl.operators.metric.functions.ExtractAllTimePointsReduce; +import org.gradoop.temporal.model.impl.operators.metric.functions.GroupDegreeTreesToVariance; + +import java.util.Objects; +import java.util.TreeMap; + +/** + * Operator that calculates the degree variance evolution of a temporal graph for the + * whole lifetime of the graph. + */ +public class DegreeVarianceEvolution implements UnaryBaseGraphToValueOperator>> { + /** + * The time dimension that will be considered. + */ + private final TimeDimension dimension; + + /** + * The degree type (IN, OUT, BOTH); + */ + private final VertexDegree degreeType; + + /** + * Creates an instance of this average degree evolution operator. + * + * @param degreeType the degree type to use (IN, OUT, BOTH). + * @param dimension the time dimension to use (VALID_TIME, TRANSACTION_TIME). 
+ */ + public DegreeVarianceEvolution(VertexDegree degreeType, TimeDimension dimension) { + this.degreeType = Objects.requireNonNull(degreeType); + this.dimension = Objects.requireNonNull(dimension); + } + + @Override + public DataSet> execute(TemporalGraph graph) { + DataSet>> absoluteDegreeTrees = graph.getEdges() + // 1) Extract vertex id(s) and corresponding time intervals + .flatMap(new FlatMapVertexIdEdgeInterval(dimension, degreeType)) + // 2) Group them by the vertex id + .groupBy(0) + // 3) For each vertex id, build a degree tree data structure + .reduceGroup(new BuildTemporalDegreeTree()) + // 4) Transform each tree to aggregated evolution + .map(new TransformDeltaToAbsoluteDegreeTree()); + + DataSet> timePoints = absoluteDegreeTrees + // 5) extract all timestamps where degree of any vertex changes + .reduceGroup(new ExtractAllTimePointsReduce()) + .distinct(); + + return absoluteDegreeTrees + // join with interval degree mappings + // 6) Merge trees together and calculate aggregation + .reduceGroup(new GroupDegreeTreesToVariance(timePoints)); + } +} diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java new file mode 100644 index 000000000000..ece67f4a2efd --- /dev/null +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java @@ -0,0 +1,49 @@ +/* + * Copyright © 2014 - 2021 Leipzig University (Database Research Group) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.gradoop.temporal.model.impl.operators.metric.functions; + +import org.apache.flink.api.common.functions.GroupReduceFunction; +import org.apache.flink.api.java.tuple.Tuple1; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.util.Collector; +import org.gradoop.common.model.impl.id.GradoopId; + +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +/** + * Reduce function to extract all timestamps where the degree of a vertex changes. + */ +public class ExtractAllTimePointsReduce implements GroupReduceFunction>, Tuple1> { + + public ExtractAllTimePointsReduce() { + } + + @Override + public void reduce(Iterable>> iterable, Collector> collector) throws Exception { + SortedSet timePoints = new TreeSet<>(); + + for (Tuple2> tuple : iterable) { + timePoints.addAll(tuple.f1.keySet()); + } + + for (Long timePoint: timePoints) { + collector.collect(new Tuple1<>(timePoint)); + } + + } +} diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java new file mode 100644 index 000000000000..34175302456a --- /dev/null +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java @@ -0,0 +1,116 @@ +/* + * Copyright © 2014 - 2021 Leipzig University (Database Research Group) + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.gradoop.temporal.model.impl.operators.metric.functions; + +import org.apache.flink.api.common.functions.GroupReduceFunction; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.tuple.Tuple1; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.util.Collector; +import org.gradoop.common.model.impl.id.GradoopId; + +import java.util.*; +import java.util.stream.Stream; + +/** + * A group reduce function that merges all Tuples (vId, degreeTree) to a dataset of tuples (time, aggDegree) + * that represents the aggregated degree value for the whole graph at the given time. + */ +public class GroupDegreeTreesToVariance + implements GroupReduceFunction>, Tuple2> { + + /** + * The timestamps where at least one vertex degree changes. + */ + private final SortedSet timePoints; + + /** + * Creates an instance of this group reduce function. + * + * @param timePoints the timestamps where vertex degree changes. 
+ */ + public GroupDegreeTreesToVariance(DataSet> timePoints) { + + List> tuples; + try { + tuples = timePoints.collect(); + this.timePoints = new TreeSet<>(); + + for (int i = 0; i < timePoints.count(); i = i + 1) { + this.timePoints.add(tuples.get(i).getField(0)); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + } + + @Override + public void reduce(Iterable>> iterable, + Collector> collector) throws Exception { + + // init necessary maps and set + HashMap> degreeTrees = new HashMap<>(); + HashMap vertexDegrees = new HashMap<>(); + + // convert the iterables to a hashmap and remember all possible timestamps + for (Tuple2> tuple : iterable) { + degreeTrees.put(tuple.f0, tuple.f1); + } + + int numberOfVertices = degreeTrees.size(); + + // Add default times + timePoints.add(Long.MIN_VALUE); + + for (Long timePoint : timePoints) { + // skip last default time + if (Long.MAX_VALUE == timePoint) { + continue; + } + // Iterate over all vertices + for (Map.Entry> entry : degreeTrees.entrySet()) { + // Make sure the vertex is registered in the current vertexDegrees capture + if (!vertexDegrees.containsKey(entry.getKey())) { + vertexDegrees.put(entry.getKey(), 0); + } + + // Check if timestamp is in tree, if not, take the lower key + if (entry.getValue().containsKey(timePoint)) { + vertexDegrees.put(entry.getKey(), entry.getValue().get(timePoint)); + } else { + Long lowerKey = entry.getValue().lowerKey(timePoint); + if (lowerKey != null) { + vertexDegrees.put(entry.getKey(), entry.getValue().get(lowerKey)); + } + } + } + + Optional opt = vertexDegrees.values().stream().reduce(Math::addExact); + Optional opt2 = Optional.empty(); + + double mean; + + if (opt.isPresent()) { + mean = (double) opt.get() / (double) numberOfVertices; + opt2 = Optional.of(vertexDegrees.values().stream().mapToDouble(val -> (val - mean) * (val - mean)).sum()); + } + + opt2.ifPresent(val -> collector.collect( + new Tuple2<>(timePoint, val / (double) numberOfVertices))); + } + } +} 
diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java new file mode 100644 index 000000000000..85b8c1fadb2d --- /dev/null +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java @@ -0,0 +1,55 @@ +/* + * Copyright © 2014 - 2021 Leipzig University (Database Research Group) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.gradoop.temporal.model.impl.operators.metric.functions; + +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.api.java.functions.FunctionAnnotation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.gradoop.common.model.impl.id.GradoopId; + +import java.util.Map; +import java.util.TreeMap; + +/** + * Replaces the degree tree, that just stores the degree changes for each time, with a degree tree that + * stores the actual degree of the vertex at that time. + */ +@FunctionAnnotation.ForwardedFields("f0") +public class TransformDeltaToAbsoluteDegreeTree + implements MapFunction>, + Tuple2>> { + + /** + * To reduce object instantiations. 
+ */ + private TreeMap absoluteDegreeTree; + + @Override + public Tuple2> map( + Tuple2> vIdTreeMapTuple) throws Exception { + // init the degree and the temporal tree + int degree = 0; + absoluteDegreeTree = new TreeMap<>(); + + // aggregate the degrees + for (Map.Entry entry : vIdTreeMapTuple.f1.entrySet()) { + degree += entry.getValue(); + absoluteDegreeTree.put(entry.getKey(), degree); + } + vIdTreeMapTuple.f1 = absoluteDegreeTree; + return vIdTreeMapTuple; + } +} \ No newline at end of file diff --git a/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java b/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java new file mode 100644 index 000000000000..be895b88f261 --- /dev/null +++ b/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java @@ -0,0 +1,146 @@ +/* + * Copyright © 2014 - 2021 Leipzig University (Database Research Group) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.gradoop.temporal.model.impl.operators.metric; + +import org.apache.flink.api.common.typeinfo.TypeHint; +import org.apache.flink.api.java.DataSet; +import org.apache.flink.api.java.io.LocalCollectionOutputFormat; +import org.apache.flink.api.java.tuple.Tuple2; +import org.gradoop.common.model.impl.id.GradoopId; +import org.gradoop.flink.model.impl.operators.sampling.functions.VertexDegree; +import org.gradoop.temporal.model.api.TimeDimension; +import org.gradoop.temporal.model.impl.TemporalGraph; +import org.gradoop.temporal.util.TemporalGradoopTestBase; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; + +import static org.junit.Assert.assertTrue; + +@RunWith(Parameterized.class) +public class DegreeVarianceEvolutionTest extends TemporalGradoopTestBase { + /** + * The expected in-degrees for each vertex label. + */ + private static final List> EXPECTED_IN_DEGREES = new ArrayList<>(); + /** + * The expected out-degrees for each vertex label. + */ + private static final List> EXPECTED_OUT_DEGREES = new ArrayList<>(); + /** + * The expected degrees for each vertex label. 
+ */ + private static final List> EXPECTED_BOTH_DEGREES = new ArrayList<>(); + + static { + // IN DEGREES + EXPECTED_IN_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(0L, 0.25)); // 0.1875 + EXPECTED_IN_DEGREES.add(new Tuple2<>(4L, 0.56)); // 0.5 + EXPECTED_IN_DEGREES.add(new Tuple2<>(5L, 0.25)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(6L, 0.25)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(7L, 0.1875)); + + // OUT DEGREES + EXPECTED_OUT_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(0L, 0.25)); // 0.1875 + EXPECTED_OUT_DEGREES.add(new Tuple2<>(4L, 0.608)); // 0.5 + EXPECTED_OUT_DEGREES.add(new Tuple2<>(5L, 0.25)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(6L, 0.25)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(7L, 0.1875)); + + // DEGREES + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(0L, 0.0)); // 0.24000000000000005 + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(4L, 0.8)); // 0.64 + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(5L, 0.0)); // 0.16 + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(6L, 0.5)); // 0.56 + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(7L, 0.25)); // 0.24000000000000005 + } + + /** + * The degree type to test. + */ + @Parameterized.Parameter(0) + public VertexDegree degreeType; + + /** + * The expected degree variance evolution for the given type. + */ + @Parameterized.Parameter(1) + public List> expectedDegrees; + + /** + * The temporal graph to test the operator. + */ + TemporalGraph testGraph; + + /** + * The parameters to test the operator. + * + * @return three different vertex degree types with its corresponding expected degree evolution. 
+ */ + @Parameterized.Parameters(name = "Test degree type {0}.") + public static Iterable parameters() { + return Arrays.asList( + new Object[] {VertexDegree.IN, EXPECTED_IN_DEGREES}, + new Object[] {VertexDegree.OUT, EXPECTED_OUT_DEGREES}, + new Object[] {VertexDegree.BOTH, EXPECTED_BOTH_DEGREES}); + } + + /** + * Set up the test graph and create the id-label mapping. + * + * @throws Exception in case of an error + */ + @Before + public void setUp() throws Exception { + testGraph = getTestGraphWithValues(); + Collection> idLabelCollection = new HashSet<>(); + testGraph.getVertices().map(v -> new Tuple2<>(v.getId(), v.getLabel())) + .returns(new TypeHint>() { + }).output(new LocalCollectionOutputFormat<>(idLabelCollection)); + getExecutionEnvironment().execute(); + } + + /** + * Test the degree variance evolution operator. + * + * @throws Exception in case of an error. + */ + @Test + public void testDegreeVariance() throws Exception { + Collection> resultCollection = new ArrayList<>(); + + final DataSet> resultDataSet = testGraph + .callForValue(new DegreeVarianceEvolution(degreeType, TimeDimension.VALID_TIME)); + + resultDataSet.output(new LocalCollectionOutputFormat<>(resultCollection)); + getExecutionEnvironment().execute(); + + System.out.println(resultCollection); + + assertTrue(resultCollection.containsAll(expectedDegrees)); + assertTrue(expectedDegrees.containsAll(resultCollection)); + } +} From f47e0f6f421da3717b9406e8ea8417724886ab2c Mon Sep 17 00:00:00 2001 From: Alwine Balfanz <100916390+alwba@users.noreply.github.com> Date: Thu, 11 Aug 2022 14:59:30 +0200 Subject: [PATCH 2/2] [#1571] change to timestamp extraction inside aggregation group reduce and adjust unit test --- .../metric/DegreeVarianceEvolution.java | 80 +++---- .../functions/ExtractAllTimePointsReduce.java | 49 ----- .../functions/GroupDegreeTreesToVariance.java | 145 ++++++------ .../TransformDeltaToAbsoluteDegreeTree.java | 40 ++-- .../metric/DegreeVarianceEvolutionTest.java | 206 
+++++++++--------- 5 files changed, 223 insertions(+), 297 deletions(-) delete mode 100644 gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java index e5505e33f67e..a77e640aacb6 100644 --- a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolution.java @@ -16,66 +16,56 @@ package org.gradoop.temporal.model.impl.operators.metric; import org.apache.flink.api.java.DataSet; -import org.apache.flink.api.java.tuple.Tuple1; import org.apache.flink.api.java.tuple.Tuple2; -import org.gradoop.common.model.impl.id.GradoopId; import org.gradoop.flink.model.api.operators.UnaryBaseGraphToValueOperator; import org.gradoop.flink.model.impl.operators.sampling.functions.VertexDegree; import org.gradoop.temporal.model.api.TimeDimension; import org.gradoop.temporal.model.impl.TemporalGraph; -import org.gradoop.temporal.model.impl.operators.metric.functions.*; -import org.gradoop.temporal.model.impl.operators.metric.functions.ExtractAllTimePointsReduce; +import org.gradoop.temporal.model.impl.operators.metric.functions.TransformDeltaToAbsoluteDegreeTree; +import org.gradoop.temporal.model.impl.operators.metric.functions.FlatMapVertexIdEdgeInterval; +import org.gradoop.temporal.model.impl.operators.metric.functions.BuildTemporalDegreeTree; import org.gradoop.temporal.model.impl.operators.metric.functions.GroupDegreeTreesToVariance; import java.util.Objects; -import java.util.TreeMap; /** * Operator that calculates the degree variance evolution of a temporal graph for the * whole lifetime of the graph. 
*/ public class DegreeVarianceEvolution implements UnaryBaseGraphToValueOperator>> { - /** - * The time dimension that will be considered. - */ - private final TimeDimension dimension; + /** + * The time dimension that will be considered. + */ + private final TimeDimension dimension; - /** - * The degree type (IN, OUT, BOTH); - */ - private final VertexDegree degreeType; + /** + * The degree type (IN, OUT, BOTH). + */ + private final VertexDegree degreeType; - /** - * Creates an instance of this average degree evolution operator. - * - * @param degreeType the degree type to use (IN, OUT, BOTH). - * @param dimension the time dimension to use (VALID_TIME, TRANSACTION_TIME). - */ - public DegreeVarianceEvolution(VertexDegree degreeType, TimeDimension dimension) { - this.degreeType = Objects.requireNonNull(degreeType); - this.dimension = Objects.requireNonNull(dimension); - } + /** + * Creates an instance of this degree variance evolution operator. + * + * @param degreeType the degree type to use (IN, OUT, BOTH). + * @param dimension the time dimension to use (VALID_TIME, TRANSACTION_TIME). 
+ */ + public DegreeVarianceEvolution(VertexDegree degreeType, TimeDimension dimension) { + this.degreeType = Objects.requireNonNull(degreeType); + this.dimension = Objects.requireNonNull(dimension); + } - @Override - public DataSet> execute(TemporalGraph graph) { - DataSet>> absoluteDegreeTrees = graph.getEdges() - // 1) Extract vertex id(s) and corresponding time intervals - .flatMap(new FlatMapVertexIdEdgeInterval(dimension, degreeType)) - // 2) Group them by the vertex id - .groupBy(0) - // 3) For each vertex id, build a degree tree data structure - .reduceGroup(new BuildTemporalDegreeTree()) - // 4) Transform each tree to aggregated evolution - .map(new TransformDeltaToAbsoluteDegreeTree()); - - DataSet> timePoints = absoluteDegreeTrees - // 5) extract all timestamps where degree of any vertex changes - .reduceGroup(new ExtractAllTimePointsReduce()) - .distinct(); - - return absoluteDegreeTrees - // join with interval degree mappings - // 6) Merge trees together and calculate aggregation - .reduceGroup(new GroupDegreeTreesToVariance(timePoints)); - } + @Override + public DataSet> execute(TemporalGraph graph) { + return graph.getEdges() + // 1) Extract vertex id(s) and corresponding time intervals + .flatMap(new FlatMapVertexIdEdgeInterval(dimension, degreeType)) + // 2) Group them by the vertex id + .groupBy(0) + // 3) For each vertex id, build a degree tree data structure + .reduceGroup(new BuildTemporalDegreeTree()) + // 4) Transform each tree to aggregated evolution + .map(new TransformDeltaToAbsoluteDegreeTree()) + // 5) Merge trees together and calculate aggregation + .reduceGroup(new GroupDegreeTreesToVariance()); + } } diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java deleted file mode 100644 index ece67f4a2efd..000000000000 --- 
a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/ExtractAllTimePointsReduce.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright © 2014 - 2021 Leipzig University (Database Research Group) - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.gradoop.temporal.model.impl.operators.metric.functions; - -import org.apache.flink.api.common.functions.GroupReduceFunction; -import org.apache.flink.api.java.tuple.Tuple1; -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.util.Collector; -import org.gradoop.common.model.impl.id.GradoopId; - -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; - -/** - * Reduce function to extract all timestamps where the degree of a vertex changes. 
- */ -public class ExtractAllTimePointsReduce implements GroupReduceFunction>, Tuple1> { - - public ExtractAllTimePointsReduce() { - } - - @Override - public void reduce(Iterable>> iterable, Collector> collector) throws Exception { - SortedSet timePoints = new TreeSet<>(); - - for (Tuple2> tuple : iterable) { - timePoints.addAll(tuple.f1.keySet()); - } - - for (Long timePoint: timePoints) { - collector.collect(new Tuple1<>(timePoint)); - } - - } -} diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java index 34175302456a..81ee172c973f 100644 --- a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/GroupDegreeTreesToVariance.java @@ -16,101 +16,88 @@ package org.gradoop.temporal.model.impl.operators.metric.functions; import org.apache.flink.api.common.functions.GroupReduceFunction; -import org.apache.flink.api.java.DataSet; -import org.apache.flink.api.java.tuple.Tuple1; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.util.Collector; import org.gradoop.common.model.impl.id.GradoopId; -import java.util.*; -import java.util.stream.Stream; +import java.util.TreeSet; +import java.util.TreeMap; +import java.util.SortedSet; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; /** * A group reduce function that merges all Tuples (vId, degreeTree) to a dataset of tuples (time, aggDegree) * that represents the aggregated degree value for the whole graph at the given time. */ public class GroupDegreeTreesToVariance - implements GroupReduceFunction>, Tuple2> { - - /** - * The timestamps where at least one vertex degree changes. 
- */ - private final SortedSet timePoints; - - /** - * Creates an instance of this group reduce function. - * - * @param timePoints the timestamps where vertex degree changes. - */ - public GroupDegreeTreesToVariance(DataSet> timePoints) { - - List> tuples; - try { - tuples = timePoints.collect(); - this.timePoints = new TreeSet<>(); - - for (int i = 0; i < timePoints.count(); i = i + 1) { - this.timePoints.add(tuples.get(i).getField(0)); - } - } catch (Exception e) { - throw new RuntimeException(e); - } +implements GroupReduceFunction>, Tuple2> { - } + /** + * Creates an instance of this group reduce function. + * + */ + public GroupDegreeTreesToVariance() { + + } - @Override - public void reduce(Iterable>> iterable, - Collector> collector) throws Exception { + @Override + public void reduce(Iterable>> iterable, + Collector> collector) throws Exception { - // init necessary maps and set - HashMap> degreeTrees = new HashMap<>(); - HashMap vertexDegrees = new HashMap<>(); + // init necessary maps and set + HashMap> degreeTrees = new HashMap<>(); + HashMap vertexDegrees = new HashMap<>(); + SortedSet timePoints = new TreeSet<>(); + + // convert the iterables to a hashmap and remember all possible timestamps + for (Tuple2> tuple : iterable) { + degreeTrees.put(tuple.f0, tuple.f1); + timePoints.addAll(tuple.f1.keySet()); + } - // convert the iterables to a hashmap and remember all possible timestamps - for (Tuple2> tuple : iterable) { - degreeTrees.put(tuple.f0, tuple.f1); + int numberOfVertices = degreeTrees.size(); + + // Add default times + timePoints.add(Long.MIN_VALUE); + + for (Long timePoint : timePoints) { + // skip last default time + if (Long.MAX_VALUE == timePoint) { + continue; + } + // Iterate over all vertices + for (Map.Entry> entry : degreeTrees.entrySet()) { + // Make sure the vertex is registered in the current vertexDegrees capture + if (!vertexDegrees.containsKey(entry.getKey())) { + vertexDegrees.put(entry.getKey(), 0); } - int numberOfVertices = 
degreeTrees.size(); - - // Add default times - timePoints.add(Long.MIN_VALUE); - - for (Long timePoint : timePoints) { - // skip last default time - if (Long.MAX_VALUE == timePoint) { - continue; - } - // Iterate over all vertices - for (Map.Entry> entry : degreeTrees.entrySet()) { - // Make sure the vertex is registered in the current vertexDegrees capture - if (!vertexDegrees.containsKey(entry.getKey())) { - vertexDegrees.put(entry.getKey(), 0); - } - - // Check if timestamp is in tree, if not, take the lower key - if (entry.getValue().containsKey(timePoint)) { - vertexDegrees.put(entry.getKey(), entry.getValue().get(timePoint)); - } else { - Long lowerKey = entry.getValue().lowerKey(timePoint); - if (lowerKey != null) { - vertexDegrees.put(entry.getKey(), entry.getValue().get(lowerKey)); - } - } - } - - Optional opt = vertexDegrees.values().stream().reduce(Math::addExact); - Optional opt2 = Optional.empty(); - - double mean; - - if (opt.isPresent()) { - mean = (double) opt.get() / (double) numberOfVertices; - opt2 = Optional.of(vertexDegrees.values().stream().mapToDouble(val -> (val - mean) * (val - mean)).sum()); - } - - opt2.ifPresent(val -> collector.collect( - new Tuple2<>(timePoint, val / (double) numberOfVertices))); + // Check if timestamp is in tree, if not, take the lower key + if (entry.getValue().containsKey(timePoint)) { + vertexDegrees.put(entry.getKey(), entry.getValue().get(timePoint)); + } else { + Long lowerKey = entry.getValue().lowerKey(timePoint); + if (lowerKey != null) { + vertexDegrees.put(entry.getKey(), entry.getValue().get(lowerKey)); + } } + } + + Optional opt = vertexDegrees.values().stream().reduce(Math::addExact); + Optional opt2 = Optional.empty(); + + double mean; + + if (opt.isPresent()) { + mean = (double) opt.get() / (double) numberOfVertices; + opt2 = Optional.of(vertexDegrees.values().stream() + .mapToDouble(val -> (val - mean) * (val - mean)).sum()); + } + + opt2.ifPresent(val -> collector.collect( + new Tuple2<>(timePoint, 
val / (double) numberOfVertices))); } + } } diff --git a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java index 85b8c1fadb2d..246412a4de16 100644 --- a/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java +++ b/gradoop-temporal/src/main/java/org/gradoop/temporal/model/impl/operators/metric/functions/TransformDeltaToAbsoluteDegreeTree.java @@ -29,27 +29,27 @@ */ @FunctionAnnotation.ForwardedFields("f0") public class TransformDeltaToAbsoluteDegreeTree - implements MapFunction>, - Tuple2>> { +implements MapFunction>, +Tuple2>> { - /** - * To reduce object instantiations. - */ - private TreeMap absoluteDegreeTree; + /** + * To reduce object instantiations. + */ + private TreeMap absoluteDegreeTree; - @Override - public Tuple2> map( - Tuple2> vIdTreeMapTuple) throws Exception { - // init the degree and the temporal tree - int degree = 0; - absoluteDegreeTree = new TreeMap<>(); + @Override + public Tuple2> map( + Tuple2> vIdTreeMapTuple) throws Exception { + // init the degree and the temporal tree + int degree = 0; + absoluteDegreeTree = new TreeMap<>(); - // aggregate the degrees - for (Map.Entry entry : vIdTreeMapTuple.f1.entrySet()) { - degree += entry.getValue(); - absoluteDegreeTree.put(entry.getKey(), degree); - } - vIdTreeMapTuple.f1 = absoluteDegreeTree; - return vIdTreeMapTuple; + // aggregate the degrees + for (Map.Entry entry : vIdTreeMapTuple.f1.entrySet()) { + degree += entry.getValue(); + absoluteDegreeTree.put(entry.getKey(), degree); } -} \ No newline at end of file + vIdTreeMapTuple.f1 = absoluteDegreeTree; + return vIdTreeMapTuple; + } +} diff --git a/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java 
b/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java index be895b88f261..f8ae9771b0db 100644 --- a/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java +++ b/gradoop-temporal/src/test/java/org/gradoop/temporal/model/impl/operators/metric/DegreeVarianceEvolutionTest.java @@ -39,108 +39,106 @@ @RunWith(Parameterized.class) public class DegreeVarianceEvolutionTest extends TemporalGradoopTestBase { - /** - * The expected in-degrees for each vertex label. - */ - private static final List> EXPECTED_IN_DEGREES = new ArrayList<>(); - /** - * The expected out-degrees for each vertex label. - */ - private static final List> EXPECTED_OUT_DEGREES = new ArrayList<>(); - /** - * The expected degrees for each vertex label. - */ - private static final List> EXPECTED_BOTH_DEGREES = new ArrayList<>(); - - static { - // IN DEGREES - EXPECTED_IN_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); - EXPECTED_IN_DEGREES.add(new Tuple2<>(0L, 0.25)); // 0.1875 - EXPECTED_IN_DEGREES.add(new Tuple2<>(4L, 0.56)); // 0.5 - EXPECTED_IN_DEGREES.add(new Tuple2<>(5L, 0.25)); - EXPECTED_IN_DEGREES.add(new Tuple2<>(6L, 0.25)); - EXPECTED_IN_DEGREES.add(new Tuple2<>(7L, 0.1875)); - - // OUT DEGREES - EXPECTED_OUT_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); - EXPECTED_OUT_DEGREES.add(new Tuple2<>(0L, 0.25)); // 0.1875 - EXPECTED_OUT_DEGREES.add(new Tuple2<>(4L, 0.608)); // 0.5 - EXPECTED_OUT_DEGREES.add(new Tuple2<>(5L, 0.25)); - EXPECTED_OUT_DEGREES.add(new Tuple2<>(6L, 0.25)); - EXPECTED_OUT_DEGREES.add(new Tuple2<>(7L, 0.1875)); - - // DEGREES - EXPECTED_BOTH_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); - EXPECTED_BOTH_DEGREES.add(new Tuple2<>(0L, 0.0)); // 0.24000000000000005 - EXPECTED_BOTH_DEGREES.add(new Tuple2<>(4L, 0.8)); // 0.64 - EXPECTED_BOTH_DEGREES.add(new Tuple2<>(5L, 0.0)); // 0.16 - EXPECTED_BOTH_DEGREES.add(new Tuple2<>(6L, 0.5)); // 0.56 - 
EXPECTED_BOTH_DEGREES.add(new Tuple2<>(7L, 0.25)); // 0.24000000000000005 - } - - /** - * The degree type to test. - */ - @Parameterized.Parameter(0) - public VertexDegree degreeType; - - /** - * The expected degree variance evolution for the given type. - */ - @Parameterized.Parameter(1) - public List> expectedDegrees; - - /** - * The temporal graph to test the operator. - */ - TemporalGraph testGraph; - - /** - * The parameters to test the operator. - * - * @return three different vertex degree types with its corresponding expected degree evolution. - */ - @Parameterized.Parameters(name = "Test degree type {0}.") - public static Iterable parameters() { - return Arrays.asList( - new Object[] {VertexDegree.IN, EXPECTED_IN_DEGREES}, - new Object[] {VertexDegree.OUT, EXPECTED_OUT_DEGREES}, - new Object[] {VertexDegree.BOTH, EXPECTED_BOTH_DEGREES}); - } - - /** - * Set up the test graph and create the id-label mapping. - * - * @throws Exception in case of an error - */ - @Before - public void setUp() throws Exception { - testGraph = getTestGraphWithValues(); - Collection> idLabelCollection = new HashSet<>(); - testGraph.getVertices().map(v -> new Tuple2<>(v.getId(), v.getLabel())) - .returns(new TypeHint>() { - }).output(new LocalCollectionOutputFormat<>(idLabelCollection)); - getExecutionEnvironment().execute(); - } - - /** - * Test the degree variance evolution operator. - * - * @throws Exception in case of an error. 
- */ - @Test - public void testDegreeVariance() throws Exception { - Collection> resultCollection = new ArrayList<>(); - - final DataSet> resultDataSet = testGraph - .callForValue(new DegreeVarianceEvolution(degreeType, TimeDimension.VALID_TIME)); - - resultDataSet.output(new LocalCollectionOutputFormat<>(resultCollection)); - getExecutionEnvironment().execute(); - - System.out.println(resultCollection); - - assertTrue(resultCollection.containsAll(expectedDegrees)); - assertTrue(expectedDegrees.containsAll(resultCollection)); - } + /** + * The expected in-degrees for each vertex label. + */ + private static final List> EXPECTED_IN_DEGREES = new ArrayList<>(); + /** + * The expected out-degrees for each vertex label. + */ + private static final List> EXPECTED_OUT_DEGREES = new ArrayList<>(); + /** + * The expected degrees for each vertex label. + */ + private static final List> EXPECTED_BOTH_DEGREES = new ArrayList<>(); + + static { + // IN DEGREES + EXPECTED_IN_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(0L, 0.1875)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(4L, 0.5)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(5L, 0.25)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(6L, 0.25)); + EXPECTED_IN_DEGREES.add(new Tuple2<>(7L, 0.1875)); + + // OUT DEGREES + EXPECTED_OUT_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(0L, 0.1875)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(4L, 0.5)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(5L, 0.25)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(6L, 0.25)); + EXPECTED_OUT_DEGREES.add(new Tuple2<>(7L, 0.1875)); + + // DEGREES + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(Long.MIN_VALUE, 0.0)); + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(0L, 0.24000000000000005)); + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(4L, 0.64)); + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(5L, 0.16)); + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(6L, 0.56)); + EXPECTED_BOTH_DEGREES.add(new Tuple2<>(7L, 
0.24000000000000005)); + } + + /** + * The degree type to test. + */ + @Parameterized.Parameter(0) + public VertexDegree degreeType; + + /** + * The expected degree variance evolution for the given type. + */ + @Parameterized.Parameter(1) + public List> expectedDegrees; + + /** + * The temporal graph to test the operator. + */ + TemporalGraph testGraph; + + /** + * The parameters to test the operator. + * + * @return three different vertex degree types with its corresponding expected degree evolution. + */ + @Parameterized.Parameters(name = "Test degree type {0}.") + public static Iterable parameters() { + return Arrays.asList( + new Object[]{VertexDegree.IN, EXPECTED_IN_DEGREES}, + new Object[]{VertexDegree.OUT, EXPECTED_OUT_DEGREES}, + new Object[]{VertexDegree.BOTH, EXPECTED_BOTH_DEGREES}); + } + + /** + * Set up the test graph and create the id-label mapping. + * + * @throws Exception in case of an error + */ + @Before + public void setUp() throws Exception { + testGraph = getTestGraphWithValues(); + Collection> idLabelCollection = new HashSet<>(); + testGraph.getVertices().map(v -> new Tuple2<>(v.getId(), v.getLabel())) + .returns(new TypeHint>() { + }).output(new LocalCollectionOutputFormat<>(idLabelCollection)); + getExecutionEnvironment().execute(); + } + + /** + * Test the degree variance evolution operator. + * + * @throws Exception in case of an error. + */ + @Test + public void testDegreeVariance() throws Exception { + Collection> resultCollection = new ArrayList<>(); + + final DataSet> resultDataSet = testGraph + .callForValue(new DegreeVarianceEvolution(degreeType, TimeDimension.VALID_TIME)); + + resultDataSet.output(new LocalCollectionOutputFormat<>(resultCollection)); + getExecutionEnvironment().execute(); + + assertTrue(resultCollection.containsAll(expectedDegrees)); + assertTrue(expectedDegrees.containsAll(resultCollection)); + } }