From c8a68dec5d3ee86e36f8560fb8c7b287a5538386 Mon Sep 17 00:00:00 2001 From: Michael Pollmeier Date: Tue, 26 Mar 2024 09:57:57 +0100 Subject: [PATCH] unify node|edge|property kind ranges in schema (#165) * same naming for properties: should also be propertyKinds * centrally encode the fact that our ranges are 0.until(numberOfXyzKind) in the schema, not distributed across all usage sites --- .../LoadingAndMemoryBenchmarks.scala | 4 +-- core/src/main/scala/flatgraph/Accessors.scala | 10 +++---- .../scala/flatgraph/DiffGraphApplier.scala | 26 +++++++++---------- core/src/main/scala/flatgraph/Graph.scala | 10 +++---- core/src/main/scala/flatgraph/Schema.scala | 11 +++++--- .../main/scala/flatgraph/misc/DebugDump.scala | 10 +++---- .../main/scala/flatgraph/misc/TestUtils.scala | 2 +- .../flatgraph/storage/Deserialization.scala | 10 +++---- .../flatgraph/storage/Serialization.scala | 10 +++---- 9 files changed, 49 insertions(+), 44 deletions(-) diff --git a/benchmarks/src/main/scala/flatgraph/benchmark/LoadingAndMemoryBenchmarks.scala b/benchmarks/src/main/scala/flatgraph/benchmark/LoadingAndMemoryBenchmarks.scala index 9572b608..7d43bd26 100644 --- a/benchmarks/src/main/scala/flatgraph/benchmark/LoadingAndMemoryBenchmarks.scala +++ b/benchmarks/src/main/scala/flatgraph/benchmark/LoadingAndMemoryBenchmarks.scala @@ -106,7 +106,7 @@ object LoadingAndMemoryBenchmarks { var count = 0 for { nodesArray <- graph.nodesArray - edgeKind <- Range(0, graph.schema.getNumberOfEdgeKinds).iterator + edgeKind <- graph.schema.edgeKinds.iterator node <- nodesArray } count += flatgraph.Accessors.getNeighborsOut(node, edgeKind).length @@ -168,7 +168,7 @@ object LoadingAndMemoryBenchmarks { val nodecount = cpgBox.result.asInstanceOf[flatgraph.Graph].livingNodeCountByKind.sum val filesize = new java.io.File("./cpg.fg").length() val (nnodeKinds, npropKinds, nEdgeKinds) = Some(cpgBox.result.asInstanceOf[flatgraph.Graph].schema).map { s => - (s.getNumberOfNodeKinds, s.getNumberOfProperties, 
s.getNumberOfEdgeKinds) + (s.getNumberOfNodeKinds, s.getNumberOfPropertyKinds, s.getNumberOfEdgeKinds) }.get println( s"Graph with ${nodecount} nodes and ${touch1.result} edges. There are ${nnodeKinds} node kinds, ${npropKinds} property kinds and ${nEdgeKinds} edge kinds." diff --git a/core/src/main/scala/flatgraph/Accessors.scala b/core/src/main/scala/flatgraph/Accessors.scala index c31b39eb..b8689961 100644 --- a/core/src/main/scala/flatgraph/Accessors.scala +++ b/core/src/main/scala/flatgraph/Accessors.scala @@ -37,10 +37,10 @@ object Accessors { } def getEdgesOut(node: GNode): IndexedSeq[Edge] = - Range(0, node.graph.schema.getNumberOfEdgeKinds).flatMap(getEdgesOut(node, _)) + node.graph.schema.edgeKinds.flatMap(getEdgesOut(node, _)) def getEdgesIn(node: GNode): IndexedSeq[Edge] = - Range(0, node.graph.schema.getNumberOfEdgeKinds).flatMap(getEdgesIn(node, _)) + node.graph.schema.edgeKinds.flatMap(getEdgesIn(node, _)) class EdgeView(neighbors: Array[GNode], base: GNode, properties: Any, inout: Byte, edgeKind: Short, start: Int, end: Int) extends IndexedSeq[Edge] { @@ -83,14 +83,14 @@ object Accessors { /** follow _all_ OUT edges to their adjacent nodes */ def getNeighborsOut(g: Graph, nodeKind: Short, seq: Int): Iterator[GNode] = { - Range(0, g.schema.getNumberOfEdgeKinds).iterator.flatMap { edgeKind => + g.schema.edgeKinds.iterator.flatMap { edgeKind => getNeighborsOut(g, nodeKind, seq, edgeKind.toShort) } } /** follow _all_ IN edges to their adjacent nodes */ def getNeighborsIn(g: Graph, nodeKind: Short, seq: Int): Iterator[GNode] = { - Range(0, g.schema.getNumberOfEdgeKinds).iterator.flatMap { edgeKind => + g.schema.edgeKinds.iterator.flatMap { edgeKind => getNeighborsIn(g, nodeKind, seq, edgeKind.toShort) } } @@ -162,7 +162,7 @@ object Accessors { def getNodeProperties(node: GNode): IterableOnce[(String, AnyRef)] = { val schema = node.graph.schema for { - propertyKind <- Range(0, schema.getNumberOfProperties) + propertyKind <- schema.propertyKinds property = 
Accessors.getNodeProperty(node, propertyKind) if property.nonEmpty propertyLabel = schema.getPropertyLabel(node.nodeKind, propertyKind) diff --git a/core/src/main/scala/flatgraph/DiffGraphApplier.scala b/core/src/main/scala/flatgraph/DiffGraphApplier.scala index 37d4da74..7e5f4412 100644 --- a/core/src/main/scala/flatgraph/DiffGraphApplier.scala +++ b/core/src/main/scala/flatgraph/DiffGraphApplier.scala @@ -184,20 +184,20 @@ private[flatgraph] class DiffGraphApplier(graph: Graph, diff: DiffGraphBuilder) // set edge properties for { - nodeKind <- Range(0, graph.schema.getNumberOfNodeKinds) - edgeKind <- Range(0, graph.schema.getNumberOfEdgeKinds) + nodeKind <- graph.schema.nodeKinds + edgeKind <- graph.schema.edgeKinds direction <- Edge.Direction.values } setEdgeProperty(nodeKind, direction, edgeKind) // remove edges for { - nodeKind <- Range(0, graph.schema.getNumberOfNodeKinds) - edgeKind <- Range(0, graph.schema.getNumberOfEdgeKinds) + nodeKind <- graph.schema.nodeKinds + edgeKind <- graph.schema.edgeKinds direction <- Edge.Direction.values } deleteEdges(nodeKind, direction, edgeKind) // add nodes - for (nodeKind <- Range(0, graph.schema.getNumberOfNodeKinds)) + for (nodeKind <- graph.schema.nodeKinds) addNodes(nodeKind) // delete nodes @@ -207,15 +207,15 @@ private[flatgraph] class DiffGraphApplier(graph: Graph, diff: DiffGraphBuilder) // add edges for { - nodeKind <- Range(0, graph.schema.getNumberOfNodeKinds) - edgeKind <- Range(0, graph.schema.getNumberOfEdgeKinds) + nodeKind <- graph.schema.nodeKinds + edgeKind <- graph.schema.edgeKinds direction <- Direction.values } addEdges(nodeKind, direction, edgeKind) // set node properties for { - nodeKind <- Range(0, graph.schema.getNumberOfNodeKinds) - propertyKind <- Range(0, graph.schema.getNumberOfProperties) + nodeKind <- graph.schema.nodeKinds + propertyKind <- graph.schema.propertyKinds } setNodeProperties(nodeKind, propertyKind) } @@ -249,7 +249,7 @@ private[flatgraph] class DiffGraphApplier(graph: Graph, 
diff: DiffGraphBuilder) // remove properties for { - propertyKind <- Range(0, graph.schema.getNumberOfProperties) + propertyKind <- graph.schema.propertyKinds deletedNode <- delNodes } { val pos = graph.schema.propertyOffsetArrayIndex(deletedNode.nodeKind, propertyKind) @@ -265,7 +265,7 @@ private[flatgraph] class DiffGraphApplier(graph: Graph, diff: DiffGraphBuilder) // delete incident edges for { - edgeKind <- Range(0, graph.schema.getNumberOfEdgeKinds) // this part can run in parallel + edgeKind <- graph.schema.edgeKinds // this part can run in parallel direction <- Direction.values deletedNode <- delNodes } { @@ -297,8 +297,8 @@ private[flatgraph] class DiffGraphApplier(graph: Graph, diff: DiffGraphBuilder) } // Now replacements is filled with the modifications. for { - nodeKind <- Range(0, graph.schema.getNumberOfNodeKinds) - edgeKind <- Range(0, graph.schema.getNumberOfEdgeKinds) + nodeKind <- graph.schema.nodeKinds + edgeKind <- graph.schema.edgeKinds direction <- Direction.values } { val pos = graph.schema.neighborOffsetArrayIndex(nodeKind, direction, edgeKind) diff --git a/core/src/main/scala/flatgraph/Graph.scala b/core/src/main/scala/flatgraph/Graph.scala index a4d26dc7..08ed197d 100644 --- a/core/src/main/scala/flatgraph/Graph.scala +++ b/core/src/main/scala/flatgraph/Graph.scala @@ -40,7 +40,7 @@ object Graph { class Graph(val schema: Schema, val storagePathMaybe: Option[Path] = None) extends AutoCloseable { private val nodeKindCount = schema.getNumberOfNodeKinds private val edgeKindCount = schema.getNumberOfEdgeKinds - private val propertiesCount = schema.getNumberOfProperties + private val propertiesCount = schema.getNumberOfPropertyKinds private var closed = false private[flatgraph] val livingNodeCountByKind: Array[Int] = new Array[Int](nodeKindCount) @@ -85,7 +85,7 @@ class Graph(val schema: Schema, val storagePathMaybe: Option[Path] = None) exten node(kindAndSeq.kind, kindAndSeq.seq) def allNodes: Iterator[GNode] = - Range(0, 
schema.getNumberOfNodeKinds).iterator.flatMap(_nodes) + schema.nodeKinds.iterator.flatMap(_nodes) def nodeCount(label: String): Int = livingNodeCountByKind(schema.getNodeKindByLabel(label)) @@ -104,7 +104,7 @@ class Graph(val schema: Schema, val storagePathMaybe: Option[Path] = None) exten case Schema.UndefinedKind => Iterator.empty case propertyKind => - Range(0, schema.getNumberOfNodeKinds).iterator.flatMap { nodeKind => + schema.nodeKinds.iterator.flatMap { nodeKind => Accessors.getWithInverseIndex(this, nodeKind, propertyKind, value) } } @@ -120,9 +120,9 @@ class Graph(val schema: Schema, val storagePathMaybe: Option[Path] = None) exten private def makeNeighbors() = { val neighbors = new Array[AnyRef](nodeKindCount * edgeKindCount * NeighborsSlotSize * NumberOfDirections) for { - nodeKind <- Range(0, nodeKindCount) + nodeKind <- schema.nodeKinds direction <- Edge.Direction.values - edgeKind <- Range(0, edgeKindCount) + edgeKind <- schema.edgeKinds pos = schema.neighborOffsetArrayIndex(nodeKind, direction, edgeKind) propertyDefault = schema.allocateEdgeProperty(nodeKind, direction, edgeKind, size = 1) value = diff --git a/core/src/main/scala/flatgraph/Schema.scala b/core/src/main/scala/flatgraph/Schema.scala index 60e39f8a..c6556e0c 100644 --- a/core/src/main/scala/flatgraph/Schema.scala +++ b/core/src/main/scala/flatgraph/Schema.scala @@ -96,7 +96,14 @@ object FormalQtyType { abstract class Schema { def getNumberOfNodeKinds: Int + def nodeKinds: Range = Range(0, getNumberOfNodeKinds) + def getNumberOfEdgeKinds: Int + def edgeKinds: Range = Range(0, getNumberOfEdgeKinds) + + def getNumberOfPropertyKinds: Int + def propertyKinds: Range = Range(0, getNumberOfPropertyKinds) + def getNodeLabel(nodeKind: Int): String def getNodeKindByLabel(label: String): Int @@ -109,8 +116,6 @@ abstract class Schema { def getPropertyLabel(nodeKind: Int, propertyKind: Int): String def getPropertyKindByName(label: String): Int - def getNumberOfProperties: Int - final def 
neighborOffsetArrayIndex(nodeKind: Int, direction: Edge.Direction, edgeKind: Int): Int = { val directionFactor: Int = direction.encoding 3 * (nodeKind + getNumberOfNodeKinds * (directionFactor + 2 * edgeKind)) @@ -166,7 +171,7 @@ class FreeSchema( override def getEdgeKindByLabel(label: String): Int = edgeMap.getOrElse(label, Schema.UndefinedKind) override def getPropertyLabel(nodeKind: Int, propertyKind: Int): String = propertyLabels(propertyKind) override def getPropertyKindByName(label: String): Int = propMap.getOrElse(label, Schema.UndefinedKind) - override def getNumberOfProperties: Int = propertyLabels.length + override def getNumberOfPropertyKinds: Int = propertyLabels.length override def makeNode(graph: Graph, nodeKind: Short, seq: Int): GNode = new GNode(graph, nodeKind, seq) override def makeEdge(src: GNode, dst: GNode, edgeKind: Short, subSeq: Int, property: Any): Edge = new Edge(src, dst, edgeKind, subSeq, property) diff --git a/core/src/main/scala/flatgraph/misc/DebugDump.scala b/core/src/main/scala/flatgraph/misc/DebugDump.scala index 3728e2a7..ed695cec 100644 --- a/core/src/main/scala/flatgraph/misc/DebugDump.scala +++ b/core/src/main/scala/flatgraph/misc/DebugDump.scala @@ -31,9 +31,9 @@ object DebugDump { sb.append(s"#Node numbers (kindId, nnodes) ${numstr}, total ${g.nodesArray.iterator.map { _.size }.sum}\n") - for (nodeKind <- Range(0, g.schema.getNumberOfNodeKinds)) { + for (nodeKind <- g.schema.nodeKinds) { sb.append(s"Node kind ${nodeKind}. 
(eid, nEdgesOut, nEdgesIn):") - for (edgeKind <- Range(0, g.schema.getNumberOfEdgeKinds)) { + for (edgeKind <- g.schema.edgeKinds) { val posOut = g.schema.neighborOffsetArrayIndex(nodeKind, Outgoing, edgeKind) val neO = g.neighbors(posOut + 1) match { case null => "0 [NA]" @@ -50,7 +50,7 @@ object DebugDump { for (n <- g._nodes(nodeKind)) { val properties = mutable.ArrayBuffer.empty[String] - for (propertyKind <- Range(0, g.schema.getNumberOfProperties)) { + for (propertyKind <- g.schema.propertyKinds) { val propertyLabel = g.schema.getPropertyLabel(nodeKind, propertyKind) val p = Accessors.getNodeProperty(n, propertyKind) if (p.nonEmpty) @@ -68,7 +68,7 @@ object DebugDump { sb.append(s" ${printNode(n)} : " + properties.mkString(", ") + "\n") } - for (edgeKind <- Range(0, g.schema.getNumberOfEdgeKinds)) { + for (edgeKind <- g.schema.edgeKinds) { val edgeLabel = g.schema.getEdgeLabel(nodeKind, edgeKind) val edgesOut = Accessors.getEdgesOut(n, edgeKind) assert( @@ -82,7 +82,7 @@ object DebugDump { sb.append(s" ${printNode(n)} [${edgeLabel}] -> " + edgesOut.map { e => printNode(e.dst, e.property) }.mkString(", ") + "\n") } } - for (edgeKind <- Range(0, g.schema.getNumberOfEdgeKinds)) { + for (edgeKind <- g.schema.edgeKinds) { val edgeLabel = g.schema.getEdgeLabel(nodeKind, edgeKind) val edgesIn = Accessors.getEdgesIn(n, edgeKind) assert(Accessors.getNeighborsIn(n, edgeKind).to(Seq) == edgesIn.map(_.src).to(Seq)) diff --git a/core/src/main/scala/flatgraph/misc/TestUtils.scala b/core/src/main/scala/flatgraph/misc/TestUtils.scala index e55c8d56..db8702e5 100644 --- a/core/src/main/scala/flatgraph/misc/TestUtils.scala +++ b/core/src/main/scala/flatgraph/misc/TestUtils.scala @@ -30,7 +30,7 @@ object TestUtils { val schema = graph.schema val newGraph = new Graph(schema, storagePathMaybe) - for (kind <- Range(0, schema.getNumberOfNodeKinds)) { + for (kind <- schema.nodeKinds) { newGraph.nodesArray(kind) = graph.nodesArray(kind).clone() newGraph.nodesArray(kind).mapInPlace { 
oldNode => val newNode = schema.makeNode(newGraph, oldNode.nodeKind, oldNode.seq()) diff --git a/core/src/main/scala/flatgraph/storage/Deserialization.scala b/core/src/main/scala/flatgraph/storage/Deserialization.scala index 0872599a..51d2d4ec 100644 --- a/core/src/main/scala/flatgraph/storage/Deserialization.scala +++ b/core/src/main/scala/flatgraph/storage/Deserialization.scala @@ -25,7 +25,7 @@ object Deserialization { else None val g = new Graph(schema, storagePathMaybe) val nodekinds = mutable.HashMap[String, Short]() - for (nodeKind <- Range(0, g.schema.getNumberOfNodeKinds)) nodekinds(g.schema.getNodeLabel(nodeKind)) = nodeKind.toShort + for (nodeKind <- g.schema.nodeKinds) nodekinds(g.schema.getNodeLabel(nodeKind)) = nodeKind.toShort val kindRemapper = Array.fill(manifest.nodes.size)(-1.toShort) val nodeRemapper = new Array[Array[GNode]](manifest.nodes.length) for { @@ -49,8 +49,8 @@ object Deserialization { val edgeKinds = mutable.HashMap[(String, String), Short]() for { - nodeKind <- Range(0, g.schema.getNumberOfNodeKinds) - edgeKind <- Range(0, g.schema.getNumberOfEdgeKinds) + nodeKind <- g.schema.nodeKinds + edgeKind <- g.schema.edgeKinds } { val nodeLabel = g.schema.getNodeLabel(nodeKind) val edgeLabel = g.schema.getEdgeLabel(nodeKind, edgeKind) @@ -75,8 +75,8 @@ object Deserialization { val propertykinds = mutable.HashMap[(String, String), Int]() for { - nodeKind <- Range(0, g.schema.getNumberOfNodeKinds) - propertyKind <- Range(0, g.schema.getNumberOfProperties) + nodeKind <- g.schema.nodeKinds + propertyKind <- g.schema.propertyKinds } { val nodeLabel = g.schema.getNodeLabel(nodeKind) val propertyLabel = g.schema.getPropertyLabel(nodeKind, propertyKind) diff --git a/core/src/main/scala/flatgraph/storage/Serialization.scala b/core/src/main/scala/flatgraph/storage/Serialization.scala index d5caaebf..0efc2ce1 100644 --- a/core/src/main/scala/flatgraph/storage/Serialization.scala +++ b/core/src/main/scala/flatgraph/storage/Serialization.scala @@ -43,7 
+43,7 @@ object Serialization { val nodes = mutable.ArrayBuffer.empty[NodeItem] val edges = mutable.ArrayBuffer.empty[EdgeItem] val properties = mutable.ArrayBuffer.empty[PropertyItem] - for (nodeKind <- Range(0, g.schema.getNumberOfNodeKinds)) { + for (nodeKind <- g.schema.nodeKinds) { val nodeLabel = g.schema.getNodeLabel(nodeKind) val deletions = g .nodesArray(nodeKind) @@ -55,8 +55,8 @@ object Serialization { nodes.addOne(new Manifest.NodeItem(nodeLabel, size, deletions)) } for { - nodeKind <- Range(0, g.schema.getNumberOfNodeKinds) - edgeKind <- Range(0, g.schema.getNumberOfEdgeKinds) + nodeKind <- g.schema.nodeKinds + edgeKind <- g.schema.edgeKinds direction <- Direction.values } { val pos = g.schema.neighborOffsetArrayIndex(nodeKind, direction, edgeKind) @@ -72,8 +72,8 @@ object Serialization { } } for { - nodeKind <- Range(0, g.schema.getNumberOfNodeKinds) - propertyKind <- Range(0, g.schema.getNumberOfProperties) + nodeKind <- g.schema.nodeKinds + propertyKind <- g.schema.propertyKinds } { val pos = g.schema.propertyOffsetArrayIndex(nodeKind, propertyKind) if (g.properties(pos) != null) {