From bd42174dab7a2afb0049d7e8cc79b03938a2ddab Mon Sep 17 00:00:00 2001 From: Peter Bloem Date: Wed, 24 Aug 2016 17:39:33 +0200 Subject: [PATCH] Made DiskDGraph reloadable. --- nodes/src/main/java/org/nodes/DiskDGraph.java | 94 +++++++++++++++---- .../test/java/org/nodes/DiskDGraphTest.java | 86 ++++++++++++----- 2 files changed, 136 insertions(+), 44 deletions(-) diff --git a/nodes/src/main/java/org/nodes/DiskDGraph.java b/nodes/src/main/java/org/nodes/DiskDGraph.java index 0bc3b69..3fb6c3c 100644 --- a/nodes/src/main/java/org/nodes/DiskDGraph.java +++ b/nodes/src/main/java/org/nodes/DiskDGraph.java @@ -24,6 +24,7 @@ import java.util.LinkedList; import java.util.List; import java.util.NoSuchElementException; +import java.util.Random; import java.util.Set; import javax.management.RuntimeErrorException; @@ -50,6 +51,10 @@ * A version of the LightDGraph which stores all data on disk. Ie. it's slower, * but can store much bigger graphs. * + * + * The db file can be stored and re-used later. Make sure to close the graph with + * close(). + * * @author Peter * * @param @@ -88,25 +93,33 @@ public DiskDGraph(File dir) /** * - * @param dir + * @param dbFile The file containing the graph structure. If the file doesn't exist, + * it will be created. IF it does exist, the graph it contains will be loaded. * @param nullLabels If true, all labels will be null (saving some space). * Adding a node with a nonnull label will result in an exception. */ - public DiskDGraph(File dir, boolean nullLabels) + public DiskDGraph(File dbFile, boolean nullLabels) { this.nullLabels = nullLabels; - dir.mkdirs(); - File dbFile = new File(dir, "graph."+id+".db"); - db = DBMaker.fileDB(dbFile).make(); labels = nullLabels ? null : db.indexTreeList("labels", Serializer.STRING).createOrOpen(); in = db.indexTreeList("in", new SerializerIntList()).createOrOpen(); out = db.indexTreeList("out", new SerializerIntList()).createOrOpen(); + + if(!nullLabels && labels.size() != in.size()) + throw new IllegalStateException("labels list has size "+ labels.size() + ", should be " + in.size() + "."); + + if(db.exists("numLinks")) + numLinks = db.atomicInteger("numLinks").createOrOpen().get(); + else + for(List list : in) + numLinks += list.size(); } + @Override public int size() { @@ -163,7 +176,7 @@ public void remove() for(int i : series(in.size())) { - List neighbors = in.get(i); + List neighbors = new ArrayList(in.get(i)); Iterator it = neighbors.iterator(); while(it.hasNext()) @@ -174,7 +187,7 @@ public void remove() } for(int i : series(out.size())) { - List neighbors = out.get(i); + List neighbors = new ArrayList(out.get(i)); Iterator it = neighbors.iterator(); while(it.hasNext()) @@ -188,7 +201,7 @@ public void remove() // is higher than the one we just removed. for(int i : series(in.size())) { - List neighbors = in.get(i); + List neighbors = new ArrayList(in.get(i)); for(int j : series(neighbors.size())) { @@ -201,7 +214,7 @@ public void remove() } for(int i : series(out.size())) { - List neighbors = out.get(i); + List neighbors = new ArrayList(out.get(i)); for(int j : series(neighbors.size())) { @@ -390,21 +403,21 @@ public void disconnect(Node other) int links = 0; - List myOut = out.get(mine); + List myOut = new ArrayList(out.get(mine)); while(myOut.remove((Integer)his)) links++; out.set(mine, myOut); - List hisOut = out.get(his); + List hisOut = new ArrayList(out.get(his)); while(hisOut.remove((Integer)mine)) links++; out.set(his, hisOut); - List myIn = in.get(mine); + List myIn = new ArrayList(in.get(mine)); while(myIn.remove((Integer)his)); in.set(mine, myIn); - List hisIn = in.get(his); + List hisIn = new ArrayList(in.get(his)); while(hisIn.remove((Integer)mine)); in.set(his, hisIn); @@ -651,8 +664,14 @@ public Graph graph() public void remove() { check(); - in.get(to.index()).remove((Integer)from.index()); - out.get(from.index()).remove((Integer)to.index()); + + List list = new ArrayList(in.get(to.index())); + list.remove((Integer)from.index()); + in.set(to.index(), list); + + list = new ArrayList(out.get(from.index())); + list.remove((Integer)to.index()); + out.set(from.index(), list); modCount++; dead = true; @@ -1155,6 +1174,28 @@ public List> neighborsFast(Node node) return new NodeList(indices); } + /** + * Loads a previous converted graph. + * + * @param dbFile + * @return + * @throws IOException + */ + public static DiskDGraph fromDB(File dbFile) + throws IOException + { + DB db = DBMaker.fileDB(dbFile).make(); + + if(db.exists("labels")) + { + db.close(); + return new DiskDGraph(dbFile, false); + } + + db.close(); + return new DiskDGraph(dbFile, true); + } + /** * Reads a (large) edgelist-encoded file into a DiskDGraph. * @@ -1165,16 +1206,23 @@ public List> neighborsFast(Node node) public static DiskDGraph fromFile(File file, File dir) throws IOException { - DiskDGraph graph = new DiskDGraph(dir, true); + int id = (new Random()).nextInt(10000000); + + return fromFile(file, dir, new File("graph."+id+".db")); + } + public static DiskDGraph fromFile(File file, File tmpDir, File dbFile) + throws IOException + { + DiskDGraph graph = new DiskDGraph(dbFile, true); // * sort the input file by first element - File forward = new File(dir, "forward.edgelist"); + File forward = new File(tmpDir, "forward.edgelist"); List files = ExternalSort.sortInBatch( file, new LComp(true), ExternalSort.DEFAULTMAXTEMPFILES, - Charset.defaultCharset(), dir, false); + Charset.defaultCharset(), tmpDir, false); ExternalSort.mergeSortedFiles(files, forward, new LComp(true), Charset.defaultCharset()); System.out.println("Forward sort finished"); @@ -1184,12 +1232,12 @@ public static DiskDGraph fromFile(File file, File dir) System.out.println("Forward list read"); forward.delete(); - File backward = new File(dir, "backward.edgelist"); + File backward = new File(tmpDir, "backward.edgelist"); files = ExternalSort.sortInBatch( file, new LComp(false), ExternalSort.DEFAULTMAXTEMPFILES, - Charset.defaultCharset(), dir, false); + Charset.defaultCharset(), tmpDir, false); ExternalSort.mergeSortedFiles(files, backward, new LComp(false), Charset.defaultCharset()); System.out.println("Backward sort finished"); @@ -1216,6 +1264,12 @@ public static DiskDGraph fromFile(File file, File dir) return graph; } + public void close() + { + db.atomicInteger("numLinks").createOrOpen().set(numLinks); + db.close(); + } + private static long readSorted(List> list, File file, boolean forward) throws IOException { diff --git a/nodes/src/test/java/org/nodes/DiskDGraphTest.java b/nodes/src/test/java/org/nodes/DiskDGraphTest.java index 9bae3fb..14dbe30 100644 --- a/nodes/src/test/java/org/nodes/DiskDGraphTest.java +++ b/nodes/src/test/java/org/nodes/DiskDGraphTest.java @@ -12,6 +12,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Random; import java.util.Set; import org.junit.After; @@ -29,19 +30,25 @@ public class DiskDGraphTest { public static File DIR = new File("./tmp/"); + public static File r() + { + DIR.mkdirs(); + int id = (new Random()).nextInt(1000000000); + return new File(DIR, "graph" + id + ".db"); + } @Test public void testDiskDGraph() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); } @Test public void testToString() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add("a"), b = graph.add("b"); @@ -56,7 +63,7 @@ public void testToString() public void starTest() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add("a"), b = graph.add("b"), @@ -87,7 +94,7 @@ public void starTest() public void testRemove() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add(null), b = graph.add(null), @@ -113,7 +120,7 @@ public void testRemove() public void testRemove2() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add(null), b = graph.add(null), @@ -144,7 +151,7 @@ public void testRemove2() public void testConnected() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add(null), b = graph.add(null), @@ -179,7 +186,7 @@ public void testConnected() public void testLinks() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add(null), b = graph.add(null), @@ -203,7 +210,7 @@ public void testLinks() public void testLinks2() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add(null), b = graph.add(null), @@ -234,7 +241,7 @@ public void testEquals() { Global.randomSeed(); - DGraph g1 = new DiskDGraph(DIR); + DGraph g1 = new DiskDGraph(r()); g1.add("a"); g1.add("b"); g1.add("c"); @@ -242,7 +249,7 @@ public void testEquals() g1.node("a").connect(g1.node("b")); g1.node("b").connect(g1.node("c")); - DGraph g2 = new DiskDGraph(DIR); + DGraph g2 = new DiskDGraph(r()); g2.add("a"); g2.add("b"); g2.add("c"); @@ -261,7 +268,7 @@ public void testEquals() public void testNotEquals() { Global.randomSeed(); - DGraph g1 = new DiskDGraph(DIR); + DGraph g1 = new DiskDGraph(r()); DTGraph g2 = new MapDTGraph(); @@ -269,20 +276,51 @@ public void testNotEquals() assertFalse(g2.equals(g1)); } - @Test public void testImportBig() throws IOException { Global.randomSeed(); - DGraph graph = DiskDGraph.fromFile(new File("/Users/Peter/Documents/datasets/graphs/wikipedia-nl/wikipedia-nl-simple.txt"), DIR); + DGraph graph = DiskDGraph.fromFile(new File("/Users/Peter/Documents/datasets/graphs/p2p/p2p.txt"), DIR); System.out.println(graph.size()); System.out.println(graph.numLinks()); } + @Test + public void testImportDB() + throws IOException + { + Global.randomSeed(); + + FileIO.copy("graphs/p2p/p2p.txt", DIR); + File dbFile = r(); + + DiskDGraph diskGraph = DiskDGraph.fromFile(new File(DIR, "p2p.txt"), DIR, dbFile); + DGraph copy = LightDGraph.copy(diskGraph); + diskGraph.close(); + + diskGraph = DiskDGraph.fromDB(dbFile); + +// for(int i : series(copy.size())) +// { +// String a = diskGraph.get(i).out() + " " + diskGraph.get(i).in(); +// +// String b = copy.get(i).out() + " " + copy.get(i).in(); +// +// if(a.equals(b)) +// { +// System.out.println("d " + a); +// System.out.println("m " + b); +// } +// } + + assertEquals(copy, diskGraph); + + } + /** - * Run with low heap space... + * * * @throws IOException */ @@ -341,7 +379,7 @@ public void testCopy() assertEquals(graph.numLinks(), numLinks); } - graph = DiskDGraph.copy(graph, DIR); + graph = DiskDGraph.copy(graph, r()); { int numLinks = 0; @@ -367,7 +405,7 @@ public void testCopy2() assertEquals(graph.numLinks(), numLinks); } - graph = DiskDGraph.copy(graph, DIR); + graph = DiskDGraph.copy(graph, r()); { int numLinks = 0; @@ -383,7 +421,7 @@ public void testCopy2() public void testNumLinks2() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add("a"); DNode b = graph.add("b"); @@ -414,7 +452,7 @@ public void testIndices2() // Note that light graphs have non-persistent nodes, so node.index() // doesn't update after removal - DiskDGraph graph = DiskDGraph.copy(in, DIR); + DiskDGraph graph = DiskDGraph.copy(in, r()); System.out.println("."); Node node = graph.get(145); assertEquals(145, node.index()); @@ -463,7 +501,7 @@ public void testNodeLinks() { Global.randomSeed(); DGraph graph = Examples.physicians(); - graph = DiskDGraph.copy(graph, DIR); + graph = DiskDGraph.copy(graph, r()); for(Node node : graph.nodes()) { @@ -478,7 +516,7 @@ public void testNodeLinks() public void testNodeLinks2() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add(""); DNode b = graph.add(""); @@ -525,7 +563,7 @@ public void testNodeLinks2() public void testNeighbors() { Global.randomSeed(); - DGraph graph = new DiskDGraph(DIR); + DGraph graph = new DiskDGraph(r()); DNode a = graph.add("a"); DNode b = graph.add("b"); @@ -555,7 +593,7 @@ public void testNeighborsFast() { Global.randomSeed(); DGraph graph = Examples.physicians(); - graph = DiskDGraph.copy(graph, DIR); + graph = DiskDGraph.copy(graph, r()); assertTrue(graph instanceof FastWalkable); @@ -591,7 +629,7 @@ public void testNeighborsFast() public void testJBC() { DGraph graph = Graphs.jbcDirected(); - graph = DiskDGraph.copy(graph, DIR); + graph = DiskDGraph.copy(graph, r()); List nodes = Arrays.asList(13, 15, 16); @@ -602,7 +640,7 @@ public void testJBC() assertEquals(2, subgraph.numLinks()); } - // @After + @After public void cleanup() { for(File file : DIR.listFiles())