24
24
import java .util .LinkedList ;
25
25
import java .util .List ;
26
26
import java .util .NoSuchElementException ;
27
+ import java .util .Random ;
27
28
import java .util .Set ;
28
29
29
30
import javax .management .RuntimeErrorException ;
50
51
* A version of the LightDGraph which stores all data on disk, i.e. it's slower,
51
52
* but can store much bigger graphs.
52
53
*
54
+ *
55
+ * The db file can be stored and re-used later. Make sure to close the graph with
56
+ * close().
57
+ *
53
58
* @author Peter
54
59
*
55
60
* @param <L>
@@ -88,25 +93,33 @@ public DiskDGraph(File dir)
88
93
89
94
/**
 * Opens the graph stored in the given db file, creating the file if needed.
 *
 * @param dbFile The file containing the graph structure. If the file doesn't exist,
 *   it will be created. If it does exist, the graph it contains will be loaded.
 * @param nullLabels If true, all labels will be null (saving some space).
 *   Adding a node with a nonnull label will result in an exception.
 * @throws IllegalStateException If labels are enabled and the stored label
 *   list does not have the same size as the stored list of incoming links.
 *   The db is closed again before the exception is thrown.
 */
public DiskDGraph(File dbFile, boolean nullLabels)
{
	this.nullLabels = nullLabels;

	db = DBMaker.fileDB(dbFile).make();

	// * No label list is created or opened for graphs without labels
	labels = nullLabels ? null : db.indexTreeList("labels", Serializer.STRING).createOrOpen();

	in = db.indexTreeList("in", new SerializerIntList()).createOrOpen();
	out = db.indexTreeList("out", new SerializerIntList()).createOrOpen();

	if (!nullLabels && labels.size() != in.size())
	{
		String message = "labels list has size " + labels.size() + ", should be " + in.size() + ".";

		// * The caller never receives this instance, so nobody else could
		//   close the db if we left it open here.
		db.close();

		throw new IllegalStateException(message);
	}

	if (db.exists("numLinks"))
	{
		// * Counter written by close() during an earlier session
		numLinks = db.atomicInteger("numLinks").createOrOpen().get();
	}
	else
	{
		// * No stored counter: recount from the incoming-link lists
		//   (assumes numLinks still holds its initial value here -- TODO confirm)
		for (List<Integer> list : in)
			numLinks += list.size();
	}
}
109
121
122
+
110
123
@ Override
111
124
public int size ()
112
125
{
@@ -163,7 +176,7 @@ public void remove()
163
176
164
177
for (int i : series (in .size ()))
165
178
{
166
- List <Integer > neighbors = in .get (i );
179
+ List <Integer > neighbors = new ArrayList < Integer >( in .get (i ) );
167
180
168
181
Iterator <Integer > it = neighbors .iterator ();
169
182
while (it .hasNext ())
@@ -174,7 +187,7 @@ public void remove()
174
187
}
175
188
for (int i : series (out .size ()))
176
189
{
177
- List <Integer > neighbors = out .get (i );
190
+ List <Integer > neighbors = new ArrayList < Integer >( out .get (i ) );
178
191
179
192
Iterator <Integer > it = neighbors .iterator ();
180
193
while (it .hasNext ())
@@ -188,7 +201,7 @@ public void remove()
188
201
// is higher than the one we just removed.
189
202
for (int i : series (in .size ()))
190
203
{
191
- List <Integer > neighbors = in .get (i );
204
+ List <Integer > neighbors = new ArrayList < Integer >( in .get (i ) );
192
205
193
206
for (int j : series (neighbors .size ()))
194
207
{
@@ -201,7 +214,7 @@ public void remove()
201
214
}
202
215
for (int i : series (out .size ()))
203
216
{
204
- List <Integer > neighbors = out .get (i );
217
+ List <Integer > neighbors = new ArrayList < Integer >( out .get (i ) );
205
218
206
219
for (int j : series (neighbors .size ()))
207
220
{
@@ -390,21 +403,21 @@ public void disconnect(Node<String> other)
390
403
391
404
int links = 0 ;
392
405
393
- List <Integer > myOut = out .get (mine );
406
+ List <Integer > myOut = new ArrayList < Integer >( out .get (mine ) );
394
407
while (myOut .remove ((Integer )his ))
395
408
links ++;
396
409
out .set (mine , myOut );
397
410
398
- List <Integer > hisOut = out .get (his );
411
+ List <Integer > hisOut = new ArrayList < Integer >( out .get (his ) );
399
412
while (hisOut .remove ((Integer )mine ))
400
413
links ++;
401
414
out .set (his , hisOut );
402
415
403
- List <Integer > myIn = in .get (mine );
416
+ List <Integer > myIn = new ArrayList < Integer >( in .get (mine ) );
404
417
while (myIn .remove ((Integer )his ));
405
418
in .set (mine , myIn );
406
419
407
- List <Integer > hisIn = in .get (his );
420
+ List <Integer > hisIn = new ArrayList < Integer >( in .get (his ) );
408
421
while (hisIn .remove ((Integer )mine ));
409
422
in .set (his , hisIn );
410
423
@@ -651,8 +664,14 @@ public Graph<String> graph()
651
664
public void remove ()
652
665
{
653
666
check ();
654
- in .get (to .index ()).remove ((Integer )from .index ());
655
- out .get (from .index ()).remove ((Integer )to .index ());
667
+
668
+ List <Integer > list = new ArrayList <Integer >(in .get (to .index ()));
669
+ list .remove ((Integer )from .index ());
670
+ in .set (to .index (), list );
671
+
672
+ list = new ArrayList <Integer >(out .get (from .index ()));
673
+ list .remove ((Integer )to .index ());
674
+ out .set (from .index (), list );
656
675
657
676
modCount ++;
658
677
dead = true ;
@@ -1155,6 +1174,28 @@ public List<DNode<String>> neighborsFast(Node<String> node)
1155
1174
return new NodeList (indices );
1156
1175
}
1157
1176
1177
+ /**
1178
+ * Loads a previous converted graph.
1179
+ *
1180
+ * @param dbFile
1181
+ * @return
1182
+ * @throws IOException
1183
+ */
1184
+ public static DiskDGraph fromDB (File dbFile )
1185
+ throws IOException
1186
+ {
1187
+ DB db = DBMaker .fileDB (dbFile ).make ();
1188
+
1189
+ if (db .exists ("labels" ))
1190
+ {
1191
+ db .close ();
1192
+ return new DiskDGraph (dbFile , false );
1193
+ }
1194
+
1195
+ db .close ();
1196
+ return new DiskDGraph (dbFile , true );
1197
+ }
1198
+
1158
1199
/**
1159
1200
* Reads a (large) edgelist-encoded file into a DiskDGraph.
1160
1201
*
@@ -1165,16 +1206,23 @@ public List<DNode<String>> neighborsFast(Node<String> node)
1165
1206
public static DiskDGraph fromFile (File file , File dir )
1166
1207
throws IOException
1167
1208
{
1168
- DiskDGraph graph = new DiskDGraph (dir , true );
1209
+ int id = (new Random ()).nextInt (10000000 );
1210
+
1211
+ return fromFile (file , dir , new File ("graph." +id +".db" ));
1212
+ }
1213
+ public static DiskDGraph fromFile (File file , File tmpDir , File dbFile )
1214
+ throws IOException
1215
+ {
1216
+ DiskDGraph graph = new DiskDGraph (dbFile , true );
1169
1217
1170
1218
// * sort the input file by first element
1171
- File forward = new File (dir , "forward.edgelist" );
1219
+ File forward = new File (tmpDir , "forward.edgelist" );
1172
1220
1173
1221
1174
1222
List <File > files = ExternalSort .sortInBatch (
1175
1223
file ,
1176
1224
new LComp (true ), ExternalSort .DEFAULTMAXTEMPFILES ,
1177
- Charset .defaultCharset (), dir , false );
1225
+ Charset .defaultCharset (), tmpDir , false );
1178
1226
ExternalSort .mergeSortedFiles (files , forward , new LComp (true ), Charset .defaultCharset ());
1179
1227
1180
1228
System .out .println ("Forward sort finished" );
@@ -1184,12 +1232,12 @@ public static DiskDGraph fromFile(File file, File dir)
1184
1232
System .out .println ("Forward list read" );
1185
1233
1186
1234
forward .delete ();
1187
- File backward = new File (dir , "backward.edgelist" );
1235
+ File backward = new File (tmpDir , "backward.edgelist" );
1188
1236
1189
1237
files = ExternalSort .sortInBatch (
1190
1238
file ,
1191
1239
new LComp (false ), ExternalSort .DEFAULTMAXTEMPFILES ,
1192
- Charset .defaultCharset (), dir , false );
1240
+ Charset .defaultCharset (), tmpDir , false );
1193
1241
ExternalSort .mergeSortedFiles (files , backward , new LComp (false ), Charset .defaultCharset ());
1194
1242
1195
1243
System .out .println ("Backward sort finished" );
@@ -1216,6 +1264,12 @@ public static DiskDGraph fromFile(File file, File dir)
1216
1264
return graph ;
1217
1265
}
1218
1266
1267
+ public void close ()
1268
+ {
1269
+ db .atomicInteger ("numLinks" ).createOrOpen ().set (numLinks );
1270
+ db .close ();
1271
+ }
1272
+
1219
1273
private static long readSorted (List <List <Integer >> list , File file , boolean forward )
1220
1274
throws IOException
1221
1275
{
0 commit comments