Skip to content

Commit 2e13d9d

Browse files
committed
Added Hooks for users to execute code on checkpoint and/or restore.
1 parent dbd9963 commit 2e13d9d

6 files changed

+205
-29
lines changed

CheckpointRestore.cpp

+3-4
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ JNIEXPORT void JNICALL Java_CheckpointRestore_SaveTheWorldNative (JNIEnv * env,
6666
criu_set_ext_unix_sk(true);
6767
int ret = criu_dump();
6868

69-
if (ret == 0) {
69+
if (ret >= 0) {
7070
printf("Successful dump\n");
7171
} else {
7272
printf("Error from dump %d\n", ret);
@@ -84,7 +84,6 @@ JNIEXPORT void JNICALL Java_CheckpointRestore_SaveTheWorldNative (JNIEnv * env,
8484
JNIEXPORT void JNICALL Java_CheckpointRestore_RestoreTheWorldNative
8585
(JNIEnv * env, jobject jobj, jstring jstr) {
8686
const char * path = env->GetStringUTFChars(jstr, NULL);
87-
printf("\nRestore The World Path = %s\n",path);
8887
int fd = open(path, O_DIRECTORY);
8988

9089
if (fd < 0) {
@@ -101,11 +100,11 @@ JNIEXPORT void JNICALL Java_CheckpointRestore_RestoreTheWorldNative
101100

102101
criu_set_shell_job(true);
103102
criu_set_images_dir_fd(fd);
104-
criu_set_log_file((char *) "chfrestore.log");
103+
criu_set_log_file((char *) "javarestore.log");
105104
criu_set_log_level(4);
106105

107106
int pid = criu_restore_child();
108-
printf("\npid = %d\n", pid);
107+
109108
if (pid < 0) {
110109
perror("Criu Restore Bad Pid \n");
111110
} else {

CheckpointRestore.java

+104-17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,22 @@
1+
import java.util.ArrayList;
2+
import java.util.List;
13
import java.util.Map;
4+
import java.io.File;
5+
import java.io.FileInputStream;
6+
import java.io.FileNotFoundException;
7+
import java.io.FileOutputStream;
8+
import java.io.IOException;
9+
import java.io.EOFException;
10+
import java.io.ObjectInputStream;
11+
import java.io.ObjectOutputStream;
12+
213

314
public class CheckpointRestore {
15+
public static CheckpointRestore CRContext;
16+
17+
private static List<Hook> CheckpointHooks;
18+
private static List<Hook> RestoreHooks;
19+
420

521
public static void CleanupTheWorld() {
622
System.gc();
@@ -15,31 +31,102 @@ public static void CleanupTheWorld() {
1531
public static native void SaveTheWorldIncremental();
1632

1733
public static void CheckTheWorld() {
18-
CheckpointRestore cr = new CheckpointRestore();
19-
cr.CheckTheWorldNative();
34+
CRContext.CheckTheWorldNative();
2035
}
2136

2237
public static void SaveTheWorld(String dir) {
23-
CheckpointRestore cr = new CheckpointRestore();
24-
cr.SaveTheWorldNative(dir);
25-
}
38+
for (Hook h : CheckpointHooks) {
39+
h.run();
40+
}
41+
42+
WriteRestoreHooks(dir);
43+
CRContext.SaveTheWorldNative(dir);
44+
}
45+
46+
public static void WriteRestoreHooks(String dir) {
47+
try {
48+
File outputDir = new File(dir);
49+
outputDir.mkdir();
50+
File file = new File(dir, "/JavaRestoreHooks.txt");
51+
FileOutputStream f = new FileOutputStream(file);
52+
ObjectOutputStream o = new ObjectOutputStream(f);
53+
54+
for (Hook h : RestoreHooks) {
55+
o.writeObject(h);
56+
}
57+
58+
o.close();
59+
f.close();
60+
} catch (FileNotFoundException e) {
61+
System.out.println("File not found");
62+
} catch (IOException e) {
63+
e.printStackTrace();
64+
System.out.println("Error initializing stream: " + e.getMessage());
65+
}
66+
}
67+
68+
public static void ReadRestoreHooks(String dir) {
69+
try {
70+
FileInputStream f = new FileInputStream(new File(dir + "/JavaRestoreHooks.txt"));
71+
ObjectInputStream o = new ObjectInputStream(f);
72+
73+
while (true) {
74+
Hook h = (Hook) o.readObject();
75+
RestoreHooks.add(h);
76+
}
77+
78+
} catch (FileNotFoundException e) {
79+
System.out.println("File not found");
80+
} catch (EOFException e) {
81+
// This always happens.
82+
// It's ugly, exceptions should be exceptional.
83+
// Is there a better way?
84+
} catch (IOException e) {
85+
System.out.println("Error initializing stream:" + e.getMessage());
86+
e.printStackTrace();
87+
} catch (ClassNotFoundException e) {
88+
// TODO Auto-generated catch block
89+
e.printStackTrace();
90+
}
91+
}
92+
2693

2794
public static void RestoreTheWorld(String dir) {
28-
System.out.println("RestoreTheWorld java call");
29-
CheckpointRestore cr = new CheckpointRestore();
30-
cr.RestoreTheWorldNative(dir);
95+
CRContext.RestoreTheWorldNative(dir);
96+
ReadRestoreHooks(dir);
97+
for (Hook h : RestoreHooks) {
98+
h.run();
99+
}
31100
}
32-
101+
102+
public static void RegisterCheckpointHook(Hook h) {
103+
CRContext.CheckpointHooks.add(h);
104+
}
105+
106+
public static void RegisterRestoreHook(Hook h) {
107+
CRContext.RestoreHooks.add(h);
108+
}
109+
110+
public static void DebugPrint(String s) {
111+
if (false) {
112+
System.out.println(s);
113+
}
114+
}
115+
33116
static {
117+
118+
DebugPrint("Library path = " + System.getProperty("java.library.path"));
119+
DebugPrint("About to load Checkpoint Restore library " + System.mapLibraryName("CheckpointRestore"));
120+
DebugPrint("About to load criu library " + System.mapLibraryName("criu"));
34121

35-
System.out.println("Library path = " + System.getProperty("java.library.path"));
36-
System.out.println("About to load Checkpoint Restore library " + System.mapLibraryName("CheckpointRestore"));
37-
System.out.println("About to load criu library " + System.mapLibraryName("criu"));
122+
DebugPrint("Before call to load CheckpointRestore");
123+
System.loadLibrary("CheckpointRestore");
124+
DebugPrint("After call to load CheckpointRestore");
125+
System.loadLibrary("criu");
126+
DebugPrint("After call to load criu");
38127

39-
System.out.println("Before call to load CheckpointRestore");
40-
System.loadLibrary("CheckpointRestore");
41-
System.out.println("After call to load CheckpointRestore");
42-
System.loadLibrary("criu");
43-
System.out.println("After call to load criu");
128+
CheckpointHooks = new ArrayList<Hook>();
129+
RestoreHooks = new ArrayList<Hook>();
130+
CRContext = new CheckpointRestore();
44131
}
45132
}

Hook.java

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import java.io.Serializable;
2+
3+
public class Hook implements Serializable {
4+
public void run() {
5+
}
6+
}

ReadMe

+3-4
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ I'm working on addressing this problem.
3030
You can now checkpoint java code:
3131
sudo setsid java -XX:+UseSerialGC -XX:-UsePerfData -Djava.library.path=$LOAD_LIBRARY_PATH TestRandom 1000000 1000
3232

33-
and restore:
33+
This will print out the restore instructions which you can run as root:
34+
35+
sudo java -XX:+UseSerialGC -XX:-UsePerfData -Djava.library.path=$LOAD_LIBRARY_PATH TestRandomRestore <saved world directory>
3436

35-
sudo bash
36-
cd /home/chf/SavedWorlds/run<x>
37-
criu restore --shell-job

TODO

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
2+
3+
4+
Tasks:
5+
6+
Demo programs:
7+
Elastic Search
8+
Serverless
9+
10+
11+
Features:
12+
13+
Java API:
14+
Check() => Verify that Checkpointing is enabled in your kernel and working properly.
15+
Check to see if your current JVM has any issues (open sockets etc).
16+
17+
Save() => Runs Hooks, Runs CRIU dump.
18+
19+
SaveIncremental() => Saves just the changes since the last Save()/SaveIncremental().
20+
21+
Restore() => Restores a saved Image using CRIU restore.
22+
23+
AddCheckpointHook(Hook h) => run h method at checkpoint.
24+
25+
AddRestoreHook(Hook h) => run h method at restore.
26+
27+
28+
Tasks:
29+
30+
Figure out if Hooks are interfaces or Lambdas.
31+
32+
Implement System Hooks:
33+
34+
GC/shrink the heap and then reinflate it.
35+
Shut down work-stealing thread pools and re-establish them with an appropriate number of threads based on the number of available processors.
36+
37+
Implement User example hooks:
38+
Clear certificates and then re-establish them
39+
Clear connections and then restore them
40+
41+
Hot Swapping Garbage Collection Algorithms just before checkpointing. Warmup and rampup with ParallelGC, checkpoint/restore with epsilon. Give memory for card tables etc back before checkpoint.
42+
43+
Optimize heap layout. Use hardware monitors to determine false cache line sharing and pad data structures before checkpointing.
44+
45+
46+
47+
Bugs/Issues:
48+
49+
What to do about missing library entries.
50+
51+
How to accommodate perf
52+
53+
How to accommodate parallelGC
54+
55+
What to do about /var/lib/sss/pipes/nss
56+
57+
58+
59+

TestRandom.java

+30-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,27 @@
1+
2+
class BeforeHook extends Hook {
3+
String str;
4+
BeforeHook(String s) {
5+
str = s;
6+
}
7+
8+
public void run() {
9+
System.out.println("Hook:Before Checkpoint: " + str);
10+
}
11+
}
12+
13+
class AfterHook extends Hook {
14+
String str;
15+
AfterHook(String s) {
16+
str = s;
17+
}
18+
19+
public void run() {
20+
System.out.println("Hook:After Restore: " + str);
21+
}
22+
}
23+
24+
125
public class TestRandom {
226

327
public static void main(String[] args) {
@@ -11,11 +35,13 @@ public static void main(String[] args) {
1135
int v = (int) Math.floor(r);
1236
testarray[v] += 1;
1337
}
14-
System.out.println("Before any goodness");
15-
CheckpointRestore.CheckTheWorld();
38+
// CheckpointRestore.CheckTheWorld();
1639
long end = System.currentTimeMillis();
17-
System.out.println("Saving the world in directory /home/chf/SavedWorlds/run" + end);
18-
CheckpointRestore.SaveTheWorld("/home/chf/SavedWorlds/run" + end);
40+
String dir = "/home/chf/SavedWorlds/run" + end;
41+
42+
CheckpointRestore.RegisterCheckpointHook(new BeforeHook("Restore Command: As root java -XX:+UseSerialGC -XX:-UsePerfData -Djava.library.path=$LOAD_LIBRARY_PATH TestRandomRestore " + dir ));
43+
CheckpointRestore.RegisterRestoreHook(new AfterHook("That's all folks"));
44+
CheckpointRestore.SaveTheWorld(dir);
1945

2046
long max = 0; long min = upperBound; long average = 0;
2147
for (int i = 0; i < upperBound; i++) {

0 commit comments

Comments
 (0)