Skip to content

Commit 60ed739

Browse files
authored
Remove hypervisor_handler thread (#533)
* Remove Hypervisor-Handler thread, and timeout-based config Signed-off-by: Ludvig Liljenberg <[email protected]> * Implement InterruptHandle API Signed-off-by: Ludvig Liljenberg <[email protected]> * Fix tests and examples that relied on timing out Signed-off-by: Ludvig Liljenberg <[email protected]> * Make interrupt retry delay configurable Signed-off-by: Ludvig Liljenberg <[email protected]> * Allow configuring the signal number of the signal used to interrupt a sandbox Signed-off-by: Ludvig Liljenberg <[email protected]> * Prevent ABA-problem, where the vcpu could be successfully interrupted, but a new function call could be scheduled, before the interruptor-thread has time to observe the fact that the vcpu was interrupted Signed-off-by: Ludvig Liljenberg <[email protected]> --------- Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent c454b0b commit 60ed739

File tree

27 files changed

+1333
-2215
lines changed

27 files changed

+1333
-2215
lines changed

.github/copilot-instructions.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,6 @@ Make sure that you keep commits small and focused. Each commit should represent
6363
Make sure that you arrange your commits in a logical order. You can use `git rebase -i` to do this.
6464
If you update your PR branch with new commits, make sure to rebase your branch on top of the main branch. This will help keep the commit history clean and make it easier to review your changes.
6565
Make sure that you do not have any merge commits in your PR.
66-
If when running tests you receive an Error like this "Error("No Hypervisor was found for Sandbox, Hypervisor Handler Message Receive Timedout")" then the real error is the string inside the brackets before the comma, so in this example
67-
the error is "No Hypervisor was found for Sandbox".
6866
If you see the error "No Hypervisor was found for Sandbox" then, if you are running on Linux, you should check to see if there is a device called "kvm" or "mshv" in `/dev` and, if it is present, you should also check to see if you have rw access to it. You should ensure that you output the results of these checks for diagnostic purposes.
6967
This repository requires commits to be signed, so you should ensure that any commits that you create are done with `commit.gpgsign=true`. Commits should also be signed off with the `--signoff` option to comply with the DCO requirement.
7068

Justfile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,12 @@ test-unit target=default-target features="":
8383
test-isolated target=default-target features="":
8484
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::uninitialized::tests::test_trace_trace --exact --ignored
8585
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::uninitialized::tests::test_log_trace --exact --ignored
86-
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- hypervisor::hypervisor_handler::tests::create_1000_sandboxes --exact --ignored
86+
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::initialized_multi_use::tests::create_1000_sandboxes --exact --ignored
8787
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::outb::tests::test_log_outb_log --exact --ignored
8888
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- mem::shared_mem::tests::test_drop --exact --ignored
8989
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --test integration_test -- log_message --exact --ignored
9090
@# metrics tests
91-
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- metrics::tests::test_metrics_are_emitted --exact --ignored
92-
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- metrics::tests::test_metrics_are_emitted --exact --ignored
93-
91+
cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- metrics::tests::test_metrics_are_emitted --exact
9492
# runs integration tests. Guest can either be "rust" or "c"
9593
test-integration guest target=default-target features="":
9694
@# run execute_on_heap test with feature "executable_heap" on and off

docs/how-to-debug-a-hyperlight-guest.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,14 @@ To replicate the above behavior using VSCode follow the below steps:
142142
## How it works
143143

144144
The gdb feature is designed to work like a Request - Response protocol between
145-
a thread that accepts commands from a gdb client and the hypervisor handler over
146-
a communication channel.
145+
a thread that accepts commands from a gdb client and the main thread of the sandbox.
147146

148147
All the functionality is implemented on the hypervisor side so it has access to
149148
the shared memory and the vCPU.
150149

151150
The gdb thread uses the `gdbstub` crate to handle the communication with the gdb client.
152151
When the gdb client requests one of the supported features mentioned above, a request
153-
is sent over the communication channel to the hypervisor handler for the sandbox
152+
is sent over the communication channel to the main thread for the sandbox
154153
to resolve.
155154

156155
Below is a sequence diagram that shows the interaction between the entities
@@ -161,7 +160,7 @@ involved in the gdb debugging of a Hyperlight guest running inside a **KVM** or
161160
│ Hyperlight Sandbox │
162161
USER │ │
163162
┌────────────┐ │ ┌──────────────┐ ┌───────────────────────────┐ ┌────────┐ │
164-
│ gdb client │ │ │ gdb thread │ │ hypervisor handler thread │ │ vCPU │ │
163+
│ gdb client │ │ │ gdb thread │ │ main sandbox thread │ │ vCPU │ │
165164
└────────────┘ │ └──────────────┘ └───────────────────────────┘ └────────┘ │
166165
| │ | create_gdb_thread | | │
167166
| │ |◄─────────────────────────────────────────┌─┐ vcpu stopped ┌─┐ │

src/hyperlight_host/benches/benchmarks.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
use std::time::Duration;
18-
1917
use criterion::{criterion_group, criterion_main, Criterion};
2018
use hyperlight_host::sandbox::{MultiUseSandbox, SandboxConfiguration, UninitializedSandbox};
2119
use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox;
@@ -68,7 +66,6 @@ fn guest_call_benchmark(c: &mut Criterion) {
6866
let mut config = SandboxConfiguration::default();
6967
config.set_input_data_size(2 * SIZE + (1024 * 1024)); // 2 * SIZE + 1 MB, to allow 1MB for the rest of the serialized function call
7068
config.set_heap_size(SIZE as u64 * 15);
71-
config.set_max_execution_time(Duration::from_secs(10));
7269

7370
let sandbox = UninitializedSandbox::new(
7471
GuestBinary::FilePath(simple_guest_as_string().unwrap()),

src/hyperlight_host/examples/logging/main.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ limitations under the License.
1616
#![allow(clippy::disallowed_macros)]
1717
extern crate hyperlight_host;
1818

19+
use std::sync::{Arc, Barrier};
20+
1921
use hyperlight_host::sandbox::uninitialized::UninitializedSandbox;
2022
use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox;
2123
use hyperlight_host::sandbox_state::transition::Noop;
@@ -82,15 +84,29 @@ fn main() -> Result<()> {
8284
let no_op = Noop::<UninitializedSandbox, MultiUseSandbox>::default();
8385

8486
let mut multiuse_sandbox = usandbox.evolve(no_op)?;
87+
let interrupt_handle = multiuse_sandbox.interrupt_handle();
88+
let barrier = Arc::new(Barrier::new(2));
89+
let barrier2 = barrier.clone();
90+
const NUM_CALLS: i32 = 5;
91+
let thread = std::thread::spawn(move || {
92+
for _ in 0..NUM_CALLS {
93+
barrier2.wait();
94+
// Sleep for a short time to allow the guest function to run.
95+
std::thread::sleep(std::time::Duration::from_millis(500));
96+
// Cancel the guest function call.
97+
interrupt_handle.kill();
98+
}
99+
});
85100

86101
// Call a function that gets cancelled by the host function 5 times to generate some log entries.
87102

88-
for _ in 0..5 {
103+
for _ in 0..NUM_CALLS {
89104
let mut ctx = multiuse_sandbox.new_call_context();
90-
105+
barrier.wait();
91106
ctx.call::<()>("Spin", ()).unwrap_err();
92107
multiuse_sandbox = ctx.finish().unwrap();
93108
}
109+
thread.join().unwrap();
94110

95111
Ok(())
96112
}

src/hyperlight_host/examples/metrics/main.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License.
1515
*/
1616
#![allow(clippy::disallowed_macros)]
1717
extern crate hyperlight_host;
18+
use std::sync::{Arc, Barrier};
1819
use std::thread::{spawn, JoinHandle};
1920

2021
use hyperlight_host::sandbox::uninitialized::UninitializedSandbox;
@@ -95,12 +96,27 @@ fn do_hyperlight_stuff() {
9596
let no_op = Noop::<UninitializedSandbox, MultiUseSandbox>::default();
9697

9798
let mut multiuse_sandbox = usandbox.evolve(no_op).expect("Failed to evolve sandbox");
99+
let interrupt_handle = multiuse_sandbox.interrupt_handle();
100+
101+
const NUM_CALLS: i32 = 5;
102+
let barrier = Arc::new(Barrier::new(2));
103+
let barrier2 = barrier.clone();
104+
105+
let thread = std::thread::spawn(move || {
106+
for _ in 0..NUM_CALLS {
107+
barrier2.wait();
108+
// Sleep for a short time to allow the guest function to run after the `wait`.
109+
std::thread::sleep(std::time::Duration::from_millis(500));
110+
// Cancel the guest function call.
111+
interrupt_handle.kill();
112+
}
113+
});
98114

99115
// Call a function that gets cancelled by the host function 5 times to generate some metrics.
100116

101-
for _ in 0..5 {
117+
for _ in 0..NUM_CALLS {
102118
let mut ctx = multiuse_sandbox.new_call_context();
103-
119+
barrier.wait();
104120
ctx.call::<()>("Spin", ()).unwrap_err();
105121
multiuse_sandbox = ctx.finish().unwrap();
106122
}
@@ -109,6 +125,7 @@ fn do_hyperlight_stuff() {
109125
let result = join_handle.join();
110126
assert!(result.is_ok());
111127
}
128+
thread.join().unwrap();
112129
}
113130

114131
fn fn_writer(_msg: String) -> Result<i32> {

src/hyperlight_host/examples/tracing-otlp/main.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,15 @@ limitations under the License.
1616
#![allow(clippy::disallowed_macros)]
1717
//use opentelemetry_sdk::resource::ResourceBuilder;
1818
use opentelemetry_sdk::trace::SdkTracerProvider;
19-
use rand::Rng;
2019
use tracing::{span, Level};
2120
use tracing_opentelemetry::OpenTelemetryLayer;
2221
use tracing_subscriber::layer::SubscriberExt;
2322
use tracing_subscriber::util::SubscriberInitExt;
2423
extern crate hyperlight_host;
2524
use std::error::Error;
2625
use std::io::stdin;
27-
use std::sync::{Arc, Mutex};
28-
use std::thread::{self, spawn, JoinHandle};
26+
use std::sync::{Arc, Barrier, Mutex};
27+
use std::thread::{spawn, JoinHandle};
2928

3029
use hyperlight_host::sandbox::uninitialized::UninitializedSandbox;
3130
use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox;
@@ -157,8 +156,23 @@ fn run_example(wait_input: bool) -> HyperlightResult<()> {
157156
}
158157

159158
// Call a function that gets cancelled by the host function 5 times to generate some log entries.
160-
161-
for i in 0..5 {
159+
const NUM_CALLS: i32 = 5;
160+
let barrier = Arc::new(Barrier::new(2));
161+
let barrier2 = barrier.clone();
162+
163+
let interrupt_handle = multiuse_sandbox.interrupt_handle();
164+
165+
let thread = std::thread::spawn(move || {
166+
for _ in 0..NUM_CALLS {
167+
barrier2.wait();
168+
// Sleep for a short time to allow the guest function to run.
169+
std::thread::sleep(std::time::Duration::from_millis(500));
170+
// Cancel the guest function call.
171+
interrupt_handle.kill();
172+
}
173+
});
174+
175+
for i in 0..NUM_CALLS {
162176
let id = Uuid::new_v4();
163177
// Construct a new span named "hyperlight tracing call cancellation example thread" with INFO level.
164178
let span = span!(
@@ -169,15 +183,11 @@ fn run_example(wait_input: bool) -> HyperlightResult<()> {
169183
);
170184
let _entered = span.enter();
171185
let mut ctx = multiuse_sandbox.new_call_context();
172-
186+
barrier.wait();
173187
ctx.call::<()>("Spin", ()).unwrap_err();
174188
multiuse_sandbox = ctx.finish().unwrap();
175189
}
176-
let sleep_for = {
177-
let mut rng = rand::rng();
178-
rng.random_range(500..3000)
179-
};
180-
thread::sleep(std::time::Duration::from_millis(sleep_for));
190+
thread.join().expect("Thread panicked");
181191
}
182192
Ok(())
183193
});

src/hyperlight_host/examples/tracing/main.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License.
1616
#![allow(clippy::disallowed_macros)]
1717
use tracing::{span, Level};
1818
extern crate hyperlight_host;
19+
use std::sync::{Arc, Barrier};
1920
use std::thread::{spawn, JoinHandle};
2021

2122
use hyperlight_host::sandbox::uninitialized::UninitializedSandbox;
@@ -110,10 +111,24 @@ fn run_example() -> Result<()> {
110111
let no_op = Noop::<UninitializedSandbox, MultiUseSandbox>::default();
111112

112113
let mut multiuse_sandbox = usandbox.evolve(no_op)?;
114+
let interrupt_handle = multiuse_sandbox.interrupt_handle();
113115

114116
// Call a function that gets cancelled by the host function 5 times to generate some log entries.
115-
116-
for i in 0..5 {
117+
const NUM_CALLS: i32 = 5;
118+
let barrier = Arc::new(Barrier::new(2));
119+
let barrier2 = barrier.clone();
120+
121+
let thread = std::thread::spawn(move || {
122+
for _ in 0..NUM_CALLS {
123+
barrier2.wait();
124+
// Sleep for a short time to allow the guest function to run.
125+
std::thread::sleep(std::time::Duration::from_millis(500));
126+
// Cancel the guest function call.
127+
interrupt_handle.kill();
128+
}
129+
});
130+
131+
for i in 0..NUM_CALLS {
117132
let id = Uuid::new_v4();
118133
// Construct a new span named "hyperlight tracing call cancellation example thread" with INFO level.
119134
let span = span!(
@@ -124,7 +139,7 @@ fn run_example() -> Result<()> {
124139
);
125140
let _entered = span.enter();
126141
let mut ctx = multiuse_sandbox.new_call_context();
127-
142+
barrier.wait();
128143
ctx.call::<()>("Spin", ()).unwrap_err();
129144
multiuse_sandbox = ctx.finish().unwrap();
130145
}
@@ -133,6 +148,7 @@ fn run_example() -> Result<()> {
133148
let result = join_handle.join();
134149
assert!(result.is_ok());
135150
}
151+
thread.join().unwrap();
136152

137153
Ok(())
138154
}

src/hyperlight_host/src/error.rs

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -122,22 +122,6 @@ pub enum HyperlightError {
122122
#[error("HostFunction {0} was not found")]
123123
HostFunctionNotFound(String),
124124

125-
/// An attempt to communicate with or from the Hypervisor Handler thread failed
126-
/// (i.e., usually a failure call to `.send()` or `.recv()` on a message passing
127-
/// channel)
128-
#[error("Communication failure with the Hypervisor Handler thread")]
129-
HypervisorHandlerCommunicationFailure(),
130-
131-
/// An attempt to cancel a Hypervisor Handler execution failed.
132-
/// See `terminate_hypervisor_handler_execution_and_reinitialise`
133-
/// for more details.
134-
#[error("Hypervisor Handler execution cancel attempt on a finished execution")]
135-
HypervisorHandlerExecutionCancelAttemptOnFinishedExecution(),
136-
137-
/// A Receive for a Hypervisor Handler Message Timedout
138-
#[error("Hypervisor Handler Message Receive Timedout")]
139-
HypervisorHandlerMessageReceiveTimedout(),
140-
141125
/// Reading Writing or Seeking data failed.
142126
#[error("Reading Writing or Seeking data failed {0:?}")]
143127
IOError(#[from] std::io::Error),

src/hyperlight_host/src/func/call_ctx.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ limitations under the License.
1616

1717
use tracing::{instrument, Span};
1818

19-
use super::guest_dispatch::call_function_on_guest;
2019
use super::{ParameterTuple, SupportedReturnType};
2120
use crate::{MultiUseSandbox, Result};
2221
/// A context for calling guest functions.
@@ -69,8 +68,11 @@ impl MultiUseGuestCallContext {
6968
// !Send (and !Sync), we also don't need to worry about
7069
// synchronization
7170

72-
let ret =
73-
call_function_on_guest(&mut self.sbox, func_name, Output::TYPE, args.into_value());
71+
let ret = self.sbox.call_guest_function_by_name_no_reset(
72+
func_name,
73+
Output::TYPE,
74+
args.into_value(),
75+
);
7476
Output::from_value(ret?)
7577
}
7678

0 commit comments

Comments
 (0)