diff --git a/testing/tests/aarch64_cache_coherency.rs b/testing/tests/aarch64_cache_coherency.rs
new file mode 100644
index 000000000..8043a4a6f
--- /dev/null
+++ b/testing/tests/aarch64_cache_coherency.rs
@@ -0,0 +1,196 @@
+// This file contains test cases designed to validate proper assembler cache invalidation.
+// This is needed because aarch64's modified Harvard architecture has incoherent instruction and
+// data caches. It is therefore necessary to explicitly command the cache hierarchy to flush the
+// dcache to the coherent layers, invalidate the icache, and ensure no stale data is left in the
+// instruction pipeline. Test cases in this file are designed to break if this isn't handled properly.
+#![allow(unused_imports)]
+
+extern crate dynasmrt;
+
+use dynasmrt::dynasm;
+use dynasmrt::{DynasmApi, DynasmLabelApi};
+
+#[cfg(target_arch="aarch64")]
+fn test_cache_coherency_same_core() {
+    let mut ops = dynasmrt::aarch64::Assembler::new().unwrap();
+    let reader = ops.reader();
+
+    // assemble a function that returns 0xABCD in w0, recording its start and end offsets
+    let start = ops.offset();
+    dynasm!(ops
+        ; .arch aarch64
+        ; mov w0, 0xABCD
+        ; ret
+    );
+    let end = ops.offset();
+
+    ops.commit().unwrap();
+
+    // execute it once and check that it returns the original constant
+    {
+        let buf = reader.lock();
+        let callable: extern "C" fn() -> u32 = unsafe { std::mem::transmute(buf.ptr(start)) };
+        assert_eq!(callable(), 0xABCD);
+        drop(buf);
+    }
+
+    // overwrite the function in place, from the same start offset, so it returns 0xCDEF instead
+    ops.alter(|modifier| {
+        modifier.goto(start);
+
+        dynasm!(modifier
+            ; .arch aarch64
+            ; mov w0, 0xCDEF
+            ; ret
+        );
+        modifier.check_exact(end).unwrap();
+
+    }).unwrap();
+
+    // execute it again; any result other than 0xCDEF (e.g. a stale 0xABCD) fails the assertion
+    {
+        let buf = reader.lock();
+        let callable: extern "C" fn() -> u32 = unsafe { std::mem::transmute(buf.ptr(start)) };
+        assert_eq!(callable(), 0xCDEF);
+        drop(buf);
+    }
+}
+
+#[cfg(target_arch="aarch64")]
+#[test]
+fn test_cache_coherency_same_core_loop() {
+    for _ in 0 .. 10000 {
+        test_cache_coherency_same_core()
+    }
+}
+
+#[cfg(target_arch="aarch64")]
+#[test]
+fn test_cache_coherency_other_cores() {
+    // spawn a bunch of threads and have them all race to execute some assembly,
+    // then modify the assembly and see if any of them executes stale code
+    let thread_count = 4;
+
+    use std::sync::atomic::{AtomicU32, AtomicBool, Ordering};
+
+    // The code we'll generate reads one of these atomics with acquire ordering (ldar),
+    // and always expects to read 0x12345678. At first it reads the first one; then we
+    // update it to read the second one, at which point we also change the second one
+    // to hold the expected value and invalidate the first one. If stale code is executed,
+    // it will read the first value instead, which by then has been overwritten with 0xDEADBEEF.
+    let first_value = AtomicU32::new(0x12345678);
+    let second_value = AtomicU32::new(0xDEADC0DE);
+    let rejoin_threads = AtomicBool::new(false);
+
+    let mut ops = dynasmrt::aarch64::Assembler::new().unwrap();
+
+    // emit the addresses of both atomics as .qword data, labelled first_addr and second_addr
+    dynasm!(ops
+        ; .arch aarch64
+        ; .align 8
+        ; -> first_addr:
+        ; .qword first_value.as_ptr() as *mut u8 as _
+        ; -> second_addr:
+        ; .qword second_value.as_ptr() as *mut u8 as _
+    );
+    let start = ops.offset();
+    dynasm!(ops
+        ; .arch aarch64
+        ; adr x1, ->first_addr
+        ; adr x2, ->second_addr
+    );
+    let edit = ops.offset();
+    dynasm!(ops
+        ; .arch aarch64
+        ; ldr x0, [x1]
+        ; ldar w0, [x0]
+        ; ret
+    );
+    let end = ops.offset();
+
+    ops.commit().unwrap();
+
+    std::thread::scope(|scope| {
+
+        // start our racing threads; each one returns how many wrong values it observed
+        let mut handles = Vec::new();
+        for _ in 0 .. thread_count {
+
+            // these get moved into each thread
+            let reader = ops.reader();
+            let rejoin_threads_borrow = &rejoin_threads;
+
+            handles.push(scope.spawn(move || {
+
+                let mut bad_results = 0usize;
+                while !rejoin_threads_borrow.load(Ordering::Acquire) {
+
+                    let buf = reader.lock();
+                    let callable: extern "C" fn() -> u32 = unsafe { std::mem::transmute(buf.ptr(start)) };
+
+                    let value = callable();
+                    if value != 0x12345678 {
+                        bad_results += 1;
+                    }
+                }
+
+                bad_results
+            }));
+        }
+
+        // wait a bit
+        std::thread::sleep(std::time::Duration::from_millis(1));
+
+        // change the code back and forth to see if errors happen
+        for _ in 0 .. 100 {
+            ops.alter(|modifier| {
+                modifier.goto(edit);
+
+                dynasm!(modifier
+                    ; .arch aarch64
+                    ; ldr x0, [x2]
+                    ; ldar w0, [x0]
+                    ; ret
+                );
+                modifier.check_exact(end).unwrap();
+
+                // also swap the values: the second atomic now holds the expected one
+                first_value.store(0xDEADBEEF, Ordering::Release);
+                second_value.store(0x12345678, Ordering::Release);
+
+            }).unwrap();
+
+            // wait a bit more
+            std::thread::sleep(std::time::Duration::from_millis(1));
+
+            // change it back so the code loads through x1 (first_addr) again
+            ops.alter(|modifier| {
+                modifier.goto(edit);
+
+                dynasm!(modifier
+                    ; .arch aarch64
+                    ; ldr x0, [x1]
+                    ; ldar w0, [x0]
+                    ; ret
+                );
+                modifier.check_exact(end).unwrap();
+
+                // also swap the values back: the first atomic holds the expected one again
+                first_value.store(0x12345678, Ordering::Release);
+                second_value.store(0xDEADBEEF, Ordering::Release);
+
+            }).unwrap();
+
+            // wait a bit more
+            std::thread::sleep(std::time::Duration::from_millis(1));
+        }
+
+        // signal the racing threads to stop, then join them and sum their error counts
+        rejoin_threads.store(true, Ordering::Release);
+
+        let errors: usize = handles.into_iter().map(|handle| handle.join().unwrap()).sum();
+
+        assert_eq!(errors, 0, "racing threads read the wrong value");
+
+    });
+}
diff --git a/testing/tests/aarch64_complex.rs b/testing/tests/aarch64_complex.rs
index c3b1ed79a..4b6611ec8 100644
--- a/testing/tests/aarch64_complex.rs
+++ b/testing/tests/aarch64_complex.rs
@@ -10,7 +10,7 @@ use dynasmrt::components::LitPool;
 macro_rules! my_dynasm {
     ($ops:ident $($t:tt)*) => {
         dynasm!($ops
-            ; .arch x64
+            ; .arch aarch64
             ; .alias test, x1
             $($t)*
         )
@@ -35,7 +35,6 @@ fn complex() {
 
     // interesting testcases
     my_dynasm!(ops
-        ; .arch aarch64
         ; aligned:
         // no args
         ; nop
diff --git a/testing/tests/x64_0_complex.rs b/testing/tests/x64_0_complex.rs
index 08ee62bb9..1875625e7 100644
--- a/testing/tests/x64_0_complex.rs
+++ b/testing/tests/x64_0_complex.rs
@@ -177,7 +177,7 @@ fn complex() {
     // dynasm in expr position
     match 1 {
         0 => (),
-        _ => dynasm!(ops; inc rax)
+        _ => my_dynasm!(ops; inc rax)
     }
 
     // fixups