From 9f6e751d24938f0325b569788417d3a656663829 Mon Sep 17 00:00:00 2001 From: kaukabrizvi Date: Wed, 21 Aug 2024 21:16:18 +0000 Subject: [PATCH 1/4] Change diff and max_diff to a percentage --- tests/regression/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/regression/src/lib.rs b/tests/regression/src/lib.rs index 6caab561b0e..314227752d4 100644 --- a/tests/regression/src/lib.rs +++ b/tests/regression/src/lib.rs @@ -301,7 +301,7 @@ mod tests { let diff = find_instruction_count(&diff_content) .expect("Failed to parse cg_annotate --diff output"); // percentage difference is the overall difference divided by the previous instruction count - let diff_percentage = diff as f64 / self.prev_profile_count as f64; + let diff_percentage = (diff as f64 / self.prev_profile_count as f64) * 100.0; assert!( diff_percentage <= max_diff, "Instruction count difference exceeds the threshold, regression of {diff_percentage}% ({diff} instructions). @@ -364,7 +364,7 @@ mod tests { /// Test to create new config, set security policy, host_callback information, load/trust certs, and build config. #[test] fn test_set_config() { - valgrind_test("test_set_config", 0.01, |ctrl| { + valgrind_test("test_set_config", 0.1, |ctrl| { ctrl.stop_instrumentation(); ctrl.start_instrumentation(); let keypair_rsa = CertKeyPair::default(); @@ -378,7 +378,7 @@ mod tests { /// Test which creates a TestPair from config using `rsa_4096_sha512`. Only measures a pair handshake. #[test] fn test_rsa_handshake() { - valgrind_test("test_rsa_handshake", 0.01, |ctrl| { + valgrind_test("test_rsa_handshake", 0.001, |ctrl| { ctrl.stop_instrumentation(); let keypair_rsa = CertKeyPair::default(); let config = set_config(&security::DEFAULT_TLS13, keypair_rsa)?; From 962455f5d3bc1000c368be2af34408da8ce1f746 Mon Sep 17 00:00:00 2001 From: kaukabrizvi Date: Wed, 21 Aug 2024 23:13:20 +0000 Subject: [PATCH 2/4] Find_threshold script for contributing harnesses --- tests/regression/README.md | 13 +++ tests/regression/find_threshold/Cargo.toml | 6 ++ tests/regression/find_threshold/src/main.rs | 94 +++++++++++++++++++++ tests/regression/src/lib.rs | 6 +- 4 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 tests/regression/find_threshold/Cargo.toml create mode 100644 tests/regression/find_threshold/src/main.rs diff --git a/tests/regression/README.md b/tests/regression/README.md index 2f779c86dbe..e22bd38693f 100644 --- a/tests/regression/README.md +++ b/tests/regression/README.md @@ -138,3 +138,16 @@ Performs an RSA handshake in s2n-tls and validates the handshake process utilizi ### test_session_resumption Performs an RSA handshake with server authentication. Then, performs a resumption handshake using the session ticket obtained from the previous handshake. + +## Contributing Test Harnesses + +To contribute to the test harnesses, you should define a test name, wrap the test code in the valgrind_test() function, and find the threshold for the test. To find the threshold, follow this workflow: + +1. Navigate to the find_threshold directory `cd find_threshold` +2. Set the commit id to your current commit id, this lets the script find the profile when cargo test is run. +`EXPORT commit_id=#commit_id` +3. Run `cargo run test_name= "your_test_name"` +4. The script should run and output the threshold for your test + +### Threshold Setting Philosophy +The find_threshold script runs the test a 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism. diff --git a/tests/regression/find_threshold/Cargo.toml b/tests/regression/find_threshold/Cargo.toml new file mode 100644 index 00000000000..51182493311 --- /dev/null +++ b/tests/regression/find_threshold/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "find_threshold" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/tests/regression/find_threshold/src/main.rs b/tests/regression/find_threshold/src/main.rs new file mode 100644 index 00000000000..0a3d4e0f68d --- /dev/null +++ b/tests/regression/find_threshold/src/main.rs @@ -0,0 +1,94 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::env; +use std::fs::File; +use std::io::{self, BufRead, BufReader, Write}; +use std::process::{Command, Stdio}; + +fn find_instruction_count(output: &str) -> Result { + let reader = BufReader::new(output.as_bytes()); + for line in reader.lines() { + let line = line?; + if line.contains("PROGRAM TOTALS") { + if let Some(instructions) = line.split_whitespace().next() { + return instructions + .replace(',', "") + .parse::() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)); + } + } + } + panic!("Failed to find instruction count in annotated file"); +} + +fn main() -> Result<(), io::Error> { + // Get the test name from the command line arguments + let args: Vec = env::args().collect(); + if args.len() != 2 { + eprintln!("Usage: cargo run "); + std::process::exit(1); + } + let test_name = &args[1]; + + // Get the commit ID from the environment + let commit_id = env::var("COMMIT_ID").expect("COMMIT_ID environment variable not set"); + + // Define the path to the annotated file + let file_path = format!("target/regression_artifacts/{commit_id}/{test_name}.annotated"); + + // Change the working directory to the parent directory + let current_dir = env::current_dir()?; + let parent_dir = current_dir + .parent() + .expect("Failed to find parent directory"); + env::set_current_dir(parent_dir)?; + + let output_file_path = format!("{test_name}_instruction_counts.csv"); + let mut output_file = File::create(output_file_path)?; + writeln!(output_file, "Run,Instruction Count")?; + + let mut instruction_counts = Vec::new(); + + for run_number in 1..=100 { + // Set the environment variable, run the test + Command::new("cargo") + .arg("test") + .env("PERF_MODE", "valgrind") + .stderr(Stdio::null()) + .output() + .expect("Failed to run cargo test"); + + // Read the file contents + let file_content = std::fs::read_to_string(&file_path)?; + // Find the instruction count + match find_instruction_count(&file_content) { + Ok(instruction_count) => { + instruction_counts.push(instruction_count); + writeln!(output_file, "{run_number},{instruction_count}")?; + println!( + "Run {run_number}: Instruction Count = {instruction_count}" + ); + } + Err(e) => { + eprintln!("Failed to find instruction count in {file_path}: {e}"); + instruction_counts.push(-1); + } + } + } + + // Calculate the range, minimum, and percentage variance + if let (Some(&min), Some(&max)) = ( + instruction_counts.iter().min(), + instruction_counts.iter().max(), + ) { + let range = max - min; + let percentage_variance = (range as f64 / min as f64) * 100.0; + println!("Instruction Count Range: {range}"); + println!("Percentage Variance: {:.6}%", percentage_variance); + } else { + eprintln!("Could not calculate range and percentage variance due to insufficient data."); + } + + Ok(()) +} diff --git a/tests/regression/src/lib.rs b/tests/regression/src/lib.rs index 314227752d4..0d7f7e103bf 100644 --- a/tests/regression/src/lib.rs +++ b/tests/regression/src/lib.rs @@ -364,7 +364,7 @@ mod tests { /// Test to create new config, set security policy, host_callback information, load/trust certs, and build config. #[test] fn test_set_config() { - valgrind_test("test_set_config", 0.1, |ctrl| { + valgrind_test("test_set_config", 0.237, |ctrl| { ctrl.stop_instrumentation(); ctrl.start_instrumentation(); let keypair_rsa = CertKeyPair::default(); @@ -378,7 +378,7 @@ mod tests { /// Test which creates a TestPair from config using `rsa_4096_sha512`. Only measures a pair handshake. #[test] fn test_rsa_handshake() { - valgrind_test("test_rsa_handshake", 0.001, |ctrl| { + valgrind_test("test_rsa_handshake", 0.0039, |ctrl| { ctrl.stop_instrumentation(); let keypair_rsa = CertKeyPair::default(); let config = set_config(&security::DEFAULT_TLS13, keypair_rsa)?; @@ -396,7 +396,7 @@ mod tests { fn test_session_resumption() { const KEY_NAME: &str = "InsecureTestKey"; const KEY_VALUE: [u8; 16] = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3]; - valgrind_test("test_session_resumption", 0.01, |ctrl| { + valgrind_test("test_session_resumption", 0.0043, |ctrl| { ctrl.stop_instrumentation(); let keypair_rsa = CertKeyPair::default(); From 241bebfa508a2216e9cb6f75f03b518f5c0448f4 Mon Sep 17 00:00:00 2001 From: kaukabrizvi <100529019+kaukabrizvi@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:16:24 -0700 Subject: [PATCH 3/4] Update README.md --- tests/regression/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/regression/README.md b/tests/regression/README.md index e22bd38693f..06c0812b1b6 100644 --- a/tests/regression/README.md +++ b/tests/regression/README.md @@ -146,8 +146,8 @@ To contribute to the test harnesses, you should define a test name, wrap the tes 1. Navigate to the find_threshold directory `cd find_threshold` 2. Set the commit id to your current commit id, this lets the script find the profile when cargo test is run. `EXPORT commit_id=#commit_id` -3. Run `cargo run test_name= "your_test_name"` -4. The script should run and output the threshold for your test +3. Run `cargo run "your_test_name"` +4. The script should run and output the threshold for your test along with a .csv file with each trial run result. ### Threshold Setting Philosophy The find_threshold script runs the test a 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism. From fb3d999d31b1ade9f405a2caea3060b89ed3ab0b Mon Sep 17 00:00:00 2001 From: kaukabrizvi <100529019+kaukabrizvi@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:17:10 -0700 Subject: [PATCH 4/4] Update README.md --- tests/regression/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/regression/README.md b/tests/regression/README.md index 06c0812b1b6..135c83f5a75 100644 --- a/tests/regression/README.md +++ b/tests/regression/README.md @@ -150,4 +150,4 @@ To contribute to the test harnesses, you should define a test name, wrap the tes 4. The script should run and output the threshold for your test along with a .csv file with each trial run result. ### Threshold Setting Philosophy -The find_threshold script runs the test a 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism. +The find_threshold script runs the test 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism.