From 9f6e751d24938f0325b569788417d3a656663829 Mon Sep 17 00:00:00 2001
From: kaukabrizvi <kaukabrizvi13@gmail.com>
Date: Wed, 21 Aug 2024 21:16:18 +0000
Subject: [PATCH 1/4] Change diff and max_diff to a percentage

---
 tests/regression/src/lib.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/regression/src/lib.rs b/tests/regression/src/lib.rs
index 6caab561b0e..314227752d4 100644
--- a/tests/regression/src/lib.rs
+++ b/tests/regression/src/lib.rs
@@ -301,7 +301,7 @@ mod tests {
             let diff = find_instruction_count(&diff_content)
                 .expect("Failed to parse cg_annotate --diff output");
             // percentage difference is the overall difference divided by the previous instruction count
-            let diff_percentage = diff as f64 / self.prev_profile_count as f64;
+            let diff_percentage = (diff as f64 / self.prev_profile_count as f64) * 100.0;
             assert!(
                 diff_percentage <= max_diff,
                 "Instruction count difference exceeds the threshold, regression of {diff_percentage}% ({diff} instructions). 
@@ -364,7 +364,7 @@ mod tests {
     /// Test to create new config, set security policy, host_callback information, load/trust certs, and build config.
     #[test]
     fn test_set_config() {
-        valgrind_test("test_set_config", 0.01, |ctrl| {
+        valgrind_test("test_set_config", 0.1, |ctrl| {
             ctrl.stop_instrumentation();
             ctrl.start_instrumentation();
             let keypair_rsa = CertKeyPair::default();
@@ -378,7 +378,7 @@ mod tests {
     /// Test which creates a TestPair from config using `rsa_4096_sha512`. Only measures a pair handshake.
     #[test]
     fn test_rsa_handshake() {
-        valgrind_test("test_rsa_handshake", 0.01, |ctrl| {
+        valgrind_test("test_rsa_handshake", 0.001, |ctrl| {
             ctrl.stop_instrumentation();
             let keypair_rsa = CertKeyPair::default();
             let config = set_config(&security::DEFAULT_TLS13, keypair_rsa)?;

From 962455f5d3bc1000c368be2af34408da8ce1f746 Mon Sep 17 00:00:00 2001
From: kaukabrizvi <kaukabrizvi13@gmail.com>
Date: Wed, 21 Aug 2024 23:13:20 +0000
Subject: [PATCH 2/4] Find_threshold script for contributing harnesses

---
 tests/regression/README.md                  | 13 +++
 tests/regression/find_threshold/Cargo.toml  |  6 ++
 tests/regression/find_threshold/src/main.rs | 94 +++++++++++++++++++++
 tests/regression/src/lib.rs                 |  6 +-
 4 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 tests/regression/find_threshold/Cargo.toml
 create mode 100644 tests/regression/find_threshold/src/main.rs

diff --git a/tests/regression/README.md b/tests/regression/README.md
index 2f779c86dbe..e22bd38693f 100644
--- a/tests/regression/README.md
+++ b/tests/regression/README.md
@@ -138,3 +138,16 @@ Performs an RSA handshake in s2n-tls and validates the handshake process utilizi
 ### test_session_resumption
 
 Performs an RSA handshake with server authentication. Then, performs a resumption handshake using the session ticket obtained from the previous handshake.
+
+## Contributing Test Harnesses
+
+To contribute to the test harnesses, you should define a test name, wrap the test code in the valgrind_test() function, and find the threshold for the test. To find the threshold, follow this workflow:
+
+1. Navigate to the find_threshold directory `cd find_threshold`
+2. Set the commit id to your current commit id, this lets the script find the profile when cargo test is run. 
+`EXPORT commit_id=#commit_id`
+3. Run `cargo run test_name= "your_test_name"`
+4. The script should run and output the threshold for your test
+
+### Threshold Setting Philosophy
+The find_threshold script runs the test a 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism.
diff --git a/tests/regression/find_threshold/Cargo.toml b/tests/regression/find_threshold/Cargo.toml
new file mode 100644
index 00000000000..51182493311
--- /dev/null
+++ b/tests/regression/find_threshold/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "find_threshold"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
diff --git a/tests/regression/find_threshold/src/main.rs b/tests/regression/find_threshold/src/main.rs
new file mode 100644
index 00000000000..0a3d4e0f68d
--- /dev/null
+++ b/tests/regression/find_threshold/src/main.rs
@@ -0,0 +1,94 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use std::env;
+use std::fs::File;
+use std::io::{self, BufRead, BufReader, Write};
+use std::process::{Command, Stdio};
+
+fn find_instruction_count(output: &str) -> Result<i64, io::Error> {
+    let reader = BufReader::new(output.as_bytes());
+    for line in reader.lines() {
+        let line = line?;
+        if line.contains("PROGRAM TOTALS") {
+            if let Some(instructions) = line.split_whitespace().next() {
+                return instructions
+                    .replace(',', "")
+                    .parse::<i64>()
+                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
+            }
+        }
+    }
+    panic!("Failed to find instruction count in annotated file");
+}
+
+fn main() -> Result<(), io::Error> {
+    // Get the test name from the command line arguments
+    let args: Vec<String> = env::args().collect();
+    if args.len() != 2 {
+        eprintln!("Usage: cargo run <test_name>");
+        std::process::exit(1);
+    }
+    let test_name = &args[1];
+
+    // Get the commit ID from the environment
+    let commit_id = env::var("COMMIT_ID").expect("COMMIT_ID environment variable not set");
+
+    // Define the path to the annotated file
+    let file_path = format!("target/regression_artifacts/{commit_id}/{test_name}.annotated");
+
+    // Change the working directory to the parent directory
+    let current_dir = env::current_dir()?;
+    let parent_dir = current_dir
+        .parent()
+        .expect("Failed to find parent directory");
+    env::set_current_dir(parent_dir)?;
+
+    let output_file_path = format!("{test_name}_instruction_counts.csv");
+    let mut output_file = File::create(output_file_path)?;
+    writeln!(output_file, "Run,Instruction Count")?;
+
+    let mut instruction_counts = Vec::new();
+
+    for run_number in 1..=100 {
+        // Set the environment variable, run the test
+        Command::new("cargo")
+            .arg("test")
+            .env("PERF_MODE", "valgrind")
+            .stderr(Stdio::null())
+            .output()
+            .expect("Failed to run cargo test");
+
+        // Read the file contents
+        let file_content = std::fs::read_to_string(&file_path)?;
+        // Find the instruction count
+        match find_instruction_count(&file_content) {
+            Ok(instruction_count) => {
+                instruction_counts.push(instruction_count);
+                writeln!(output_file, "{run_number},{instruction_count}")?;
+                println!(
+                    "Run {run_number}: Instruction Count = {instruction_count}"
+                );
+            }
+            Err(e) => {
+                eprintln!("Failed to find instruction count in {file_path}: {e}");
+                instruction_counts.push(-1);
+            }
+        }
+    }
+
+    // Calculate the range, minimum, and percentage variance
+    if let (Some(&min), Some(&max)) = (
+        instruction_counts.iter().min(),
+        instruction_counts.iter().max(),
+    ) {
+        let range = max - min;
+        let percentage_variance = (range as f64 / min as f64) * 100.0;
+        println!("Instruction Count Range: {range}");
+        println!("Percentage Variance: {:.6}%", percentage_variance);
+    } else {
+        eprintln!("Could not calculate range and percentage variance due to insufficient data.");
+    }
+
+    Ok(())
+}
diff --git a/tests/regression/src/lib.rs b/tests/regression/src/lib.rs
index 314227752d4..0d7f7e103bf 100644
--- a/tests/regression/src/lib.rs
+++ b/tests/regression/src/lib.rs
@@ -364,7 +364,7 @@ mod tests {
     /// Test to create new config, set security policy, host_callback information, load/trust certs, and build config.
     #[test]
     fn test_set_config() {
-        valgrind_test("test_set_config", 0.1, |ctrl| {
+        valgrind_test("test_set_config", 0.237, |ctrl| {
             ctrl.stop_instrumentation();
             ctrl.start_instrumentation();
             let keypair_rsa = CertKeyPair::default();
@@ -378,7 +378,7 @@ mod tests {
     /// Test which creates a TestPair from config using `rsa_4096_sha512`. Only measures a pair handshake.
     #[test]
     fn test_rsa_handshake() {
-        valgrind_test("test_rsa_handshake", 0.001, |ctrl| {
+        valgrind_test("test_rsa_handshake", 0.0039, |ctrl| {
             ctrl.stop_instrumentation();
             let keypair_rsa = CertKeyPair::default();
             let config = set_config(&security::DEFAULT_TLS13, keypair_rsa)?;
@@ -396,7 +396,7 @@ mod tests {
     fn test_session_resumption() {
         const KEY_NAME: &str = "InsecureTestKey";
         const KEY_VALUE: [u8; 16] = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9, 3];
-        valgrind_test("test_session_resumption", 0.01, |ctrl| {
+        valgrind_test("test_session_resumption", 0.0043, |ctrl| {
             ctrl.stop_instrumentation();
             let keypair_rsa = CertKeyPair::default();
 

From 241bebfa508a2216e9cb6f75f03b518f5c0448f4 Mon Sep 17 00:00:00 2001
From: kaukabrizvi <100529019+kaukabrizvi@users.noreply.github.com>
Date: Wed, 21 Aug 2024 16:16:24 -0700
Subject: [PATCH 3/4] Update README.md

---
 tests/regression/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/regression/README.md b/tests/regression/README.md
index e22bd38693f..06c0812b1b6 100644
--- a/tests/regression/README.md
+++ b/tests/regression/README.md
@@ -146,8 +146,8 @@ To contribute to the test harnesses, you should define a test name, wrap the tes
 1. Navigate to the find_threshold directory `cd find_threshold`
 2. Set the commit id to your current commit id, this lets the script find the profile when cargo test is run. 
 `EXPORT commit_id=#commit_id`
-3. Run `cargo run test_name= "your_test_name"`
-4. The script should run and output the threshold for your test
+3. Run `cargo run "your_test_name"`
+4. The script should run and output the threshold for your test along with a .csv file with each trial run result.
 
 ### Threshold Setting Philosophy
 The find_threshold script runs the test a 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism.

From fb3d999d31b1ade9f405a2caea3060b89ed3ab0b Mon Sep 17 00:00:00 2001
From: kaukabrizvi <100529019+kaukabrizvi@users.noreply.github.com>
Date: Wed, 21 Aug 2024 16:17:10 -0700
Subject: [PATCH 4/4] Update README.md

---
 tests/regression/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/regression/README.md b/tests/regression/README.md
index 06c0812b1b6..135c83f5a75 100644
--- a/tests/regression/README.md
+++ b/tests/regression/README.md
@@ -150,4 +150,4 @@ To contribute to the test harnesses, you should define a test name, wrap the tes
 4. The script should run and output the threshold for your test along with a .csv file with each trial run result.
 
 ### Threshold Setting Philosophy
-The find_threshold script runs the test a 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism.
+The find_threshold script runs the test 100 times to find the range of instruction count outputs. Since there could be non-determinism attributed to a particular test, this script helps to find that threshold. By finding `range/minimum_value` we set an upper bound for percentage differences that can be attributed to non-determinism. Now, when a change is introduced that exceeds that percentage threshold we can be confident it is a regression and not the result of non-determinism.