diff --git a/.gitignore b/.gitignore index ad5ed8373c1..6eecfc650fe 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,5 @@ build/ result result-* *.class +# Exclude rust build directories +*target/ diff --git a/bindings/rust/s2n-tls/Cargo.toml b/bindings/rust/s2n-tls/Cargo.toml index a678b4cd4c3..aa064a2bd04 100644 --- a/bindings/rust/s2n-tls/Cargo.toml +++ b/bindings/rust/s2n-tls/Cargo.toml @@ -15,6 +15,7 @@ unstable-ktls = ["s2n-tls-sys/unstable-ktls"] quic = ["s2n-tls-sys/quic"] fips = ["s2n-tls-sys/fips"] pq = ["s2n-tls-sys/pq"] +unstable-testing = [] [dependencies] errno = { version = "0.3" } diff --git a/bindings/rust/s2n-tls/src/lib.rs b/bindings/rust/s2n-tls/src/lib.rs index 00c58200cba..27dc0f6f534 100644 --- a/bindings/rust/s2n-tls/src/lib.rs +++ b/bindings/rust/s2n-tls/src/lib.rs @@ -27,5 +27,5 @@ pub mod security; pub use s2n_tls_sys as ffi; -#[cfg(test)] -mod testing; +#[cfg(any(feature = "unstable-testing", test))] +pub mod testing; diff --git a/tests/regression/Cargo.toml b/tests/regression/Cargo.toml new file mode 100644 index 00000000000..d8f07d9ab63 --- /dev/null +++ b/tests/regression/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "regression" +version = "0.1.0" +edition = "2021" + +[dependencies] +s2n-tls = { path = "../../bindings/rust/s2n-tls", features = ["unstable-testing"] } +bytes = { version = "1", optional = true } +errno = { version = "0.3" } +libc = "0.2" +crabgrind = "0.1" +futures-test = "0.3.30" +[profile.release] +debug = true diff --git a/tests/regression/README.md b/tests/regression/README.md new file mode 100644 index 00000000000..f79c0964da9 --- /dev/null +++ b/tests/regression/README.md @@ -0,0 +1,106 @@ +# Regression Testing for s2n-tls + +This folder contains regression tests and benchmarking tools for the `s2n-tls` library. The tests focus on various aspects of TLS connections. 
+ +## Testing Philosophy + +Currently, s2n-tls implements a wall clock benchmarking tool which measures end-to-end handshake performance to compare s2n-tls with rustls and OpenSSL. In the past, s2n-tls has tried benchmarking to detect regressions through criterion in Rust, but the subprocess and spin-up time contributed to performance measurement which made the results inaccurate and difficult to use in CI. The project has a slightly different focus, learning from these existing tools. Performance assertion in s2n-tls focuses on a benchmarking tool that can detail performance by API path and do so with enough repeatability and accuracy to detect regressions between two versions of s2n-tls so that performance analysis can occur at PR time. This means that the scope of each harness is limited and mutually exclusive of other harnesses since we are interested in measuring the performance of the important paths a TLS connection typically follows. +## Contents + +1. **lib.rs** + - **test_set_config**: Builds a new s2n-tls config with a security policy, host callback and certs + - **test_rsa_handshake**: Performs an RSA handshake in s2n-tls. + +2. **Cargo.toml** + - The configuration file for building and running the regression tests using Cargo. + + +## Prerequisites + +Ensure you have the following installed: +- Rust (with Cargo) +- Valgrind (for cachegrind instrumentation) + +## Running the Harnesses with Valgrind (scalar performance) +To run the harnesses with Valgrind and store the annotated results, run: + +``` +ENABLE_VALGRIND=true cargo test +``` + +This will recursively call all tests with valgrind enabled so the performance output is generated and stored. +## Running the tests w/o Valgrind + +``` +cargo test +``` + +This will run the tests without valgrind to test if the process completes as expected. +## Sample Output for Valgrind test + +Running the test will run all harnesses and fail if any number of harnesses exceed the performance threshold.
For example, a regression test failure could look like: +``` +---- tests::test_set_security_policy_and_build stdout ---- +Running command: valgrind --tool=cachegrind --cachegrind-out-file=cachegrind_test_set_security_policy_and_build.out /home/ubuntu/proj/s2n/tests/regression/target/debug/deps/regression-7c7d86aeafe3b426 test_set_security_policy_and_build +Running command: cg_annotate cachegrind_test_set_security_policy_and_build.out > perf_outputs/test_set_security_policy_and_build.annotated.txt +thread 'tests::test_set_security_policy_and_build' panicked at src/lib.rs:174:9: +Instruction count difference in test_set_security_policy_and_build exceeds the threshold, regression of 13975865 instructions +note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace + +---- tests::test_rsa_handshake stdout ---- +Running command: valgrind --tool=cachegrind --cachegrind-out-file=cachegrind_test_rsa_handshake.out /home/ubuntu/proj/s2n/tests/regression/target/debug/deps/regression-7c7d86aeafe3b426 test_rsa_handshake +Running command: cg_annotate cachegrind_test_rsa_handshake.out > perf_outputs/test_rsa_handshake.annotated.txt +thread 'tests::test_rsa_handshake' panicked at src/lib.rs:174:9: +Instruction count difference in test_rsa_handshake exceeds the threshold, regression of 51176459 instructions + + +failures: + tests::test_rsa_handshake + tests::test_set_security_policy_and_build +``` + +It also produces annotated cachegrind files stored in the `target/perf_outputs` directory which detail the instruction counts, how many instructions a particular file/function account for, and the contribution of individual lines of code to the overall instruction count.
For example, these are the first few lines of the output generated for 'test_rsa_handshake.annotated.txt': + +``` +-------------------------------------------------------------------------------- +-- Summary +-------------------------------------------------------------------------------- +Ir_________________ + +79,270,744 (100.0%) PROGRAM TOTALS + +-------------------------------------------------------------------------------- +-- File:function summary +-------------------------------------------------------------------------------- + Ir_______________________ file:function + +< 71,798,872 (90.6%, 90.6%) /home/ubuntu/.cargo/registry/src/index.crates.io-6f17d22bba15001f/aws-lc-sys-0.19.0/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S: + 54,908,926 (69.3%) aws_lc_0_19_0_bn_sqr8x_internal + 15,699,024 (19.8%) mul4x_internal + 1,114,840 (1.4%) __bn_post4x_internal + +< 1,551,316 (2.0%, 92.5%) /home/ubuntu/.cargo/registry/src/index.crates.io-6f17d22bba15001f/aws-lc-sys-0.19.0/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S: + 676,336 (0.9%) __ecp_nistz256_mul_montq + 475,750 (0.6%) __ecp_nistz256_sqr_montq + 95,732 (0.1%) aws_lc_0_19_0_ecp_nistz256_point_double + +< 833,553 (1.1%, 93.6%) /home/ubuntu/.cargo/registry/src/index.crates.io-6f17d22bba15001f/aws-lc-sys-0.19.0/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S: + 830,671 (1.0%) sha256_block_data_order_avx + +< 557,697 (0.7%, 94.3%) /home/ubuntu/.cargo/registry/src/index.crates.io-6f17d22bba15001f/aws-lc-sys-0.19.0/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont.S: + 493,032 (0.6%) bn_mul4x_mont + +``` + +### Understanding the Annotated Output +The total instruction counts are listed at the top, and segmented by file:function beneath it. 
When comparing versions of s2n-tls (during PR workflow or otherwise) this can be useful to pinpoint the source of instruction count difference to inform you on how changes to the code impact performance. This [link](https://valgrind.org/docs/manual/cg-manual.html#cg-manual.running-cg_annotate:~:text=Information%20Source%20Code%20Documentation%20Contact%20How%20to%20Help%20Gallery,5.2.3.%C2%A0Running%20cg_annotate,-Before%20using%20cg_annotate) provides a more detailed description to fully understand the output file. + +## Test Details + +### test_set_config + +Configures and creates a new s2n-tls configuration with a specified security policy and loads a certificate key pair. Ensures the configuration is valid and can be built. + +### test_rsa_handshake + +Performs an RSA handshake in s2n-tls and validates the handshake process utilizing rsa_4096_sha512. diff --git a/tests/regression/src/lib.rs b/tests/regression/src/lib.rs new file mode 100644 index 00000000000..6a748b778ee --- /dev/null +++ b/tests/regression/src/lib.rs @@ -0,0 +1,170 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use s2n_tls::{ + config::Builder, + security, + testing::{CertKeyPair, InsecureAcceptAllCertificatesHandler}, +}; +type Error = s2n_tls::error::Error; + +/// Function to create default config with specified parameters. 
+///
+/// Panics if any builder step before `build()` fails; only the `build()` error is returned.
+pub fn set_config(
+    cipher_prefs: &security::Policy,
+    keypair: CertKeyPair,
+) -> Result<s2n_tls::config::Config, Error> {
+    let mut builder = Builder::new();
+    builder
+        .set_security_policy(cipher_prefs)
+        .expect("Unable to set config cipher preferences");
+    builder
+        .set_verify_host_callback(InsecureAcceptAllCertificatesHandler {})
+        .expect("Unable to set a host verify callback.");
+    builder
+        .load_pem(keypair.cert(), keypair.key())
+        .expect("Unable to load cert/pem");
+    builder.trust_pem(keypair.cert()).expect("load cert pem");
+    builder.build()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crabgrind as cg;
+    use s2n_tls::testing::TestPair;
+    use std::{
+        env,
+        fs::{create_dir_all, File},
+        io::{self, BufRead, Write},
+        path::Path,
+        process::Command,
+    };
+
+    /// Configurable threshold for regression testing.
+    /// Tests will fail if the instruction count difference is greater than the value of this constant.
+    const MAX_DIFF: u64 = 1_000_000;
+
+    /// Handle given to each harness for toggling cachegrind instrumentation around the measured code.
+    struct InstrumentationControl;
+
+    impl InstrumentationControl {
+        fn stop_instrumentation(&self) {
+            cg::cachegrind::stop_instrumentation();
+        }
+
+        fn start_instrumentation(&self) {
+            cg::cachegrind::start_instrumentation();
+        }
+    }
+    /// Returns true when `ENABLE_VALGRIND` is set to any valid Unicode value; the value itself is not inspected.
+    fn is_running_under_valgrind() -> bool {
+        env::var("ENABLE_VALGRIND").is_ok()
+    }
+
+    /// Runs `test_body` directly, or re-runs the named test via `run_valgrind_test` when `ENABLE_VALGRIND` is set.
+    fn valgrind_test<F>(test_name: &str, test_body: F) -> Result<(), Error>
+    where
+        F: FnOnce(&InstrumentationControl) -> Result<(), Error>,
+    {
+        if is_running_under_valgrind() {
+            run_valgrind_test(test_name);
+            Ok(())
+        } else {
+            test_body(&InstrumentationControl)
+        }
+    }
+
+    /// Test to create new config, set security policy, host_callback information, load/trust certs, and build config.
+    #[test]
+    fn test_set_config() {
+        valgrind_test("test_set_config", |ctrl| {
+            ctrl.stop_instrumentation();
+            ctrl.start_instrumentation();
+            let keypair_rsa = CertKeyPair::default();
+            let _config =
+                set_config(&security::DEFAULT_TLS13, keypair_rsa).expect("Failed to build config");
+            Ok(())
+        })
+        .unwrap();
+    }
+
+    /// Test which creates a TestPair from config using `rsa_4096_sha512`. Only measures a pair handshake.
+    #[test]
+    fn test_rsa_handshake() {
+        valgrind_test("test_rsa_handshake", |ctrl| {
+            ctrl.stop_instrumentation();
+            // Example usage with RSA keypair (default)
+            let keypair_rsa = CertKeyPair::default();
+            let config = set_config(&security::DEFAULT_TLS13, keypair_rsa)?;
+            // Create a pair (client + server) using that config, start handshake measurement
+            let mut pair = TestPair::from_config(&config);
+            // Assert a successful handshake
+            ctrl.start_instrumentation();
+            assert!(pair.handshake().is_ok());
+            ctrl.stop_instrumentation();
+            Ok(())
+        })
+        .unwrap();
+    }
+    /// Re-executes this test binary under cachegrind for `test_name`, stores the `cg_annotate` output, and asserts on it.
+    fn run_valgrind_test(test_name: &str) {
+        let exe_path = env::args().next().expect("Failed to determine test executable path");
+        create_dir_all(Path::new("target/cg_artifacts")).unwrap();
+        let output_file = format!("target/cg_artifacts/cachegrind_{}.out", test_name);
+        let output_command = format!("--cachegrind-out-file={}", &output_file);
+        let mut command = Command::new("valgrind");
+        command
+            .args(["--tool=cachegrind", &output_command, &exe_path, test_name])
+            // Ensures that the recursive call is made to the actual harness code block rather than back to this function
+            .env_remove("ENABLE_VALGRIND");
+
+        println!("Running command: {:?}", command);
+        let status = command.status().expect("Failed to execute valgrind");
+        assert!(status.success(), "Valgrind failed");
+
+        let annotate_output = Command::new("cg_annotate")
+            .arg(&output_file)
+            .output()
+            .expect("Failed to run cg_annotate");
+        assert!(annotate_output.status.success(), "cg_annotate failed");
+
+        create_dir_all(Path::new("target/perf_outputs")).unwrap();
+        let annotate_file = format!("target/perf_outputs/{}.annotated.txt", test_name);
+        let mut file = File::create(&annotate_file).expect("Failed to create annotation file");
+        file.write_all(&annotate_output.stdout)
+            .expect("Failed to write annotation file");
+
+        let count = find_instruction_count(&annotate_file)
+            .expect("Failed to get instruction count from file");
+        // This is temporary code to showcase the future diff functionality, here the code regresses by 10% each time so this test will almost always fail
+        let new_count = count + count / 10;
+        let diff = new_count - count;
+        assert!(diff <= self::MAX_DIFF, "Instruction count difference in {} exceeds the threshold, regression of {} instructions", test_name, diff);
+    }
+
+    /// Parses the annotated file for the overall instruction count total.
+    /// Returns an `InvalidData` error when no `PROGRAM TOTALS` line exists or its count cannot be parsed.
+    fn find_instruction_count(file_path: &str) -> Result<u64, io::Error> {
+        let reader = io::BufReader::new(File::open(Path::new(file_path))?);
+        // Example of the line being parsed:
+        // "79,278,369 (100.0%) PROGRAM TOTALS"
+        for line in reader.lines() {
+            let line = line?;
+            if line.contains("PROGRAM TOTALS") {
+                if let Some(instructions) = line.split_whitespace().next() {
+                    return instructions
+                        .replace(',', "")
+                        .parse::<u64>()
+                        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e));
+                }
+            }
+        }
+
+        Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            "Failed to find instruction count in annotated file",
+        ))
+    }
+}