From 2b1ded817542de0bdd4e815ff4d5e04d093510e0 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Tue, 8 Oct 2024 13:41:10 -0700 Subject: [PATCH 01/13] Squash merge for adding KVP functionality via tracing and repo instrumentation across spans and diagnostic events. --- Cargo.toml | 9 + README.md | 2 +- azurekvp/Cargo.toml | 28 +++ azurekvp/src/kvp.rs | 503 ++++++++++++++++++++++++++++++++++++++++ azurekvp/src/lib.rs | 6 + azurekvp/src/tracing.rs | 154 ++++++++++++ doc/libazurekvp.md | 70 ++++++ src/main.rs | 85 +++++-- 8 files changed, 834 insertions(+), 23 deletions(-) create mode 100644 azurekvp/Cargo.toml create mode 100644 azurekvp/src/kvp.rs create mode 100644 azurekvp/src/lib.rs create mode 100644 azurekvp/src/tracing.rs create mode 100644 doc/libazurekvp.md diff --git a/Cargo.toml b/Cargo.toml index 3071b2b4..4a6ed325 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,8 +14,10 @@ anyhow = "1.0.81" tokio = { version = "1", features = ["full"] } tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } tracing = "0.1.40" +tracing-opentelemetry = "0.18.0" # We work fine with any version of 4, but 4.5 bumped MSRV to 1.74 clap = { version = "<=4.4", features = ["derive", "cargo", "env"] } +sys-info = "0.9" [dev-dependencies] # Purely for the MSRV requirement. 
@@ -23,11 +25,17 @@ assert_cmd = "<=2.0.13" predicates = "<=3.1.0" predicates-core = "<=1.0.6" predicates-tree = "<=1.0.9" +opentelemetry_sdk = "0.21" +opentelemetry = "0.21" [dependencies.libazureinit] path = "libazureinit" version = "0.1.0" +[dependencies.azurekvp] +path = "azurekvp" +version = "0.1.0" + [profile.dev] incremental = true @@ -42,6 +50,7 @@ path = "tests/functional_tests.rs" [workspace] members = [ "libazureinit", + "azurekvp", ] [features] diff --git a/README.md b/README.md index 5787ec1d..46a3570c 100644 --- a/README.md +++ b/README.md @@ -47,4 +47,4 @@ Any use of third-party trademarks or logos are subject to those third-party's po ## libazureinit -For common library used by this reference implementation, please refer to [libazureinit](https://github.com/Azure/azure-init/tree/main/libazureinit/). +For common library used by this reference implementation, please refer to [libazureinit](https://github.com/Azure/azure-init/tree/main/libazureinit/). \ No newline at end of file diff --git a/azurekvp/Cargo.toml b/azurekvp/Cargo.toml new file mode 100644 index 00000000..e13903a0 --- /dev/null +++ b/azurekvp/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "azurekvp" +version = "0.1.1" +edition = "2021" +repository = "https://github.com/Azure/azure-init/" +homepage = "https://github.com/Azure/azure-init/" +license = "MIT" +description = "A binary library for implementing OpenTelemetry KVP for Linux VMs on Azure." 
+ + +[dependencies] +opentelemetry = "0.18.0" +nix = "0.26" +chrono = "0.4" +uuid = { version = "1.2", features = ["v4"] } +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +tracing = "0.1.40" +tracing-opentelemetry = "0.18.0" +anyhow = "1.0.81" +tokio = { version = "1", features = ["full"] } + +[dev-dependencies] +tempfile = "3.3" +sys-info = "0.9" + +[lib] +name = "azurekvp" +path = "src/lib.rs" \ No newline at end of file diff --git a/azurekvp/src/kvp.rs b/azurekvp/src/kvp.rs new file mode 100644 index 00000000..e070aa2f --- /dev/null +++ b/azurekvp/src/kvp.rs @@ -0,0 +1,503 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! This module provides core functionality for handling telemetry tracing +//! related to azure-init's interaction with Hyper-V KVP (Key-Value Pair) storage. +//! +//! # Constants +//! - `HV_KVP_EXCHANGE_MAX_KEY_SIZE`: Defines the maximum key size for KVP exchange. +//! - `HV_KVP_EXCHANGE_MAX_VALUE_SIZE`: Defines the maximum value size for KVP exchange. +//! - `HV_KVP_AZURE_MAX_VALUE_SIZE`: Maximum value size before splitting into multiple slices. +//! 
+ +use std::fmt as std_fmt; +use std::fmt::Write as std_write; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use tracing::field::Visit; +use tracing_subscriber::Layer; + +use crate::tracing::{handle_event, handle_span, MyInstant}; + +use tracing::span::{Attributes, Id}; +use tracing::Subscriber; +use tracing_subscriber::layer::Context as TracingContext; +use tracing_subscriber::registry::LookupSpan; + +use nix::fcntl::{flock, FlockArg}; +use std::fs::OpenOptions; +use std::io::{self, Error, ErrorKind, Write}; +use std::os::fd::AsRawFd; +use std::os::unix::fs::MetadataExt; +use std::path::Path; + +const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = 512; +const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = 2048; +const HV_KVP_AZURE_MAX_VALUE_SIZE: usize = 1024; +const EVENT_PREFIX: &str = concat!("azure-init-", env!("CARGO_PKG_VERSION")); + +/// A custom visitor that captures the value of the `msg` field from a tracing event. +/// It implements the `tracing::field::Visit` trait and records the value into +/// a provided mutable string reference. +/// +/// This visitor is primarily used in the `on_event` method of the `EmitKVPLayer` +/// to extract event messages and log them as key-value pairs. +pub struct StringVisitor<'a> { + string: &'a mut String, +} + +impl<'a> Visit for StringVisitor<'a> { + /// Records the debug representation of the event's value and stores it in the provided string. + /// + /// # Arguments + /// * `_field` - A reference to the event's field metadata. + /// * `value` - The debug value associated with the field. + fn record_debug( + &mut self, + _field: &tracing::field::Field, + value: &dyn std_fmt::Debug, + ) { + write!(self.string, "{:?}", value).unwrap(); + } +} +/// A custom tracing layer that emits span data as key-value pairs (KVP) +/// to a file for consumption by the Hyper-V daemon. This struct captures +/// spans and events from the tracing system and writes them to a +/// specified file in KVP format. 
+pub struct EmitKVPLayer { + file_path: std::path::PathBuf, +} + +impl EmitKVPLayer { + pub fn new(file_path: std::path::PathBuf) -> Self { + Self { file_path } + } +} + +impl Layer for EmitKVPLayer +where + S: Subscriber + for<'lookup> LookupSpan<'lookup>, +{ + /// Handles event occurrences within a span by extracting the `msg` field, + /// recording the event, and writing the event as key-value pairs (KVP) to a log file. + /// + /// This method uses the `StringVisitor` to capture the message of the event and + /// links the event to the span by writing both span and event data into the same file. + /// + /// If an `ERROR` level event is encountered, a failure flag is inserted into the span's extensions. + /// + /// # Example + /// ```rust + /// use tracing::{event, Level}; + /// event!(Level::INFO, msg = "Event message"); + /// ``` + fn on_event(&self, event: &tracing::Event<'_>, ctx: TracingContext<'_, S>) { + let mut event_message = String::new(); + + let mut visitor = StringVisitor { + string: &mut event_message, + }; + + event.record(&mut visitor); + + if let Some(span) = ctx.lookup_current() { + let mut extensions = span.extensions_mut(); + + if event.metadata().level() == &tracing::Level::ERROR { + extensions.insert("failure".to_string()); + } + + let event_instant = MyInstant::now(); + + handle_event(&event_message, &span, &self.file_path, event_instant); + } + } + + /// Called when a new span is created. Records the start time of the span + /// and stores it as an extension within the span's context, to be used + /// for generating telemetry data for Hyper-V. + fn on_new_span( + &self, + _attrs: &Attributes<'_>, + id: &Id, + ctx: TracingContext<'_, S>, + ) { + let start_instant = MyInstant::now(); + if let Some(span) = ctx.span(id) { + span.extensions_mut().insert(start_instant); + } + } + + /// Called when a span is closed and checks for any logged errors to report. 
+ /// This method handles encoding and writing the span data + /// (start and end times) to the specified KVP file for Hyper-V telemetry. + /// + /// # Arguments + /// * `id` - The unique identifier for the span. + /// * `ctx` - The current tracing context, which can be used to access the span. + fn on_close(&self, id: Id, ctx: TracingContext) { + if let Some(span) = ctx.span(&id) { + let mut status = "success".to_string(); + let mut event_level = "INFO".to_string(); + + if let Some(recorded_status) = span.extensions().get::() { + if recorded_status == "failure" { + status = "failure".to_string(); + event_level = "ERROR".to_string(); + } + } + + let end_time = SystemTime::now(); + + handle_span( + &span, + self.file_path.as_path(), + &status, + &event_level, + end_time, + ); + } + } +} + +/// This function serves as a wrapper that orchestrates the necessary steps to log +/// telemetry data to a file. It first truncates the guest pool file if needed, then +/// generates a unique event key using the provided event metadata, encodes the key-value +/// pair, and writes the result to the KVP file. +pub fn handle_kvp_operation( + file_path: &Path, + event_level: &str, + event_name: &str, + span_id: &str, + event_value: &str, +) { + truncate_guest_pool_file(file_path).expect("Failed to truncate KVP file"); + + let event_key = generate_event_key(event_level, event_name, span_id); + + let encoded_kvp = encode_kvp_item(&event_key, event_value); + if let Err(e) = write_to_kvp_file(file_path, &encoded_kvp) { + eprintln!("Error writing to KVP file: {}", e); + } +} + +/// Generates a unique event key by combining the event level, name, and span ID. +/// +/// # Arguments +/// * `event_level` - The logging level (e.g., "INFO", "DEBUG"). +/// * `event_name` - The name of the event. +/// * `span_id` - A unique identifier for the span. 
+fn generate_event_key( + event_level: &str, + event_name: &str, + span_id: &str, +) -> String { + format!( + "{}|{}|{}|{}", + EVENT_PREFIX, event_level, event_name, span_id + ) +} + +/// Writes the encoded key-value pair (KVP) data to a specified KVP file. +/// +/// This function appends the provided encoded KVP data to the specified file and ensures +/// that file locking is handled properly to avoid race conditions. It locks the file exclusively, +/// writes the data, flushes the output, and then unlocks the file. +fn write_to_kvp_file( + file_path: &Path, + encoded_kvp: &Vec>, +) -> io::Result<()> { + let mut file = + match OpenOptions::new().append(true).create(true).open(file_path) { + Ok(file) => file, + Err(e) => { + eprintln!("Failed to open log file: {}", e); + return Err(e); // Return the error if the file can't be opened + } + }; + + let fd = file.as_raw_fd(); + if let Err(e) = flock(fd, FlockArg::LockExclusive) { + eprintln!("Failed to lock the file: {}", e); + return Err(io::Error::new( + io::ErrorKind::Other, + "File locking failed", + )); + } + + // Write the encoded KVP data + for kvp in encoded_kvp { + if let Err(e) = file.write_all(&kvp[..]) { + eprintln!("Failed to write to log file: {}", e); + return Err(e); // Return the error if writing fails + } + } + + if let Err(e) = file.flush() { + eprintln!("Failed to flush the log file: {}", e); + return Err(e); // Return the error if flushing fails + } + + if let Err(e) = flock(fd, FlockArg::Unlock) { + eprintln!("Failed to unlock the file: {}", e); + return Err(io::Error::new( + io::ErrorKind::Other, + "File unlocking failed", + )); + } + + Ok(()) +} + +/// Encodes a key-value pair (KVP) into one or more byte slices. If the value +/// exceeds the allowed size, it is split into multiple slices for encoding. +/// This is used for logging events to a KVP file. +/// +/// # Arguments +/// * `key` - The key as a string slice. +/// * `value` - The value associated with the key. 
+fn encode_kvp_item(key: &str, value: &str) -> Vec> { + let key_bytes = key.as_bytes(); + let value_bytes = value.as_bytes(); + + let key_len = key_bytes.len().min(HV_KVP_EXCHANGE_MAX_KEY_SIZE); + let mut key_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_KEY_SIZE]; + key_buf[..key_len].copy_from_slice(&key_bytes[..key_len]); + + if value_bytes.len() <= HV_KVP_AZURE_MAX_VALUE_SIZE { + let mut value_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; + let value_len = value_bytes.len().min(HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + value_buf[..value_len].copy_from_slice(&value_bytes[..value_len]); + + vec![encode_kvp_slice(key_buf, value_buf)] + } else { + println!("Value exceeds max size, splitting into multiple slices."); + + let mut kvp_slices = Vec::new(); + let mut start = 0; + while start < value_bytes.len() { + let end = + (start + HV_KVP_AZURE_MAX_VALUE_SIZE).min(value_bytes.len()); + let mut value_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; + value_buf[..end - start].copy_from_slice(&value_bytes[start..end]); + + kvp_slices.push(encode_kvp_slice(key_buf.clone(), value_buf)); + start += HV_KVP_AZURE_MAX_VALUE_SIZE; + } + kvp_slices + } +} + +/// Combines the key and value of a KVP into a single byte slice, ensuring +/// proper formatting for consumption by hv_kvp_daemon service, +/// which typically reads from /var/lib/hyperv/.kvp_pool_1. +fn encode_kvp_slice(key: Vec, value: Vec) -> Vec { + let mut buffer = Vec::with_capacity( + HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE, + ); + buffer.extend_from_slice(&key); + buffer.extend_from_slice(&value); + buffer +} + +/// Decodes a KVP byte slice into its corresponding key and value strings. +/// This is useful for inspecting or logging raw KVP data. 
+pub fn decode_kvp_item( + record_data: &[u8], +) -> Result<(String, String), &'static str> { + let record_data_len = record_data.len(); + let expected_len = + HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE; + + if record_data_len != expected_len { + return Err("record_data len not correct."); + } + + let key = String::from_utf8( + record_data[0..HV_KVP_EXCHANGE_MAX_KEY_SIZE].to_vec(), + ) + .unwrap_or_else(|_| String::new()) + .trim_end_matches('\x00') + .to_string(); + + let value = String::from_utf8( + record_data[HV_KVP_EXCHANGE_MAX_KEY_SIZE..record_data_len].to_vec(), + ) + .unwrap_or_else(|_| String::new()) + .trim_end_matches('\x00') + .to_string(); + + Ok((key, value)) +} + +/// Truncates the guest pool KVP file if it contains stale data (i.e., data +/// older than the system's boot time). Logs whether the file was truncated +/// or no action was needed. +fn truncate_guest_pool_file(kvp_file: &Path) -> Result<(), Error> { + let boot_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_err(|e| Error::new(std::io::ErrorKind::Other, e))? + .as_secs() + - get_uptime()?.as_secs(); + + match kvp_file.metadata() { + Ok(metadata) => { + if metadata.mtime() < boot_time as i64 { + OpenOptions::new() + .write(true) + .truncate(true) + .open(kvp_file)?; + println!("Truncated the KVP file due to stale data."); + } else { + println!( + "File has been truncated since boot, no action taken." + ); + } + } + Err(ref e) if e.kind() == ErrorKind::NotFound => { + println!("File not found: {:?}", kvp_file); + return Ok(()); + } + Err(e) => { + return Err(e); + } + } + + Ok(()) +} + +/// Reads the system's uptime from `/proc/uptime`, which can be used for +/// various time-based calculations or checks, such as determining whether +/// a file contains stale data. 
+fn get_uptime() -> Result { + let uptime = std::fs::read_to_string("/proc/uptime")?; + let uptime_seconds: f64 = uptime + .split_whitespace() + .next() + .unwrap_or("0") + .parse() + .unwrap_or(0.0); + Ok(Duration::from_secs(uptime_seconds as u64)) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::NamedTempFile; + use tokio::time::{sleep, Duration}; + use tracing::instrument; + use tracing::{event, Level}; + use tracing_subscriber::{layer::SubscriberExt, Registry}; + + #[instrument] + async fn mock_child_function(index: usize) { + event!( + Level::INFO, + msg = format!("Event in child span for item {}", index) + ); + sleep(Duration::from_millis(200)).await; + } + + #[instrument] + async fn mock_provision() -> Result<(), anyhow::Error> { + let kernel_version = sys_info::os_release() + .unwrap_or("Unknown Kernel Version".to_string()); + let os_version = + sys_info::os_type().unwrap_or("Unknown OS Version".to_string()); + let azure_init_version = env!("CARGO_PKG_VERSION"); + + event!( + Level::INFO, + msg = format!( + "Kernel Version: {}, OS Version: {}, Azure-Init Version: {}", + kernel_version, os_version, azure_init_version + ) + ); + + event!(Level::INFO, msg = "Provisioning started"); + + mock_child_function(0).await; + sleep(Duration::from_millis(300)).await; + + event!(Level::INFO, msg = "Provisioning completed"); + + Ok(()) + } + + #[instrument] + async fn mock_failure_function() -> Result<(), anyhow::Error> { + let error_message = "Simulated failure during processing"; + event!(Level::ERROR, msg = %error_message); + + sleep(Duration::from_millis(100)).await; + + Err(anyhow::anyhow!(error_message)) + } + + #[tokio::test] + async fn test_emit_kvp_layer() { + let temp_file = + NamedTempFile::new().expect("Failed to create tempfile"); + let temp_path = temp_file.path().to_path_buf(); + + let emit_kvp_layer = EmitKVPLayer::new(temp_path.clone()); + + let subscriber = Registry::default().with(emit_kvp_layer); + let default_guard = 
tracing::subscriber::set_default(subscriber); + + let _ = mock_provision().await; + let _ = mock_failure_function().await; + + drop(default_guard); + + let contents = + std::fs::read(temp_path).expect("Failed to read temp file"); + println!("Contents of the file (in bytes):\n{:?}", contents); + + let slice_size = 512 + 2048; + + let num_slices = (contents.len() + slice_size - 1) / slice_size; + let expected_len = num_slices * slice_size; + + assert_eq!( + contents.len(), + expected_len, + "Encoded buffer length is incorrect. Expected {} but got {}", + expected_len, + contents.len() + ); + + for i in 0..num_slices { + let start = i * slice_size; + let end = start + slice_size; + let slice = &contents[start..end]; + + println!("Processing slice {}: start={}, end={}", i, start, end); + println!("Slice length: {}", slice.len()); + + let key_section = &slice[..512]; + let value_section = &slice[512..]; + + match decode_kvp_item(slice) { + Ok((key, value)) => { + println!("Decoded KVP - Key: {}", key); + println!("Decoded KVP - Value: {}\n", value); + } + Err(e) => { + panic!("Failed to decode KVP: {}", e); + } + } + + assert!( + key_section.iter().any(|&b| b != 0), + "Key section in slice {} should contain non-zero bytes", + i + ); + + assert!( + value_section.iter().any(|&b| b != 0), + "Value section in slice {} should contain non-zero bytes", + i + ); + } + } +} diff --git a/azurekvp/src/lib.rs b/azurekvp/src/lib.rs new file mode 100644 index 00000000..a34fbc81 --- /dev/null +++ b/azurekvp/src/lib.rs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +mod kvp; +pub use kvp::{decode_kvp_item, EmitKVPLayer}; +pub mod tracing; diff --git a/azurekvp/src/tracing.rs b/azurekvp/src/tracing.rs new file mode 100644 index 00000000..def1ebec --- /dev/null +++ b/azurekvp/src/tracing.rs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! 
This module provides core functionality for handling telemetry tracing. + +use crate::kvp::handle_kvp_operation; + +use chrono::{DateTime, Utc}; +use opentelemetry::{global, sdk::trace as sdktrace, trace::TracerProvider}; +use std::path::Path; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use uuid::Uuid; + +/// A wrapper around `std::time::Instant` that provides convenient methods +/// for time tracking in spans and events. Implements the `Deref` trait, allowing +/// access to the underlying `Instant` methods. +/// +/// This struct captures the start time of spans/events and measures the elapsed time. +#[derive(Clone)] +pub struct MyInstant(Instant); +impl std::ops::Deref for MyInstant { + type Target = Instant; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} +impl MyInstant { + pub fn now() -> Self { + MyInstant(Instant::now()) + } +} + +/// Initializes the tracing system by setting up a tracing provider and +/// registering it globally. This function returns a tracer instance +/// associated with the "azure-kvp" application. +/// +/// # Returns +/// A `sdktrace::Tracer` object that can be used to create and manage spans. +pub fn initialize_tracing() -> sdktrace::Tracer { + let provider = sdktrace::TracerProvider::builder().build(); + global::set_tracer_provider(provider.clone()); + provider.tracer("azure-kvp") +} + +/// Handles span data by truncating the guest pool file, encoding key-value pairs +/// with span metadata, and writing the encoded data to a log file. +/// +/// # Arguments +/// * `span` - A reference to the span being processed, which contains the metadata and context. +/// * `file_path` - A reference to the path where the span data should be written. +/// * `status` - A string representing the status of the span (e.g., "success", "failure"). +/// * `event_level` - The logging level of the span (e.g., "INFO", "ERROR"). +/// * `end_time` - The `SystemTime` representing when the span ended. 
+pub fn handle_span( + span: &tracing_subscriber::registry::SpanRef<'_, S>, + file_path: &Path, + status: &str, + event_level: &str, + end_time: SystemTime, +) where + S: tracing::Subscriber + + for<'lookup> tracing_subscriber::registry::LookupSpan<'lookup>, +{ + let span_context = span.metadata(); + let span_id = Uuid::new_v4(); + + if let Some(start_instant) = span.extensions().get::() { + let elapsed = start_instant.elapsed(); + + let start_time = end_time + .checked_sub(elapsed) + .expect("SystemTime subtraction failed"); + + let start_time_dt = DateTime::::from( + UNIX_EPOCH + + std::time::Duration::from_millis( + start_time.duration_since(UNIX_EPOCH).unwrap().as_millis() + as u64, + ), + ) + .format("%Y-%m-%dT%H:%M:%S%.3fZ") + .to_string(); + + let end_time_dt = DateTime::::from( + UNIX_EPOCH + + std::time::Duration::from_millis( + end_time.duration_since(UNIX_EPOCH).unwrap().as_millis() + as u64, + ), + ) + .format("%Y-%m-%dT%H:%M:%S%.3fZ") + .to_string(); + + let event_value = format!( + "Start: {} | End: {} | Status: {}", + start_time_dt, end_time_dt, status + ); + + handle_kvp_operation( + file_path, + event_level, + span_context.name(), + &span_id.to_string(), + &event_value, + ); + } +} + +/// Handles event data by encoding the message and metadata as key-value pairs (KVP) +/// and writing the encoded data to the specified log file. +/// +/// # Arguments +/// * `event_message` - A string message associated with the event. +/// * `span` - The span associated with the event, used to retrieve span context and metadata. +/// * `file_path` - The path to the log file where the encoded KVP data should be written. +/// * `event_instant` - A `MyInstant` representing when the event occurred. 
+pub fn handle_event( + event_message: &str, + span: &tracing_subscriber::registry::SpanRef<'_, S>, + file_path: &Path, + event_instant: MyInstant, +) where + S: tracing::Subscriber + + for<'lookup> tracing_subscriber::registry::LookupSpan<'lookup>, +{ + let span_context = span.metadata(); + let span_id: Uuid = Uuid::new_v4(); + + let event_time = SystemTime::now() + .checked_sub(event_instant.elapsed()) + .expect("SystemTime subtraction failed"); + + let event_time_dt = DateTime::::from( + UNIX_EPOCH + + std::time::Duration::from_millis( + event_time.duration_since(UNIX_EPOCH).unwrap().as_millis() + as u64, + ), + ) + .format("%Y-%m-%dT%H:%M:%S%.3fZ") + .to_string(); + + let event_value = + format!("Time: {} | Event: {}", event_time_dt, event_message); + + handle_kvp_operation( + file_path, + "INFO", + span_context.name(), + &span_id.to_string(), + &event_value, + ); +} diff --git a/doc/libazurekvp.md b/doc/libazurekvp.md new file mode 100644 index 00000000..9ac06f97 --- /dev/null +++ b/doc/libazurekvp.md @@ -0,0 +1,70 @@ +## Tracing Logic Overview + +### How Tracing is Set Up + +The tracing setup in this project is built around three key layers, each with its own responsibility: + +1. **EmitKVPLayer**: Custom Layer for Span Processing +2. **OpenTelemetryLayer**: Context Propagation and Span Export +3. **stderr_layer**: Formatting and Logging to stderr + +These layers work together, yet independently, to process span data as it flows through the program. + +### Layer Overview + +#### 1. EmitKVPLayer + +- **Purpose**: This custom layer is responsible for processing spans and events by capturing their metadata, generating key-value pairs (KVPs), encoding them into a specific format, and writing the encoded data to the VM's Hyper-V file for consumption by the `hv_kvp_daemon` service. 
+
+- **How It Works**:
+  - **Span Processing**: When a span is created, `EmitKVPLayer` processes the span's metadata, generating a unique key for the span and encoding the span data into a binary format that can be consumed by Hyper-V.
+  - **Event Processing**: When an event is emitted using the `event!` macro, the `on_event` method in `EmitKVPLayer` processes the event, capturing its message and linking it to the current span. Events are useful for tracking specific points in time within a span, such as errors, warnings, retries, or important state changes. Events are recorded independently of spans but they are tied to the span they occur within by using the same span metadata.
+  - Both span and event data are written to the `/var/lib/hyperv/.kvp_pool_1` file, which is typically monitored by the Hyper-V `hv_kvp_daemon` service.
+  - The `hv_kvp_daemon` uses this file to exchange key-value pair (KVP) data between the virtual machine and the Hyper-V host. This mechanism is crucial for telemetry and data synchronization.
+
+- **Reference**: For more details on how the Hyper-V Data Exchange Service works, refer to the official documentation here: [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp).
+
+#### 2. OpenTelemetryLayer
+
+- **Purpose**: This layer integrates with the OpenTelemetry framework to handle context propagation and export span data to an external tracing backend (e.g., Jaeger, Prometheus) or to stdout.
+- **How It Works**:
+  - As spans are created and processed, the `OpenTelemetryLayer` ensures that context is propagated correctly across different parts of the program, which is crucial in distributed systems for tracing requests across service boundaries.
+  - The span data is then exported to a configured backend or stdout, where it can be visualized and analyzed using OpenTelemetry-compatible tools.
+
+#### 3. 
stderr_layer + +- **Purpose**: This layer formats and logs span and event data to stderr or a specified log file, providing a human-readable output for immediate inspection. +- **How It Works**: + - Each span's lifecycle events, as well as individual emitted events, are logged in a structured format, making it easy to see the flow of execution in the console or log files. + - This layer is particularly useful for debugging and monitoring during development. + +### How the Layers Work Together + +- **Independent Processing**: Each of these layers processes spans and events independently. When a span is created, it triggers the `on_new_span` method in each layer, and when an event is emitted, it triggers the `on_event` method. As the span progresses through its lifecycle (`on_enter`, `on_close`), each layer performs its respective tasks. +- **Order of Execution**: The layers are executed in the order they are added in the `initialize_tracing` function. For instance, `EmitKVPLayer` might process a span before `OpenTelemetryLayer`, but this order only affects the sequence of operations, not the functionality or output of each layer. +- **No Cross-Layer Dependencies**: Each layer operates independently of the others. For example, the `EmitKVPLayer` encodes and logs span and event data without affecting how `OpenTelemetryLayer` exports span data to a backend. This modular design allows each layer to be modified, replaced, or removed without impacting the others. + +In the `main.rs` file, the tracing logic is initialized. Spans are instrumented using the `#[instrument]` attribute and events can be created with the `event!` macro to monitor the execution of the function. Here's an example: + +```rust +#[instrument(name = "root")] +async fn provision() -> Result<(), anyhow::Error> { + event!(Level::INFO, msg = "Starting the provision process..."); + // Other logic... +} +``` + +1. 
**Initialization**: + The `initialize_tracing` function is called at the start of the program to set up the tracing subscriber with the configured layers (`EmitKVPLayer`, `OpenTelemetryLayer`, and `stderr_layer`). + +2. **Instrumenting the `provision()` Function**: + The `#[instrument]` attribute is used to automatically create a span for the `provision()` function. + - The `name = "root"` part of the `#[instrument]` attribute specifies the name of the span. + - This span will trace the entire execution of the `provision()` function, capturing any relevant metadata (e.g., function parameters, return values). + +3. **Span Processing**: + As the `provision()` function is called and spans are created, entered, exited, and closed, they are processed by the layers configured in `initialize_tracing`: + - **EmitKVPLayer** processes the span, generates key-value pairs, encodes them, and writes them directly to `/var/lib/hyperv/.kvp_pool_1`. + - **OpenTelemetryLayer** handles context propagation and exports span data to a tracing backend or stdout. + - **stderr_layer** logs span information to stderr or another specified output for immediate visibility. + diff --git a/src/main.rs b/src/main.rs index ac35efff..6189d5e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,5 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
- -use std::process::ExitCode; -use std::time::Duration; - use anyhow::Context; use clap::Parser; use libazureinit::imds::InstanceMetadata; @@ -15,10 +11,18 @@ use libazureinit::{ reqwest::{header, Client}, HostnameProvisioner, PasswordProvisioner, Provision, UserProvisioner, }; +use std::process::ExitCode; +use std::time::Duration; use tracing::instrument; +use tracing::{event, Level}; +use tracing_opentelemetry::OpenTelemetryLayer; use tracing_subscriber::fmt::format::FmtSpan; -use tracing_subscriber::layer::SubscriberExt; -use tracing_subscriber::EnvFilter; +use tracing_subscriber::{ + fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, +}; + +use azurekvp::tracing::initialize_tracing; +use azurekvp::EmitKVPLayer; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -56,8 +60,10 @@ fn get_environment() -> Result { } } - environment - .ok_or_else(|| anyhow::anyhow!("Unable to get list of block devices")) + environment.ok_or_else(|| { + event!(Level::ERROR, "Unable to get list of block devices"); + anyhow::anyhow!("Unable to get list of block devices") + }) } #[instrument(skip_all)] @@ -83,25 +89,40 @@ fn get_username( .linux_prov_conf_set .username }) - .ok_or(LibError::UsernameFailure.into()) + .ok_or_else(|| { + event!(Level::ERROR, "Username Failure"); + LibError::UsernameFailure.into() + }) } #[tokio::main] async fn main() -> ExitCode { - let stderr = tracing_subscriber::fmt::layer() + let tracer = initialize_tracing(); + + let otel_layer = OpenTelemetryLayer::new(tracer); + let emit_kvp_layer = EmitKVPLayer::new(std::path::PathBuf::from( + "/var/lib/hyperv/.kvp_pool_1", + )); + + let stderr_layer = fmt::layer() .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) - .with_writer(std::io::stderr); - let registry = tracing_subscriber::registry() - .with(stderr) - .with(EnvFilter::from_env("AZURE_INIT_LOG")); - tracing::subscriber::set_global_default(registry).expect( - "Only an application should set the global default; \ - a library is mis-using the tracing 
API.", - ); + .with_writer(std::io::stderr) + .with_filter(EnvFilter::from_env("AZURE_INIT_LOG")); + + let subscriber = Registry::default() + .with(stderr_layer) + .with(otel_layer) + .with(emit_kvp_layer); + + tracing::subscriber::set_global_default(subscriber) + .expect("Failed to set global tracing subscriber"); - match provision().await { + let result = provision().await; + + match result { Ok(_) => ExitCode::SUCCESS, Err(e) => { + event!(Level::ERROR, "Provisioning failed with error: {:?}", e); eprintln!("{:?}", e); let config: u8 = exitcode::CONFIG .try_into() @@ -117,8 +138,22 @@ async fn main() -> ExitCode { } } -#[instrument] +#[instrument(name = "root")] async fn provision() -> Result<(), anyhow::Error> { + let kernel_version = + sys_info::os_release().unwrap_or("Unknown Kernel Version".to_string()); + let os_version = + sys_info::os_type().unwrap_or("Unknown OS Version".to_string()); + let azure_init_version = env!("CARGO_PKG_VERSION"); + + event!( + Level::INFO, + msg = format!( + "Kernel Version: {}, OS Version: {}, Azure-Init Version: {}", + kernel_version, os_version, azure_init_version + ) + ); + let opts = Cli::parse(); let mut default_headers = header::HeaderMap::new(); @@ -184,7 +219,10 @@ async fn provision() -> Result<(), anyhow::Error> { None, // default wireserver goalstate URL ) .await - .with_context(|| "Failed to get desired goalstate.")?; + .with_context(|| { + event!(Level::ERROR, "Failed to get the desired goalstate."); + "Failed to get desired goalstate." + })?; goalstate::report_health( &client, @@ -194,7 +232,10 @@ async fn provision() -> Result<(), anyhow::Error> { None, // default wireserver health URL ) .await - .with_context(|| "Failed to report VM health.")?; + .with_context(|| { + event!(Level::ERROR, "Failed to report VM health."); + "Failed to report VM health." 
+ })?; Ok(()) } From 60af3778db0534c0ee199a6278b63e9b50de69fa Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Mon, 11 Nov 2024 17:49:49 -0800 Subject: [PATCH 02/13] Refactoring KVP PR to move kvp.rs into main libazureinit crate, combine handle_event with on_event, combine handle_span with on_close, adjust kvp writer logic to rely on a tokio task to send the encoded KVP over a channel and other minor adjustmnets in reference to PR comments. --- Cargo.toml | 7 +- azurekvp/Cargo.toml | 28 -- azurekvp/src/lib.rs | 6 - azurekvp/src/tracing.rs | 154 ---------- libazureinit/Cargo.toml | 7 + {azurekvp => libazureinit}/src/kvp.rs | 412 +++++++++++++++++--------- libazureinit/src/lib.rs | 4 + libazureinit/src/tracing.rs | 16 + src/main.rs | 26 +- 9 files changed, 325 insertions(+), 335 deletions(-) delete mode 100644 azurekvp/Cargo.toml delete mode 100644 azurekvp/src/lib.rs delete mode 100644 azurekvp/src/tracing.rs rename {azurekvp => libazureinit}/src/kvp.rs (54%) create mode 100644 libazureinit/src/tracing.rs diff --git a/Cargo.toml b/Cargo.toml index 4a6ed325..da03810b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ tracing = "0.1.40" tracing-opentelemetry = "0.18.0" # We work fine with any version of 4, but 4.5 bumped MSRV to 1.74 clap = { version = "<=4.4", features = ["derive", "cargo", "env"] } -sys-info = "0.9" +sysinfo = "0.27" [dev-dependencies] # Purely for the MSRV requirement. 
@@ -32,10 +32,6 @@ opentelemetry = "0.21" path = "libazureinit" version = "0.1.0" -[dependencies.azurekvp] -path = "azurekvp" -version = "0.1.0" - [profile.dev] incremental = true @@ -50,7 +46,6 @@ path = "tests/functional_tests.rs" [workspace] members = [ "libazureinit", - "azurekvp", ] [features] diff --git a/azurekvp/Cargo.toml b/azurekvp/Cargo.toml deleted file mode 100644 index e13903a0..00000000 --- a/azurekvp/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -[package] -name = "azurekvp" -version = "0.1.1" -edition = "2021" -repository = "https://github.com/Azure/azure-init/" -homepage = "https://github.com/Azure/azure-init/" -license = "MIT" -description = "A binary library for implementing OpenTelemetry KVP for Linux VMs on Azure." - - -[dependencies] -opentelemetry = "0.18.0" -nix = "0.26" -chrono = "0.4" -uuid = { version = "1.2", features = ["v4"] } -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } -tracing = "0.1.40" -tracing-opentelemetry = "0.18.0" -anyhow = "1.0.81" -tokio = { version = "1", features = ["full"] } - -[dev-dependencies] -tempfile = "3.3" -sys-info = "0.9" - -[lib] -name = "azurekvp" -path = "src/lib.rs" \ No newline at end of file diff --git a/azurekvp/src/lib.rs b/azurekvp/src/lib.rs deleted file mode 100644 index a34fbc81..00000000 --- a/azurekvp/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -mod kvp; -pub use kvp::{decode_kvp_item, EmitKVPLayer}; -pub mod tracing; diff --git a/azurekvp/src/tracing.rs b/azurekvp/src/tracing.rs deleted file mode 100644 index def1ebec..00000000 --- a/azurekvp/src/tracing.rs +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! This module provides core functionality for handling telemetry tracing. 
- -use crate::kvp::handle_kvp_operation; - -use chrono::{DateTime, Utc}; -use opentelemetry::{global, sdk::trace as sdktrace, trace::TracerProvider}; -use std::path::Path; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; -use uuid::Uuid; - -/// A wrapper around `std::time::Instant` that provides convenient methods -/// for time tracking in spans and events. Implements the `Deref` trait, allowing -/// access to the underlying `Instant` methods. -/// -/// This struct captures the start time of spans/events and measures the elapsed time. -#[derive(Clone)] -pub struct MyInstant(Instant); -impl std::ops::Deref for MyInstant { - type Target = Instant; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} -impl MyInstant { - pub fn now() -> Self { - MyInstant(Instant::now()) - } -} - -/// Initializes the tracing system by setting up a tracing provider and -/// registering it globally. This function returns a tracer instance -/// associated with the "azure-kvp" application. -/// -/// # Returns -/// A `sdktrace::Tracer` object that can be used to create and manage spans. -pub fn initialize_tracing() -> sdktrace::Tracer { - let provider = sdktrace::TracerProvider::builder().build(); - global::set_tracer_provider(provider.clone()); - provider.tracer("azure-kvp") -} - -/// Handles span data by truncating the guest pool file, encoding key-value pairs -/// with span metadata, and writing the encoded data to a log file. -/// -/// # Arguments -/// * `span` - A reference to the span being processed, which contains the metadata and context. -/// * `file_path` - A reference to the path where the span data should be written. -/// * `status` - A string representing the status of the span (e.g., "success", "failure"). -/// * `event_level` - The logging level of the span (e.g., "INFO", "ERROR"). -/// * `end_time` - The `SystemTime` representing when the span ended. 
-pub fn handle_span( - span: &tracing_subscriber::registry::SpanRef<'_, S>, - file_path: &Path, - status: &str, - event_level: &str, - end_time: SystemTime, -) where - S: tracing::Subscriber - + for<'lookup> tracing_subscriber::registry::LookupSpan<'lookup>, -{ - let span_context = span.metadata(); - let span_id = Uuid::new_v4(); - - if let Some(start_instant) = span.extensions().get::() { - let elapsed = start_instant.elapsed(); - - let start_time = end_time - .checked_sub(elapsed) - .expect("SystemTime subtraction failed"); - - let start_time_dt = DateTime::::from( - UNIX_EPOCH - + std::time::Duration::from_millis( - start_time.duration_since(UNIX_EPOCH).unwrap().as_millis() - as u64, - ), - ) - .format("%Y-%m-%dT%H:%M:%S%.3fZ") - .to_string(); - - let end_time_dt = DateTime::::from( - UNIX_EPOCH - + std::time::Duration::from_millis( - end_time.duration_since(UNIX_EPOCH).unwrap().as_millis() - as u64, - ), - ) - .format("%Y-%m-%dT%H:%M:%S%.3fZ") - .to_string(); - - let event_value = format!( - "Start: {} | End: {} | Status: {}", - start_time_dt, end_time_dt, status - ); - - handle_kvp_operation( - file_path, - event_level, - span_context.name(), - &span_id.to_string(), - &event_value, - ); - } -} - -/// Handles event data by encoding the message and metadata as key-value pairs (KVP) -/// and writing the encoded data to the specified log file. -/// -/// # Arguments -/// * `event_message` - A string message associated with the event. -/// * `span` - The span associated with the event, used to retrieve span context and metadata. -/// * `file_path` - The path to the log file where the encoded KVP data should be written. -/// * `event_instant` - A `MyInstant` representing when the event occurred. 
-pub fn handle_event( - event_message: &str, - span: &tracing_subscriber::registry::SpanRef<'_, S>, - file_path: &Path, - event_instant: MyInstant, -) where - S: tracing::Subscriber - + for<'lookup> tracing_subscriber::registry::LookupSpan<'lookup>, -{ - let span_context = span.metadata(); - let span_id: Uuid = Uuid::new_v4(); - - let event_time = SystemTime::now() - .checked_sub(event_instant.elapsed()) - .expect("SystemTime subtraction failed"); - - let event_time_dt = DateTime::::from( - UNIX_EPOCH - + std::time::Duration::from_millis( - event_time.duration_since(UNIX_EPOCH).unwrap().as_millis() - as u64, - ), - ) - .format("%Y-%m-%dT%H:%M:%S%.3fZ") - .to_string(); - - let event_value = - format!("Time: {} | Event: {}", event_time_dt, event_message); - - handle_kvp_operation( - file_path, - "INFO", - span_context.name(), - &span_id.to_string(), - &event_value, - ); -} diff --git a/libazureinit/Cargo.toml b/libazureinit/Cargo.toml index daf2f17c..e7c187ca 100644 --- a/libazureinit/Cargo.toml +++ b/libazureinit/Cargo.toml @@ -20,8 +20,15 @@ block-utils = "0.11.1" tracing = "0.1.40" strum = { version = "0.26.3", features = ["derive"] } fstab = "0.4.0" +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +opentelemetry = "0.18.0" +uuid = { version = "1.2", features = ["v4"] } +anyhow = "1.0.81" +sysinfo = "0.27" +chrono = "0.4" [dev-dependencies] +tracing-test = { version = "0.2", features = ["no-env-filter"] } tempfile = "3" tokio = { version = "1", features = ["full"] } tokio-util = "0.7.11" diff --git a/azurekvp/src/kvp.rs b/libazureinit/src/kvp.rs similarity index 54% rename from azurekvp/src/kvp.rs rename to libazureinit/src/kvp.rs index e070aa2f..879691c2 100644 --- a/azurekvp/src/kvp.rs +++ b/libazureinit/src/kvp.rs @@ -10,31 +10,61 @@ //! - `HV_KVP_AZURE_MAX_VALUE_SIZE`: Maximum value size before splitting into multiple slices. //! 
-use std::fmt as std_fmt; -use std::fmt::Write as std_write; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use tracing::field::Visit; -use tracing_subscriber::Layer; - -use crate::tracing::{handle_event, handle_span, MyInstant}; - -use tracing::span::{Attributes, Id}; -use tracing::Subscriber; -use tracing_subscriber::layer::Context as TracingContext; -use tracing_subscriber::registry::LookupSpan; - -use nix::fcntl::{flock, FlockArg}; -use std::fs::OpenOptions; -use std::io::{self, Error, ErrorKind, Write}; -use std::os::fd::AsRawFd; -use std::os::unix::fs::MetadataExt; -use std::path::Path; +use std::{ + fmt::{self as std_fmt, Write as std_write}, + fs::{File, OpenOptions}, + io::{self, Error, ErrorKind, Write}, + os::unix::fs::MetadataExt, + path::Path, + time::{Duration, Instant, SystemTime, UNIX_EPOCH}, +}; + +use tracing::{ + field::Visit, + span::{Attributes, Id}, + Subscriber, +}; + +use tracing_subscriber::{ + layer::Context as TracingContext, registry::LookupSpan, Layer, +}; + +use sysinfo::{System, SystemExt}; + +use tokio::sync::{mpsc::UnboundedReceiver, mpsc::UnboundedSender}; + +//use crate::tracing::{handle_event, handle_span}; + +use chrono::{DateTime, Utc}; +use std::fmt; +use uuid::Uuid; const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = 512; const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = 2048; const HV_KVP_AZURE_MAX_VALUE_SIZE: usize = 1024; const EVENT_PREFIX: &str = concat!("azure-init-", env!("CARGO_PKG_VERSION")); +/// A wrapper around `std::time::Instant` that provides convenient methods +/// for time tracking in spans and events. Implements the `Deref` trait, allowing +/// access to the underlying `Instant` methods. +/// +/// This struct captures the start time of spans/events and measures the elapsed time. 
+#[derive(Clone)] +struct MyInstant(Instant); + +impl std::ops::Deref for MyInstant { + type Target = Instant; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} +impl MyInstant { + pub fn now() -> Self { + MyInstant(Instant::now()) + } +} + /// A custom visitor that captures the value of the `msg` field from a tracing event. /// It implements the `tracing::field::Visit` trait and records the value into /// a provided mutable string reference. @@ -59,17 +89,97 @@ impl<'a> Visit for StringVisitor<'a> { write!(self.string, "{:?}", value).unwrap(); } } -/// A custom tracing layer that emits span data as key-value pairs (KVP) -/// to a file for consumption by the Hyper-V daemon. This struct captures -/// spans and events from the tracing system and writes them to a -/// specified file in KVP format. + +/// Represents the state of a span within the `EmitKVPLayer`. +enum SpanStatus { + Success, + Failure, +} + +impl SpanStatus { + fn as_str(&self) -> &'static str { + match self { + SpanStatus::Success => "success", + SpanStatus::Failure => "failure", + } + } + + fn level(&self) -> &'static str { + match self { + SpanStatus::Success => "INFO", + SpanStatus::Failure => "ERROR", + } + } +} + +impl fmt::Display for SpanStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} +/// A custom tracing layer that emits span and event data as key-value pairs (KVP) +/// to a file for Hyper-V telemetry consumption. The layer manages the asynchronous +/// writing of telemetry data to a specified file in KVP format. +/// +/// `EmitKVPLayer` initializes the file at creation, manages a dedicated writer +/// task, and provides functions to send encoded data for logging. 
pub struct EmitKVPLayer { - file_path: std::path::PathBuf, + events_tx: UnboundedSender>, } impl EmitKVPLayer { - pub fn new(file_path: std::path::PathBuf) -> Self { - Self { file_path } + /// Creates a new `EmitKVPLayer`, initializing the log file and starting + /// an asynchronous writer task for handling incoming KVP data. + /// + /// # Arguments + /// * `file_path` - The file path where the KVP data will be stored. + /// + pub fn new(file_path: std::path::PathBuf) -> Result { + truncate_guest_pool_file(&file_path)?; + + let file = OpenOptions::new() + .append(true) + .create(true) + .open(&file_path)?; + + let (events_tx, events_rx): ( + UnboundedSender>, + UnboundedReceiver>, + ) = tokio::sync::mpsc::unbounded_channel(); + + tokio::spawn(Self::kvp_writer(file, events_rx)); + + Ok(Self { events_tx }) + } + + /// An asynchronous task that serializes incoming KVP data to the specified file. + /// This function manages the file I/O operations to ensure the data is written + /// and flushed consistently. + /// + /// # Arguments + /// * `file` - The file where KVP data will be written. + /// * `events` - A receiver that provides encoded KVP data as it arrives. + async fn kvp_writer( + mut file: File, + mut events: UnboundedReceiver>, + ) -> io::Result<()> { + while let Some(encoded_kvp) = events.recv().await { + if let Err(e) = file.write_all(&encoded_kvp) { + eprintln!("Failed to write to log file: {}", e); + } + if let Err(e) = file.flush() { + eprintln!("Failed to flush the log file: {}", e); + } + } + Ok(()) + } + + /// Sends encoded KVP data to the writer task for asynchronous logging. + /// + /// # Arguments + /// * `message` - The encoded data to send as a vector of bytes (Vec). 
+ pub fn send_event(&self, message: Vec) { + let _ = self.events_tx.send(message); } } @@ -77,13 +187,20 @@ impl Layer for EmitKVPLayer where S: Subscriber + for<'lookup> LookupSpan<'lookup>, { - /// Handles event occurrences within a span by extracting the `msg` field, - /// recording the event, and writing the event as key-value pairs (KVP) to a log file. + /// Handles event occurrences within a span, capturing and recording the event's message + /// and context metadata as key-value pairs (KVP) for logging. /// - /// This method uses the `StringVisitor` to capture the message of the event and - /// links the event to the span by writing both span and event data into the same file. + /// This function extracts the event's `msg` field using `StringVisitor`, constructs a + /// formatted event string, and then encodes it as KVP data to be sent to the + /// `EmitKVPLayer` for asynchronous file storage. /// - /// If an `ERROR` level event is encountered, a failure flag is inserted into the span's extensions. + /// If an `ERROR` level event is encountered, it marks the span's status as a failure, + /// which will be reflected in the span's data upon closure. + /// + /// # Arguments + /// * `event` - The tracing event instance containing the message and metadata. + /// * `ctx` - The current tracing context, which is used to access the span associated + /// with the event. 
/// /// # Example /// ```rust @@ -103,12 +220,34 @@ where let mut extensions = span.extensions_mut(); if event.metadata().level() == &tracing::Level::ERROR { - extensions.insert("failure".to_string()); + extensions.insert(SpanStatus::Failure); } - let event_instant = MyInstant::now(); - - handle_event(&event_message, &span, &self.file_path, event_instant); + let span_context = span.metadata(); + let span_id: Uuid = Uuid::new_v4(); + + let event_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_else(|_| { + span.extensions() + .get::() + .map(|instant| instant.elapsed()) + .unwrap_or_default() + }); + + let event_time_dt = DateTime::::from(UNIX_EPOCH + event_time) + .format("%Y-%m-%dT%H:%M:%S%.3fZ"); + + let event_value = + format!("Time: {} | Event: {}", event_time_dt, event_message); + + handle_kvp_operation( + self, + "INFO", + span_context.name(), + &span_id.to_string(), + &event_value, + ); } } @@ -126,58 +265,83 @@ where span.extensions_mut().insert(start_instant); } } - - /// Called when a span is closed and checks for any logged errors to report. - /// This method handles encoding and writing the span data - /// (start and end times) to the specified KVP file for Hyper-V telemetry. + /// Called when a span is closed, finalizing and logging the span's data. This method + /// records the span's start and end times, status (e.g., success or failure), and other metadata, + /// then sends it to `EmitKVPLayer` for KVP logging. + /// + /// If any errors were recorded in the span (such as `ERROR` level events), the span + /// status is marked as `Failure`; otherwise, it is marked as `Success`. /// /// # Arguments /// * `id` - The unique identifier for the span. - /// * `ctx` - The current tracing context, which can be used to access the span. + /// * `ctx` - The current tracing context, used to access the span's metadata and status. 
fn on_close(&self, id: Id, ctx: TracingContext) { if let Some(span) = ctx.span(&id) { - let mut status = "success".to_string(); - let mut event_level = "INFO".to_string(); - - if let Some(recorded_status) = span.extensions().get::() { - if recorded_status == "failure" { - status = "failure".to_string(); - event_level = "ERROR".to_string(); - } - } + let span_status = if span.extensions().get::().is_some() + { + SpanStatus::Failure + } else { + SpanStatus::Success + }; let end_time = SystemTime::now(); - handle_span( - &span, - self.file_path.as_path(), - &status, - &event_level, - end_time, - ); + let span_context = span.metadata(); + let span_id = Uuid::new_v4(); + + if let Some(start_instant) = span.extensions().get::() { + let elapsed = start_instant.elapsed(); + + let start_time = + end_time.checked_sub(elapsed).unwrap_or(UNIX_EPOCH); + + let start_time_dt = DateTime::::from( + UNIX_EPOCH + + start_time + .duration_since(UNIX_EPOCH) + .unwrap_or_default(), + ) + .format("%Y-%m-%dT%H:%M:%S%.3fZ"); + + let end_time_dt = DateTime::::from( + UNIX_EPOCH + + end_time + .duration_since(UNIX_EPOCH) + .unwrap_or_default(), + ) + .format("%Y-%m-%dT%H:%M:%S%.3fZ"); + + let event_value = format!( + "Start: {} | End: {} | Status: {}", + start_time_dt, end_time_dt, span_status + ); + + handle_kvp_operation( + self, + span_status.level(), + span_context.name(), + &span_id.to_string(), + &event_value, + ); + } } } } -/// This function serves as a wrapper that orchestrates the necessary steps to log -/// telemetry data to a file. It first truncates the guest pool file if needed, then -/// generates a unique event key using the provided event metadata, encodes the key-value -/// pair, and writes the result to the KVP file. +/// Handles the orchestration of key-value pair (KVP) encoding and logging operations +/// by generating a unique event key, encoding it with the provided value, and sending +/// it to the `EmitKVPLayer` for logging. 
pub fn handle_kvp_operation( - file_path: &Path, + emit_kvp_layer: &EmitKVPLayer, event_level: &str, event_name: &str, span_id: &str, event_value: &str, ) { - truncate_guest_pool_file(file_path).expect("Failed to truncate KVP file"); - let event_key = generate_event_key(event_level, event_name, span_id); - let encoded_kvp = encode_kvp_item(&event_key, event_value); - if let Err(e) = write_to_kvp_file(file_path, &encoded_kvp) { - eprintln!("Error writing to KVP file: {}", e); - } + let encoded_kvp_flattened: Vec = encoded_kvp.concat(); + emit_kvp_layer.send_event(encoded_kvp_flattened); } /// Generates a unique event key by combining the event level, name, and span ID. @@ -197,57 +361,6 @@ fn generate_event_key( ) } -/// Writes the encoded key-value pair (KVP) data to a specified KVP file. -/// -/// This function appends the provided encoded KVP data to the specified file and ensures -/// that file locking is handled properly to avoid race conditions. It locks the file exclusively, -/// writes the data, flushes the output, and then unlocks the file. 
-fn write_to_kvp_file( - file_path: &Path, - encoded_kvp: &Vec>, -) -> io::Result<()> { - let mut file = - match OpenOptions::new().append(true).create(true).open(file_path) { - Ok(file) => file, - Err(e) => { - eprintln!("Failed to open log file: {}", e); - return Err(e); // Return the error if the file can't be opened - } - }; - - let fd = file.as_raw_fd(); - if let Err(e) = flock(fd, FlockArg::LockExclusive) { - eprintln!("Failed to lock the file: {}", e); - return Err(io::Error::new( - io::ErrorKind::Other, - "File locking failed", - )); - } - - // Write the encoded KVP data - for kvp in encoded_kvp { - if let Err(e) = file.write_all(&kvp[..]) { - eprintln!("Failed to write to log file: {}", e); - return Err(e); // Return the error if writing fails - } - } - - if let Err(e) = file.flush() { - eprintln!("Failed to flush the log file: {}", e); - return Err(e); // Return the error if flushing fails - } - - if let Err(e) = flock(fd, FlockArg::Unlock) { - eprintln!("Failed to unlock the file: {}", e); - return Err(io::Error::new( - io::ErrorKind::Other, - "File unlocking failed", - )); - } - - Ok(()) -} - /// Encodes a key-value pair (KVP) into one or more byte slices. If the value /// exceeds the allowed size, it is split into multiple slices for encoding. /// This is used for logging events to a KVP file. @@ -365,18 +478,15 @@ fn truncate_guest_pool_file(kvp_file: &Path) -> Result<(), Error> { Ok(()) } -/// Reads the system's uptime from `/proc/uptime`, which can be used for -/// various time-based calculations or checks, such as determining whether -/// a file contains stale data. +/// Retrieves the system's uptime using the `sysinfo` crate, returning the duration +/// since the system booted. This can be useful for time-based calculations or checks, +/// such as determining whether data is stale or calculating the approximate boot time. 
fn get_uptime() -> Result { - let uptime = std::fs::read_to_string("/proc/uptime")?; - let uptime_seconds: f64 = uptime - .split_whitespace() - .next() - .unwrap_or("0") - .parse() - .unwrap_or(0.0); - Ok(Duration::from_secs(uptime_seconds as u64)) + let mut system = System::new(); + system.refresh_system(); + + let uptime_seconds = system.uptime(); + Ok(Duration::from_secs(uptime_seconds)) } #[cfg(test)] @@ -399,10 +509,15 @@ mod tests { #[instrument] async fn mock_provision() -> Result<(), anyhow::Error> { - let kernel_version = sys_info::os_release() + let mut system = System::new(); + system.refresh_system(); + + let kernel_version = system + .kernel_version() .unwrap_or("Unknown Kernel Version".to_string()); - let os_version = - sys_info::os_type().unwrap_or("Unknown OS Version".to_string()); + let os_version = system + .os_version() + .unwrap_or("Unknown OS Version".to_string()); let azure_init_version = env!("CARGO_PKG_VERSION"); event!( @@ -439,7 +554,8 @@ mod tests { NamedTempFile::new().expect("Failed to create tempfile"); let temp_path = temp_file.path().to_path_buf(); - let emit_kvp_layer = EmitKVPLayer::new(temp_path.clone()); + let emit_kvp_layer = EmitKVPLayer::new(temp_path.clone()) + .expect("Failed to create EmitKVPLayer"); let subscriber = Registry::default().with(emit_kvp_layer); let default_guard = tracing::subscriber::set_default(subscriber); @@ -447,6 +563,8 @@ mod tests { let _ = mock_provision().await; let _ = mock_failure_function().await; + sleep(Duration::from_secs(1)).await; + drop(default_guard); let contents = @@ -500,4 +618,32 @@ mod tests { ); } } + + #[tokio::test] + async fn test_truncate_guest_pool_file() { + let temp_file = + NamedTempFile::new().expect("Failed to create tempfile"); + let temp_path = temp_file.path().to_path_buf(); + + std::fs::write(&temp_path, "Some initial data") + .expect("Failed to write initial data"); + + let result = truncate_guest_pool_file(&temp_path); + + assert!( + result.is_ok(), + 
"truncate_guest_pool_file returned an error: {:?}", + result + ); + + if let Ok(contents) = std::fs::read_to_string(&temp_path) { + if contents.is_empty() { + println!("File was truncated as expected."); + } else { + println!("File was not truncated (this is expected if file has been truncated since boot)."); + } + } else { + panic!("Failed to read the temp file after truncation attempt."); + } + } } diff --git a/libazureinit/src/lib.rs b/libazureinit/src/lib.rs index 2fe8a5de..63ea1b01 100644 --- a/libazureinit/src/lib.rs +++ b/libazureinit/src/lib.rs @@ -15,6 +15,10 @@ pub use provision::{ Provision, }; +mod kvp; +pub use kvp::{decode_kvp_item, EmitKVPLayer}; +pub mod tracing; + #[cfg(test)] mod unittest; diff --git a/libazureinit/src/tracing.rs b/libazureinit/src/tracing.rs new file mode 100644 index 00000000..555fffc4 --- /dev/null +++ b/libazureinit/src/tracing.rs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use opentelemetry::{global, sdk::trace as sdktrace, trace::TracerProvider}; + +/// Initializes the tracing system by setting up a tracing provider and +/// registering it globally. This function returns a tracer instance +/// associated with the "azure-kvp" application. +/// +/// # Returns +/// A sdktrace::Tracer object that can be used to create and manage spans. 
+pub fn initialize_tracing() -> sdktrace::Tracer { + let provider = sdktrace::TracerProvider::builder().build(); + global::set_tracer_provider(provider.clone()); + provider.tracer("azure-kvp") +} diff --git a/src/main.rs b/src/main.rs index 6189d5e6..31a07c2a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,7 @@ use libazureinit::{ }; use std::process::ExitCode; use std::time::Duration; +use sysinfo::{System, SystemExt}; use tracing::instrument; use tracing::{event, Level}; use tracing_opentelemetry::OpenTelemetryLayer; @@ -21,8 +22,8 @@ use tracing_subscriber::{ fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, }; -use azurekvp::tracing::initialize_tracing; -use azurekvp::EmitKVPLayer; +use libazureinit::tracing::initialize_tracing; +use libazureinit::EmitKVPLayer; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -100,9 +101,15 @@ async fn main() -> ExitCode { let tracer = initialize_tracing(); let otel_layer = OpenTelemetryLayer::new(tracer); - let emit_kvp_layer = EmitKVPLayer::new(std::path::PathBuf::from( + let emit_kvp_layer = match EmitKVPLayer::new(std::path::PathBuf::from( "/var/lib/hyperv/.kvp_pool_1", - )); + )) { + Ok(layer) => Some(layer), + Err(e) => { + event!(Level::ERROR, "Failed to initialize EmitKVPLayer: {}. 
Continuing without KVP logging.", e); + None + } + }; let stderr_layer = fmt::layer() .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) @@ -140,10 +147,13 @@ async fn main() -> ExitCode { #[instrument(name = "root")] async fn provision() -> Result<(), anyhow::Error> { - let kernel_version = - sys_info::os_release().unwrap_or("Unknown Kernel Version".to_string()); - let os_version = - sys_info::os_type().unwrap_or("Unknown OS Version".to_string()); + let system = System::new(); + let kernel_version = system + .kernel_version() + .unwrap_or("Unknown Kernel Version".to_string()); + let os_version = system + .os_version() + .unwrap_or("Unknown OS Version".to_string()); let azure_init_version = env!("CARGO_PKG_VERSION"); event!( From f119cc57cf77bc3638b756c2a3fb80741ced4d20 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Tue, 12 Nov 2024 15:04:18 -0800 Subject: [PATCH 03/13] Update tracing setup for compatibility with OpenTelemetry 0.27.0 - Updated tracing initialization to align with OpenTelemetry API (0.27.0) and SDK (0.26.0) requirements, moving from the older 0.18.0 version. - Adjusted the tracing configuration in tracing.rs to reflect changes in layer setup and API separation in the new versions, specifically adapting to the SDK and API split in OpenTelemetry. - Centralized layer creation in tracing.rs to maintain all tracing configuration in a single module, reducing tracing setup in main.rs for cleaner organization. - Updated Cargo.toml dependencies to reflect new versions and restructured OpenTelemetry and tracing crates. 
--- Cargo.toml | 1 - libazureinit/Cargo.toml | 9 ++++-- libazureinit/src/tracing.rs | 56 +++++++++++++++++++++++++++++++------ src/main.rs | 35 +++-------------------- 4 files changed, 58 insertions(+), 43 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index da03810b..cbba7352 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,6 @@ anyhow = "1.0.81" tokio = { version = "1", features = ["full"] } tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } tracing = "0.1.40" -tracing-opentelemetry = "0.18.0" # We work fine with any version of 4, but 4.5 bumped MSRV to 1.74 clap = { version = "<=4.4", features = ["derive", "cargo", "env"] } sysinfo = "0.27" diff --git a/libazureinit/Cargo.toml b/libazureinit/Cargo.toml index e7c187ca..e7611d05 100644 --- a/libazureinit/Cargo.toml +++ b/libazureinit/Cargo.toml @@ -20,16 +20,21 @@ block-utils = "0.11.1" tracing = "0.1.40" strum = { version = "0.26.3", features = ["derive"] } fstab = "0.4.0" +regex = "1" +lazy_static = "1.4" +tempfile = "3.3.0" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } -opentelemetry = "0.18.0" +opentelemetry = "0.26" +opentelemetry_sdk = "0.26" +tracing-opentelemetry = "0.27" uuid = { version = "1.2", features = ["v4"] } anyhow = "1.0.81" sysinfo = "0.27" chrono = "0.4" [dev-dependencies] -tracing-test = { version = "0.2", features = ["no-env-filter"] } tempfile = "3" +tracing-test = { version = "0.2", features = ["no-env-filter"] } tokio = { version = "1", features = ["full"] } tokio-util = "0.7.11" whoami = "1" diff --git a/libazureinit/src/tracing.rs b/libazureinit/src/tracing.rs index 555fffc4..201a1b70 100644 --- a/libazureinit/src/tracing.rs +++ b/libazureinit/src/tracing.rs @@ -1,16 +1,54 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -use opentelemetry::{global, sdk::trace as sdktrace, trace::TracerProvider}; - -/// Initializes the tracing system by setting up a tracing provider and -/// registering it globally. 
This function returns a tracer instance -/// associated with the "azure-kvp" application. -/// -/// # Returns -/// A sdktrace::Tracer object that can be used to create and manage spans. +use opentelemetry::{global, trace::TracerProvider}; +use opentelemetry_sdk::trace::{ + self as sdktrace, Config, Sampler, TracerProvider as SdkTracerProvider, +}; +use tracing::{event, Level}; +use tracing_opentelemetry::OpenTelemetryLayer; +use tracing_subscriber::fmt::format::FmtSpan; +use tracing_subscriber::{ + fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, +}; + +use crate::EmitKVPLayer; + pub fn initialize_tracing() -> sdktrace::Tracer { - let provider = sdktrace::TracerProvider::builder().build(); + let config = Config::default().with_sampler(Sampler::AlwaysOn); + + let provider = SdkTracerProvider::builder().with_config(config).build(); + global::set_tracer_provider(provider.clone()); provider.tracer("azure-kvp") } + +pub fn setup_layers( + tracer: sdktrace::Tracer, +) -> Result<(), Box> { + let otel_layer = OpenTelemetryLayer::new(tracer); + + let emit_kvp_layer = match EmitKVPLayer::new(std::path::PathBuf::from( + "/var/lib/hyperv/.kvp_pool_1", + )) { + Ok(layer) => Some(layer), + Err(e) => { + event!(Level::ERROR, "Failed to initialize EmitKVPLayer: {}. 
Continuing without KVP logging.", e); + None + } + }; + + let stderr_layer = fmt::layer() + .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) + .with_writer(std::io::stderr) + .with_filter(EnvFilter::from_env("AZURE_INIT_LOG")); + + let subscriber = Registry::default() + .with(stderr_layer) + .with(otel_layer) + .with(emit_kvp_layer); + + tracing::subscriber::set_global_default(subscriber)?; + + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index 31a07c2a..e5a759d9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use anyhow::Context; use clap::Parser; use libazureinit::imds::InstanceMetadata; +use libazureinit::tracing::{initialize_tracing, setup_layers}; use libazureinit::User; use libazureinit::{ error::Error as LibError, @@ -16,14 +17,6 @@ use std::time::Duration; use sysinfo::{System, SystemExt}; use tracing::instrument; use tracing::{event, Level}; -use tracing_opentelemetry::OpenTelemetryLayer; -use tracing_subscriber::fmt::format::FmtSpan; -use tracing_subscriber::{ - fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, -}; - -use libazureinit::tracing::initialize_tracing; -use libazureinit::EmitKVPLayer; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -100,29 +93,9 @@ fn get_username( async fn main() -> ExitCode { let tracer = initialize_tracing(); - let otel_layer = OpenTelemetryLayer::new(tracer); - let emit_kvp_layer = match EmitKVPLayer::new(std::path::PathBuf::from( - "/var/lib/hyperv/.kvp_pool_1", - )) { - Ok(layer) => Some(layer), - Err(e) => { - event!(Level::ERROR, "Failed to initialize EmitKVPLayer: {}. 
Continuing without KVP logging.", e); - None - } - }; - - let stderr_layer = fmt::layer() - .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) - .with_writer(std::io::stderr) - .with_filter(EnvFilter::from_env("AZURE_INIT_LOG")); - - let subscriber = Registry::default() - .with(stderr_layer) - .with(otel_layer) - .with(emit_kvp_layer); - - tracing::subscriber::set_global_default(subscriber) - .expect("Failed to set global tracing subscriber"); + if let Err(e) = setup_layers(tracer) { + eprintln!("Warning: Failed to set up tracing layers: {:?}", e); + } let result = provision().await; From c3dd388cf9138f1c559917cf8420eaa6afd2c3f6 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Wed, 4 Dec 2024 09:35:23 -0800 Subject: [PATCH 04/13] Resolving lifetime declaration of StringVisitor clippy error, along with some small whitespace markdown changes. --- doc/libazurekvp.md | 17 ++++++++--------- libazureinit/src/kvp.rs | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/doc/libazurekvp.md b/doc/libazurekvp.md index 9ac06f97..d9fffb5e 100644 --- a/doc/libazurekvp.md +++ b/doc/libazurekvp.md @@ -1,6 +1,6 @@ -## Tracing Logic Overview +# Tracing Logic Overview -### How Tracing is Set Up +## How Tracing is Set Up The tracing setup in this project is built around three key layers, each with its own responsibility: @@ -10,35 +10,35 @@ The tracing setup in this project is built around three key layers, each with it These layers work together, yet independently, to process span data as it flows through the program. -### Layer Overview +## Layer Overview -#### 1. EmitKVPLayer +### 1. EmitKVPLayer - **Purpose**: This custom layer is responsible for processing spans and events by capturing their metadata, generating key-value pairs (KVPs), encoding them into a specific format, and writing the encoded data to the VM's Hyper-V file for consumption by the `hv_kvp_daemon` service. 
- **How It Works**: 
 - **Span Processing**: When a span is created, `EmitKVPLayer` processes the span's metadata, generating a unique key for the span and encoding the span data into a binary format that can be consumed by Hyper-V. 
- - **Event Processing**: When an event is emitted using the `event!` macro, the `on_event` method in `EmitKVPLayer` processes the event, capturing its message and linking it to the current span. Events are useful for tracking specific points in time within a span, such as errors, warnings, retries, or important state changes. Events are recorded independently of spans but they are be tied to the span they occur within by using the same span metadata. 
+ - **Event Processing**: When an event is emitted using the `event!` macro, the `on_event` method in `EmitKVPLayer` processes the event, capturing its message and linking it to the current span. Events are useful for tracking specific points in time within a span, such as errors, warnings, retries, or important state changes. Events are recorded independently of spans but they can be tied to the span they occur within by using the same span metadata. 
 - Both span and event data are written to the `/var/lib/hyperv/.kvp_pool_1` file, which is typically monitored by the Hyper-V `hv_kvp_daemon` service. 
 - The `hv_kvp_daemon` uses this file to exchange key-value pair (KVP) data between the virtual machine and the Hyper-V host. This mechanism is crucial for telemetry and data synchronization. 
- **Reference**: For more details on how the Hyper-V Data Exchange Service works, refer to the official documentation here: [Hyper-V Data Exchange Service (KVP)](https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/integration-services#hyper-v-data-exchange-service-kvp). 
 
-#### 2. OpenTelemetryLayer
+### 2. 
OpenTelemetryLayer - **Purpose**: This layer integrates with the OpenTelemetry framework to handle context propagation and export span data to an external tracing backend (e.g., Jaeger, Prometheus) or to stdout. - **How It Works**: - As spans are created and processed, the `OpenTelemetryLayer` ensures that context is propagated correctly across different parts of the program, which is crucial in distributed systems for tracing requests across service boundaries. - The span data is then exported to a configured backend or stdout, where it can be visualized and analyzed using OpenTelemetry-compatible tools. -#### 3. stderr_layer +### 3. stderr_layer - **Purpose**: This layer formats and logs span and event data to stderr or a specified log file, providing a human-readable output for immediate inspection. - **How It Works**: - Each span's lifecycle events, as well as individual emitted events, are logged in a structured format, making it easy to see the flow of execution in the console or log files. - This layer is particularly useful for debugging and monitoring during development. -### How the Layers Work Together +## How the Layers Work Together - **Independent Processing**: Each of these layers processes spans and events independently. When a span is created, it triggers the `on_new_span` method in each layer, and when an event is emitted, it triggers the `on_event` method. As the span progresses through its lifecycle (`on_enter`, `on_close`), each layer performs its respective tasks. - **Order of Execution**: The layers are executed in the order they are added in the `initialize_tracing` function. For instance, `EmitKVPLayer` might process a span before `OpenTelemetryLayer`, but this order only affects the sequence of operations, not the functionality or output of each layer. 
@@ -67,4 +67,3 @@ async fn provision() -> Result<(), anyhow::Error> { - **EmitKVPLayer** processes the span, generates key-value pairs, encodes them, and writes them directly to `/var/lib/hyperv/.kvp_pool_1`. - **OpenTelemetryLayer** handles context propagation and exports span data to a tracing backend or stdout. - **stderr_layer** logs span information to stderr or another specified output for immediate visibility. - diff --git a/libazureinit/src/kvp.rs b/libazureinit/src/kvp.rs index 879691c2..ed949382 100644 --- a/libazureinit/src/kvp.rs +++ b/libazureinit/src/kvp.rs @@ -75,7 +75,7 @@ pub struct StringVisitor<'a> { string: &'a mut String, } -impl<'a> Visit for StringVisitor<'a> { +impl Visit for StringVisitor<'_> { /// Records the debug representation of the event's value and stores it in the provided string. /// /// # Arguments From b96c33fb6447dc7b901a08084031cb2f5cf43098 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Fri, 6 Dec 2024 10:03:56 -0800 Subject: [PATCH 05/13] Add unit test to validate slice value length and replace hardcoded values with constants in tests --- libazureinit/src/kvp.rs | 46 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/libazureinit/src/kvp.rs b/libazureinit/src/kvp.rs index ed949382..b907c86c 100644 --- a/libazureinit/src/kvp.rs +++ b/libazureinit/src/kvp.rs @@ -41,7 +41,7 @@ use uuid::Uuid; const HV_KVP_EXCHANGE_MAX_KEY_SIZE: usize = 512; const HV_KVP_EXCHANGE_MAX_VALUE_SIZE: usize = 2048; -const HV_KVP_AZURE_MAX_VALUE_SIZE: usize = 1024; +const HV_KVP_AZURE_MAX_VALUE_SIZE: usize = 1022; const EVENT_PREFIX: &str = concat!("azure-init-", env!("CARGO_PKG_VERSION")); /// A wrapper around `std::time::Instant` that provides convenient methods @@ -571,7 +571,8 @@ mod tests { std::fs::read(temp_path).expect("Failed to read temp file"); println!("Contents of the file (in bytes):\n{:?}", contents); - let slice_size = 512 + 2048; + let slice_size = + HV_KVP_EXCHANGE_MAX_KEY_SIZE + 
HV_KVP_EXCHANGE_MAX_VALUE_SIZE; let num_slices = (contents.len() + slice_size - 1) / slice_size; let expected_len = num_slices * slice_size; @@ -592,8 +593,8 @@ mod tests { println!("Processing slice {}: start={}, end={}", i, start, end); println!("Slice length: {}", slice.len()); - let key_section = &slice[..512]; - let value_section = &slice[512..]; + let key_section = &slice[..HV_KVP_EXCHANGE_MAX_KEY_SIZE]; + let value_section = &slice[HV_KVP_EXCHANGE_MAX_KEY_SIZE..]; match decode_kvp_item(slice) { Ok((key, value)) => { @@ -646,4 +647,41 @@ mod tests { panic!("Failed to read the temp file after truncation attempt."); } } + + #[test] + fn test_encode_kvp_item_value_length() { + let key = "test_key"; + let value = "A".repeat(HV_KVP_AZURE_MAX_VALUE_SIZE * 2 + 50); + let encoded_slices = encode_kvp_item(&key, &value); + + assert!( + !encoded_slices.is_empty(), + "Encoded slices should not be empty" + ); + + for (i, slice) in encoded_slices.iter().enumerate() { + let (decoded_key, decoded_value) = + decode_kvp_item(slice).expect("Failed to decode slice"); + + println!("Slice {}: Key: {}", i, decoded_key); + println!( + "Slice {}: Value (length {}): {}", + i, + decoded_value.len(), + decoded_value + ); + + assert_eq!(decoded_key, key, "Key does not match for slice {}", i); + + assert!( + decoded_value.len() <= HV_KVP_AZURE_MAX_VALUE_SIZE, + "Value length exceeds limit for slice {}: {} > {}", + i, + decoded_value.len(), + HV_KVP_AZURE_MAX_VALUE_SIZE + ); + } + + println!("All slices adhere to Azure's max value size limit."); + } } From 352d4cdb03146800590de3308bd57f5f818bdf57 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Fri, 6 Dec 2024 14:17:28 -0800 Subject: [PATCH 06/13] Refactor module structure and rename tracing.rs to logging.rs in azure-init - Renamed tracing.rs to logging.rs to avoid conflicts with external tracing dependency. - Moved kvp.rs and logging.rs into the main azure-init crate as modules in main.rs. 
- Refactored encode_kvp_item to use chunks for cleaner logic and more efficient logic. - Integrated handle_kvp_operation into the EmitKVPLayer struct. - Improved tracing mechanisms and adjusted string formatting for better readability and parsing. --- Cargo.toml | 9 +- libazureinit/Cargo.toml | 8 -- libazureinit/src/lib.rs | 4 - {libazureinit/src => src}/kvp.rs | 109 +++++++++--------- libazureinit/src/tracing.rs => src/logging.rs | 2 +- src/main.rs | 28 ++--- 6 files changed, 76 insertions(+), 84 deletions(-) rename {libazureinit/src => src}/kvp.rs (89%) rename libazureinit/src/tracing.rs => src/logging.rs (98%) diff --git a/Cargo.toml b/Cargo.toml index 199c64eb..f0458742 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,14 +16,19 @@ tokio = { version = "1", features = ["full"] } tracing = "0.1.40" clap = { version = "4.5.21", features = ["derive", "cargo", "env"] } sysinfo = "0.27" +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +opentelemetry = "0.26" +opentelemetry_sdk = "0.26" +tracing-opentelemetry = "0.27" +uuid = { version = "1.2", features = ["v4"] } +chrono = "0.4" [dev-dependencies] assert_cmd = "2.0.16" predicates = "3.1.2" predicates-core = "1.0.8" predicates-tree = "1.0.11" -opentelemetry_sdk = "0.21" -opentelemetry = "0.21" +tempfile = "3.3.0" [dependencies.libazureinit] path = "libazureinit" diff --git a/libazureinit/Cargo.toml b/libazureinit/Cargo.toml index 5c8b11e5..cc8bea6e 100644 --- a/libazureinit/Cargo.toml +++ b/libazureinit/Cargo.toml @@ -24,14 +24,6 @@ fstab = "0.4.0" regex = "1" lazy_static = "1.4" tempfile = "3.3.0" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } -opentelemetry = "0.26" -opentelemetry_sdk = "0.26" -tracing-opentelemetry = "0.27" -uuid = { version = "1.2", features = ["v4"] } -anyhow = "1.0.81" -sysinfo = "0.27" -chrono = "0.4" [dev-dependencies] tracing-test = { version = "0.2", features = ["no-env-filter"] } diff --git a/libazureinit/src/lib.rs b/libazureinit/src/lib.rs 
index 63ea1b01..2fe8a5de 100644 --- a/libazureinit/src/lib.rs +++ b/libazureinit/src/lib.rs @@ -15,10 +15,6 @@ pub use provision::{ Provision, }; -mod kvp; -pub use kvp::{decode_kvp_item, EmitKVPLayer}; -pub mod tracing; - #[cfg(test)] mod unittest; diff --git a/libazureinit/src/kvp.rs b/src/kvp.rs similarity index 89% rename from libazureinit/src/kvp.rs rename to src/kvp.rs index b907c86c..49ae504a 100644 --- a/libazureinit/src/kvp.rs +++ b/src/kvp.rs @@ -33,8 +33,6 @@ use sysinfo::{System, SystemExt}; use tokio::sync::{mpsc::UnboundedReceiver, mpsc::UnboundedSender}; -//use crate::tracing::{handle_event, handle_span}; - use chrono::{DateTime, Utc}; use std::fmt; use uuid::Uuid; @@ -86,11 +84,13 @@ impl Visit for StringVisitor<'_> { _field: &tracing::field::Field, value: &dyn std_fmt::Debug, ) { - write!(self.string, "{:?}", value).unwrap(); + write!(self.string, "{}={:?}; ", _field.name(), value) + .expect("Writing to a string should never fail"); } } /// Represents the state of a span within the `EmitKVPLayer`. +#[derive(Copy, Clone, Debug)] enum SpanStatus { Success, Failure, @@ -181,6 +181,22 @@ impl EmitKVPLayer { pub fn send_event(&self, message: Vec) { let _ = self.events_tx.send(message); } + + /// Handles the orchestration of key-value pair (KVP) encoding and logging operations + /// by generating a unique event key, encoding it with the provided value, and sending + /// it to the `EmitKVPLayer` for logging. 
+ pub fn handle_kvp_operation( + &self, + event_level: &str, + event_name: &str, + span_id: &str, + event_value: &str, + ) { + let event_key = generate_event_key(event_level, event_name, span_id); + let encoded_kvp = encode_kvp_item(&event_key, event_value); + let encoded_kvp_flattened: Vec = encoded_kvp.concat(); + self.send_event(encoded_kvp_flattened); + } } impl Layer for EmitKVPLayer @@ -241,9 +257,8 @@ where let event_value = format!("Time: {} | Event: {}", event_time_dt, event_message); - handle_kvp_operation( - self, - "INFO", + self.handle_kvp_operation( + event.metadata().level().as_str(), span_context.name(), &span_id.to_string(), &event_value, @@ -277,12 +292,11 @@ where /// * `ctx` - The current tracing context, used to access the span's metadata and status. fn on_close(&self, id: Id, ctx: TracingContext) { if let Some(span) = ctx.span(&id) { - let span_status = if span.extensions().get::().is_some() - { - SpanStatus::Failure - } else { - SpanStatus::Success - }; + let span_status = span + .extensions() + .get::() + .copied() + .unwrap_or(SpanStatus::Success); let end_time = SystemTime::now(); @@ -316,8 +330,7 @@ where start_time_dt, end_time_dt, span_status ); - handle_kvp_operation( - self, + self.handle_kvp_operation( span_status.level(), span_context.name(), &span_id.to_string(), @@ -328,22 +341,6 @@ where } } -/// Handles the orchestration of key-value pair (KVP) encoding and logging operations -/// by generating a unique event key, encoding it with the provided value, and sending -/// it to the `EmitKVPLayer` for logging. 
-pub fn handle_kvp_operation( - emit_kvp_layer: &EmitKVPLayer, - event_level: &str, - event_name: &str, - span_id: &str, - event_value: &str, -) { - let event_key = generate_event_key(event_level, event_name, span_id); - let encoded_kvp = encode_kvp_item(&event_key, event_value); - let encoded_kvp_flattened: Vec = encoded_kvp.concat(); - emit_kvp_layer.send_event(encoded_kvp_flattened); -} - /// Generates a unique event key by combining the event level, name, and span ID. /// /// # Arguments @@ -376,28 +373,16 @@ fn encode_kvp_item(key: &str, value: &str) -> Vec> { let mut key_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_KEY_SIZE]; key_buf[..key_len].copy_from_slice(&key_bytes[..key_len]); - if value_bytes.len() <= HV_KVP_AZURE_MAX_VALUE_SIZE { + let mut kvp_slices = Vec::new(); + + for chunk in value_bytes.chunks(HV_KVP_AZURE_MAX_VALUE_SIZE) { let mut value_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; - let value_len = value_bytes.len().min(HV_KVP_EXCHANGE_MAX_VALUE_SIZE); - value_buf[..value_len].copy_from_slice(&value_bytes[..value_len]); - - vec![encode_kvp_slice(key_buf, value_buf)] - } else { - println!("Value exceeds max size, splitting into multiple slices."); - - let mut kvp_slices = Vec::new(); - let mut start = 0; - while start < value_bytes.len() { - let end = - (start + HV_KVP_AZURE_MAX_VALUE_SIZE).min(value_bytes.len()); - let mut value_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; - value_buf[..end - start].copy_from_slice(&value_bytes[start..end]); - - kvp_slices.push(encode_kvp_slice(key_buf.clone(), value_buf)); - start += HV_KVP_AZURE_MAX_VALUE_SIZE; - } - kvp_slices + value_buf[..chunk.len()].copy_from_slice(chunk); + + kvp_slices.push(encode_kvp_slice(key_buf.clone(), value_buf)); } + + kvp_slices } /// Combines the key and value of a KVP into a single byte slice, ensuring @@ -414,6 +399,7 @@ fn encode_kvp_slice(key: Vec, value: Vec) -> Vec { /// Decodes a KVP byte slice into its corresponding key and value strings. 
/// This is useful for inspecting or logging raw KVP data. +#[cfg(test)] pub fn decode_kvp_item( record_data: &[u8], ) -> Result<(String, String), &'static str> { @@ -433,7 +419,11 @@ pub fn decode_kvp_item( .to_string(); let value = String::from_utf8( - record_data[HV_KVP_EXCHANGE_MAX_KEY_SIZE..record_data_len].to_vec(), + record_data[HV_KVP_EXCHANGE_MAX_KEY_SIZE..] + .iter() + .take(HV_KVP_AZURE_MAX_VALUE_SIZE) + .cloned() + .collect(), ) .unwrap_or_else(|_| String::new()) .trim_end_matches('\x00') @@ -574,7 +564,7 @@ mod tests { let slice_size = HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE; - let num_slices = (contents.len() + slice_size - 1) / slice_size; + let num_slices = contents.len().div_ceil(slice_size); let expected_len = num_slices * slice_size; assert_eq!( @@ -652,7 +642,8 @@ mod tests { fn test_encode_kvp_item_value_length() { let key = "test_key"; let value = "A".repeat(HV_KVP_AZURE_MAX_VALUE_SIZE * 2 + 50); - let encoded_slices = encode_kvp_item(&key, &value); + + let encoded_slices = encode_kvp_item(key, &value); assert!( !encoded_slices.is_empty(), @@ -660,6 +651,13 @@ mod tests { ); for (i, slice) in encoded_slices.iter().enumerate() { + assert_eq!( + slice.len(), + HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE, + "Slice {} length is incorrect", + i + ); + let (decoded_key, decoded_value) = decode_kvp_item(slice).expect("Failed to decode slice"); @@ -671,11 +669,10 @@ mod tests { decoded_value ); - assert_eq!(decoded_key, key, "Key does not match for slice {}", i); - + assert_eq!(decoded_key, key, "Key mismatch in slice {}", i); assert!( decoded_value.len() <= HV_KVP_AZURE_MAX_VALUE_SIZE, - "Value length exceeds limit for slice {}: {} > {}", + "Value length exceeds limit in slice {}: {} > {}", i, decoded_value.len(), HV_KVP_AZURE_MAX_VALUE_SIZE diff --git a/libazureinit/src/tracing.rs b/src/logging.rs similarity index 98% rename from libazureinit/src/tracing.rs rename to src/logging.rs index 201a1b70..eef10f42 
100644 --- a/libazureinit/src/tracing.rs +++ b/src/logging.rs @@ -12,7 +12,7 @@ use tracing_subscriber::{ fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, }; -use crate::EmitKVPLayer; +use crate::kvp::EmitKVPLayer; pub fn initialize_tracing() -> sdktrace::Tracer { let config = Config::default().with_sampler(Sampler::AlwaysOn); diff --git a/src/main.rs b/src/main.rs index bc0eb362..f412636c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,13 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. + +mod kvp; +mod logging; +pub use logging::{initialize_tracing, setup_layers}; + use anyhow::Context; use clap::Parser; use libazureinit::imds::InstanceMetadata; -use libazureinit::tracing::{initialize_tracing, setup_layers}; use libazureinit::User; use libazureinit::{ error::Error as LibError, @@ -16,7 +20,6 @@ use std::process::ExitCode; use std::time::Duration; use sysinfo::{System, SystemExt}; use tracing::instrument; -use tracing::{event, Level}; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -55,7 +58,7 @@ fn get_environment() -> Result { } environment.ok_or_else(|| { - event!(Level::ERROR, "Unable to get list of block devices"); + tracing::error!("Unable to get list of block devices"); anyhow::anyhow!("Unable to get list of block devices") }) } @@ -78,7 +81,7 @@ fn get_username( .username }) .ok_or_else(|| { - event!(Level::ERROR, "Username Failure"); + tracing::error!("Username Failure"); LibError::UsernameFailure.into() }) } @@ -96,7 +99,7 @@ async fn main() -> ExitCode { match result { Ok(_) => ExitCode::SUCCESS, Err(e) => { - event!(Level::ERROR, "Provisioning failed with error: {:?}", e); + tracing::error!("Provisioning failed with error: {:?}", e); eprintln!("{:?}", e); let config: u8 = exitcode::CONFIG .try_into() @@ -123,12 +126,11 @@ async fn provision() -> Result<(), anyhow::Error> { .unwrap_or("Unknown OS Version".to_string()); let azure_init_version = env!("CARGO_PKG_VERSION"); - event!( - Level::INFO, - msg = 
format!( - "Kernel Version: {}, OS Version: {}, Azure-Init Version: {}", - kernel_version, os_version, azure_init_version - ) + tracing::info!( + "Kernel Version: {}, OS Version: {}, Azure-Init Version: {}", + kernel_version, + os_version, + azure_init_version ); let opts = Cli::parse(); @@ -197,7 +199,7 @@ async fn provision() -> Result<(), anyhow::Error> { ) .await .with_context(|| { - event!(Level::ERROR, "Failed to get the desired goalstate."); + tracing::error!("Failed to get the desired goalstate."); "Failed to get desired goalstate." })?; @@ -210,7 +212,7 @@ async fn provision() -> Result<(), anyhow::Error> { ) .await .with_context(|| { - event!(Level::ERROR, "Failed to report VM health."); + tracing::error!("Failed to report VM health."); "Failed to report VM health." })?; From c87ac52dc85700a29aaf7100a77e4c7eb3a213f4 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Thu, 12 Dec 2024 14:38:06 -0800 Subject: [PATCH 07/13] Merge main into probertson-kvp-test --- doc/configuration.md | 174 +++++ libazureinit/Cargo.toml | 4 +- libazureinit/src/config.rs | 901 +++++++++++++++++++++++++ libazureinit/src/lib.rs | 10 +- libazureinit/src/provision/hostname.rs | 12 +- libazureinit/src/provision/mod.rs | 182 ++--- libazureinit/src/provision/password.rs | 12 +- libazureinit/src/provision/ssh.rs | 88 +-- libazureinit/src/provision/user.rs | 14 +- src/main.rs | 43 +- tests/functional_tests.rs | 19 +- 11 files changed, 1244 insertions(+), 215 deletions(-) create mode 100644 doc/configuration.md create mode 100644 libazureinit/src/config.rs diff --git a/doc/configuration.md b/doc/configuration.md new file mode 100644 index 00000000..2425ba97 --- /dev/null +++ b/doc/configuration.md @@ -0,0 +1,174 @@ +# Custom Configuration Design for azure-init + +## Objective + +The azure-init custom configuration architecture enables dynamic and flexible management of various settings for virtual machines provisioned with the light-weight agent. 
Customizable settings include SSH, IMDS, provisioning media, azure proxy agent, wireserver, and telemetry. + +## Design + +The system supports default configurations with user-specified overrides via configuration files or CLI parameters. + +### Key Features + +- **Config Validation**: Validates user-provided settings against supported options, rejecting invalid values during deserialization. +- **Built-in Defaults**: The system defines defaults directly in the code, eliminating the need for a separate default config file. +- **Merging of Configurations**: Combines configuration sources hierarchically, applying defaults first, followed by file-based overrides, and finally CLI parameters for the highest precedence. + +## Config File Structure + +**Format**: TOML + +- Users can override default settings by supplying a single configuration file or multiple `.toml` files in a directory. + +### CLI Parameters + +Example: `--config /etc/azure-init/` + +## Configuration Hierarchy for `azure-init` + +Configuration can be set via a single file, a directory containing multiple files, or the default values defined in the code. + +### Configuration Loading Order + +#### 1. Defaults in Code + +The configuration process starts with the built-in defaults specified in `Config::default()`. + +#### 2. Base File and Directory Loading Logic + +- After applying default values, `azure-init` checks for a base `azure-init.toml` file. If it exists, it is loaded as the base configuration. +- If an `azure-init.toml.d` directory exists, its `.toml` files are loaded and merged in lexicographical order. +- If neither the `azure-init.toml` nor the directory exists, the configuration remains as defined by the built-in defaults. 
+ + ```text + /etc/azure-init/ + ├── azure-init.toml # Base configuration + └── azure-init.toml.d/ + ├── 01-network.toml # Additional network configuration + ├── 02-ssh.toml # Additional SSH configuration + └── 99-overrides.toml # Final overrides + ``` + +- Each `.toml` file is merged into the configuration in the sorted order. If two files define the same configuration field, the value from the file processed last will take precedence. For example, in the order above, the final value(s) would come from `99-overrides.toml`. + +#### 3. CLI Override (`--config`) + +- The `--config` flag specifies a configuration path that can point to either a single file or a directory. + - **File:** If a file is specified, it is merged as the final layer, overriding all prior configurations. + - **Directory:** If a directory is specified, `.toml` files within it are loaded and merged in, following the same rules specified in the Directory Loading Logic section. + - **Example:** `azure-init --config /path/to/custom-config.toml` + +### Example: Directory with Multiple .toml Files + +**Command:** + +```sh +azure-init --config /path/to/custom-config-directory +``` + +**Directory Structure:** + +```bash +/path/to/custom-config-directory/ +├── azure-init.toml # Base configuration +└── azure-init.toml.d/ + ├── 01-network.toml # Network configuration + ├── 02-ssh.toml # SSH configuration + └── 99-overrides.toml # Overrides +``` + +**Order of Merging:** + +1. Applies defaults from `Config::default()` as defined in `config.rs`. +2. Loads `azure-init.toml` as the base configuration, if present. +3. Merges configuration `.toml` files found in `azure-init.toml.d` in lexicographical order. The last file in the sorted order takes precedence. + - `01-network.toml` + - `02-ssh.toml` + - `99-overrides.toml` +4. Applies any CLI overrides, either from a file or a directory. 
+ +## Validation and Deserialization Process + +Azure Init uses strict validation on configuration fields to ensure they match expected types and values. If a configuration includes an unsupported value or incorrect type, deserialization will fail. + +### Error Handling During Deserialization + +- When a configuration file is loaded, its contents are parsed and converted from `.toml` into structured data. If a field in the file contains an invalid value (e.g., `query_sshd_config` is set to `"not_a_boolean"` instead of `true` or `false`), the parsing process will fail with a deserialization error due to the mismatched type. + +### Propagation of Deserialization Errors + +- When deserialization fails, an error is logged to indicate that the configuration file could not be parsed correctly. This error propagates through the application, causing the provisioning process to fail. The application will not proceed with provisioning if the configuration is invalid. + +### Example of an Unsupported Value + +Here’s an example configuration with an invalid value for `query_sshd_config`. This field expects a boolean (`true` or `false`), but in this case, an unsupported string value `"not_a_boolean"` is provided. 
+ +```toml +# Invalid value for query_sshd_config (not a boolean) +[ssh] +query_sshd_config = "not_a_boolean" # This will cause a validation failure +``` + +## Sample of Valid Configuration File + +```toml +[ssh] +authorized_keys_path = ".ssh/authorized_keys" +query_sshd_config = true + +[hostname_provisioners] +backends = ["hostnamectl"] + +[user_provisioners] +backends = ["useradd"] + +[password_provisioners] +backends = ["passwd"] + +[imds] +connection_timeout_secs = 2.0 +read_timeout_secs = 60 +total_retry_timeout_secs = 300 + +[provisioning_media] +enable = true + +[azure_proxy_agent] +enable = true + +[wireserver] +connection_timeout_secs = 2.0 +read_timeout_secs = 60 +total_retry_timeout_secs = 1200 + +[telemetry] +kvp_diagnostics = true +``` + +## Behavior of `azure-init` on Invalid Configuration + +`azure-init` handles configuration issues by logging errors and either using default values or halting functionality, depending on the severity of the issue. Here’s how it responds to different types of problems: + +### 1. Invalid Configuration + +- If a configuration file contains syntax errors (e.g., malformed TOML) or unsupported values for fields (e.g., invalid enums), `azure-init` logs the error and terminates. The provisioning process does not proceed when configuration parsing fails. + +### 2. Missing or Invalid SSH Configuration + +- `query_sshd_config = true`: + - `azure-init` attempts to dynamically query the authorized keys path using the `sshd -G` command. + - If `sshd -G` succeeds: The dynamically queried path is used for the authorized keys. + - If `sshd -G` fails: The failure is logged, but azure-init continues using the fallback path specified in authorized_keys_path (default: `.ssh/authorized_keys`). +- `query_sshd_config = false`: + - `azure-init` skips querying `sshd -G` entirely + - The value in `authorized_keys_path` is used directly, without any dynamic path detection. + +### 3. 
Handling of Provisioners in `azure-init` + +The `azure-init` configuration allows for custom settings of hostnames, user creation, and password setup through the use of provisioners. If `backends` are specified but do not contain a valid provisioner, `azure-init` logs an error and halts provisioning. + +## Package Considerations + +To ensure smooth operation and compatibility across distributions, `azure-init`, should be packaged with a consistent configuration setup. + +- Distributions packaging `azure-init` are expected to maintain the base configuration file at `/etc/azure-init/azure-init.toml`, with necessary overrides applied from a `.d` subdirectory. diff --git a/libazureinit/Cargo.toml b/libazureinit/Cargo.toml index cc8bea6e..f85f4ff0 100644 --- a/libazureinit/Cargo.toml +++ b/libazureinit/Cargo.toml @@ -19,11 +19,12 @@ serde_json = "1.0.96" nix = {version = "0.29.0", features = ["fs", "user"]} block-utils = "0.11.1" tracing = "0.1.40" -strum = { version = "0.26.3", features = ["derive"] } fstab = "0.4.0" +toml = "0.5" regex = "1" lazy_static = "1.4" tempfile = "3.3.0" +figment = { version = "0.10", features = ["toml"] } [dev-dependencies] tracing-test = { version = "0.2", features = ["no-env-filter"] } @@ -31,6 +32,7 @@ tempfile = "3" tokio = { version = "1", features = ["full"] } tokio-util = "0.7.11" whoami = "1" +anyhow = "1.0.81" [lib] name = "libazureinit" diff --git a/libazureinit/src/config.rs b/libazureinit/src/config.rs new file mode 100644 index 00000000..b5edef6a --- /dev/null +++ b/libazureinit/src/config.rs @@ -0,0 +1,901 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Module for configuring azure-init. +//! +//! This module provides configuration structures and methods for loading and merging +//! configurations from files or directories. Configurations can be customized using +//! `Config` struct options to define settings for SSH, hostname provisioners, user +//! 
provisioners, IMDS, provisioning media, and telemetry. +use crate::error::Error; +use figment::{ + providers::{Format, Serialized, Toml}, + Figment, +}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use std::{fmt, fs}; +use toml; +use tracing; + +#[derive(Default, Serialize, Deserialize, Debug, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +#[non_exhaustive] +pub enum HostnameProvisioner { + #[default] + Hostnamectl, + #[cfg(test)] + FakeHostnamectl, +} + +#[derive(Default, Serialize, Deserialize, Debug, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +#[non_exhaustive] +pub enum UserProvisioner { + #[default] + Useradd, + #[cfg(test)] + FakeUseradd, +} + +#[derive(Default, Serialize, Deserialize, Debug, Clone, Copy, PartialEq)] +#[serde(rename_all = "lowercase")] +#[non_exhaustive] +pub enum PasswordProvisioner { + #[default] + Passwd, + #[cfg(test)] + FakePasswd, +} + +/// SSH configuration struct. +/// +/// Holds settings for managing SSH behavior, including the authorized keys path +/// and options for querying the SSH configuration. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(default)] +pub struct Ssh { + /// Specifies the path to the authorized keys file for SSH. Defaults to `.ssh/authorized_keys`. + pub authorized_keys_path: PathBuf, + + /// When `true`, `azure-init` attempts to dynamically query the authorized keys path via `sshd -G`. + /// If `sshd -G` fails, `azure-init` reports the failure but continues using `authorized_keys_path`. + /// When `false`, `azure-init` directly uses the `authorized_keys_path` as specified. + pub query_sshd_config: bool, +} + +impl Default for Ssh { + fn default() -> Self { + Self { + authorized_keys_path: PathBuf::from(".ssh/authorized_keys"), + query_sshd_config: true, + } + } +} + +/// Hostname provisioner configuration struct. +/// +/// Holds settings for hostname management, allowing specification of provisioner +/// backends for hostname configuration. 
+#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct HostnameProvisioners { + /// List of hostname provisioner backends to use. Defaults to `hostnamectl`. + pub backends: Vec, +} + +impl Default for HostnameProvisioners { + fn default() -> Self { + Self { + backends: vec![HostnameProvisioner::default()], + } + } +} + +/// User provisioner configuration struct. +/// +/// Configures provisioners responsible for user account creation and management. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct UserProvisioners { + /// List of user provisioner backends to use. Defaults to `useradd`. + pub backends: Vec, +} + +impl Default for UserProvisioners { + fn default() -> Self { + Self { + backends: vec![UserProvisioner::default()], + } + } +} + +/// Password provisioner configuration struct. +/// +/// Configures provisioners responsible for managing user passwords. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct PasswordProvisioners { + /// List of password provisioner backends to use. Defaults to `passwd`. + pub backends: Vec, +} + +impl Default for PasswordProvisioners { + fn default() -> Self { + Self { + backends: vec![PasswordProvisioner::default()], + } + } +} + +/// IMDS (Instance Metadata Service) configuration struct. +/// +/// Holds timeout settings for connecting to and reading from the Instance Metadata Service. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(default)] +pub struct Imds { + /// Timeout in seconds for establishing a connection to the IMDS. + pub connection_timeout_secs: f64, + + /// Timeout in seconds for reading data from the IMDS. + pub read_timeout_secs: f64, + + /// Total retry timeout in seconds for IMDS requests. + pub total_retry_timeout_secs: f64, +} + +impl Default for Imds { + fn default() -> Self { + Self { + connection_timeout_secs: 2.0, + read_timeout_secs: 60.0, + total_retry_timeout_secs: 300.0, + } + } +} + +/// Provisioning media configuration struct. 
+/// +/// Determines whether provisioning media is enabled. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ProvisioningMedia { + /// Flag to enable or disable provisioning media. Defaults to `true`. + pub enable: bool, +} + +impl Default for ProvisioningMedia { + fn default() -> Self { + Self { enable: true } + } +} + +/// Azure proxy agent configuration struct. +/// +/// Configures whether the Azure proxy agent is enabled. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct AzureProxyAgent { + /// Flag to enable or disable the Azure proxy agent. Defaults to `true`. + pub enable: bool, +} + +impl Default for AzureProxyAgent { + fn default() -> Self { + Self { enable: true } + } +} + +/// Wire server configuration struct. +/// +/// Holds timeout settings for connecting to and reading from the Azure wire server. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(default)] +pub struct Wireserver { + /// Timeout in seconds for establishing a connection to the wire server. + pub connection_timeout_secs: f64, + + /// Timeout in seconds for reading data from the wire server. + pub read_timeout_secs: f64, + + /// Total retry timeout in seconds for wire server requests. + pub total_retry_timeout_secs: f64, +} + +impl Default for Wireserver { + fn default() -> Self { + Self { + connection_timeout_secs: 2.0, + read_timeout_secs: 60.0, + total_retry_timeout_secs: 1200.0, + } + } +} + +/// Telemetry configuration struct. +/// +/// Configures telemetry behavior, including diagnostic settings. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct Telemetry { + /// Flag to enable or disable KVP diagnostics. Defaults to `true`. + pub kvp_diagnostics: bool, +} + +impl Default for Telemetry { + fn default() -> Self { + Self { + kvp_diagnostics: true, + } + } +} + +/// General configuration struct for azure-init. 
+///
+/// Aggregates all configuration settings for managing SSH, provisioning, IMDS, media,
+/// and telemetry, supporting loading from file or directory and merging configurations.
+#[derive(Default, Serialize, Deserialize, Debug, Clone)]
+#[serde(default)]
+pub struct Config {
+    pub ssh: Ssh,
+    pub hostname_provisioners: HostnameProvisioners,
+    pub user_provisioners: UserProvisioners,
+    pub password_provisioners: PasswordProvisioners,
+    pub imds: Imds,
+    pub provisioning_media: ProvisioningMedia,
+    pub azure_proxy_agent: AzureProxyAgent,
+    pub wireserver: Wireserver,
+    pub telemetry: Telemetry,
+}
+
+/// Implements `Display` for `Config`, formatting it as a readable TOML string.
+///
+/// Uses `toml::to_string_pretty` to serialize the configuration. If serialization fails,
+/// a fallback message is displayed.
+impl fmt::Display for Config {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            toml::to_string_pretty(self)
+                .unwrap_or_else(|_| "Unable to serialize config.".to_string())
+        )
+    }
+}
+
+/// Loads the configuration for `azure-init`.
+///
+/// This method uses the `Figment` library to load configuration from the following sources,
+/// in order of priority:
+///
+/// 1. **Defaults**: Base configuration from `Config::default()`.
+/// 2. **Main File**: `azure-init.toml`, if present.
+/// 3. **Directory Files**: `.toml` files in `azure-init.d`, sorted lexicographically.
+/// 4. **CLI Overrides**: A file or directory specified via the CLI.
+///
+/// Later sources override earlier ones in case of conflicts.
+impl Config {
+    const BASE_CONFIG: &'static str = "/etc/azure-init.toml";
+    const DROP_IN_CONFIG: &'static str = "/etc/azure-init.d/";
+
+    /// Load provisioning configuration.
+    ///
+    /// In addition to the provided path, configuration will also be loaded from the default locations.
+    pub fn load(path: Option<PathBuf>) -> Result<Config, Error> {
+        Self::load_from(
+            PathBuf::from(Self::BASE_CONFIG),
+            PathBuf::from(Self::DROP_IN_CONFIG),
+            path,
+        )
+    }
+
+    fn load_from(
+        base_path: PathBuf,
+        drop_in_path: PathBuf,
+        path: Option<PathBuf>,
+    ) -> Result<Config, Error> {
+        let mut figment =
+            Figment::from(Serialized::defaults(Config::default()));
+
+        if base_path.exists() {
+            tracing::info!(path=?base_path, "Loading base configuration file");
+            figment = figment.merge(Toml::file(base_path));
+        } else {
+            tracing::warn!(
+                "Base configuration file {} not found, using defaults.",
+                base_path.display()
+            );
+        }
+
+        figment = Self::merge_toml_directory(figment, drop_in_path)?;
+
+        if let Some(cli_path) = path {
+            if cli_path.is_dir() {
+                figment = Self::merge_toml_directory(figment, cli_path)?;
+            } else {
+                tracing::info!(
+                    "Merging configuration file from CLI: {:?}",
+                    cli_path
+                );
+                figment = figment.merge(Toml::file(cli_path));
+            }
+        }
+
+        figment.extract::<Config>().map_err(|e| {
+            tracing::error!("Failed to extract configuration: {:?}", e);
+            Error::Io(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!("Configuration error: {:?}", e),
+            ))
+        })
+    }
+
+    /// Helper function to merge `.toml` files from a directory into the Figment configuration.
+    fn merge_toml_directory(
+        mut figment: Figment,
+        dir_path: PathBuf,
+    ) -> Result<Figment, Error> {
+        if dir_path.is_dir() {
+            let mut entries: Vec<_> = fs::read_dir(&dir_path)
+                .map_err(|e| {
+                    tracing::error!(
+                        "Failed to read directory {:?}: {:?}",
+                        dir_path,
+                        e
+                    );
+                    Error::Io(e)
+                })?
+ .filter_map(Result::ok) + .map(|entry| entry.path()) + .filter(|path| { + path.extension().map_or(false, |ext| ext == "toml") + }) + .collect(); + + entries.sort(); + + for path_entry in entries { + tracing::info!("Merging configuration file: {:?}", path_entry); + figment = figment.merge(Toml::file(path_entry)); + } + Ok(figment) + } else { + tracing::info!("Directory {:?} not found, skipping.", dir_path); + Ok(figment.clone()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use anyhow::{Error, Ok}; + use std::fs; + use std::io::Write; + use tempfile::tempdir; + use tracing; + + #[derive(Debug)] + struct MockCli { + config: Option, + } + + impl MockCli { + fn parse_from(args: Vec<&str>) -> Self { + let mut config = None; + + let mut args_iter = args.into_iter(); + while let Some(arg) = args_iter.next() { + match arg { + "--config" => { + if let Some(path) = args_iter.next() { + config = Some(PathBuf::from(path)); + } + } + _ => {} + } + } + + Self { config } + } + } + + #[test] + fn test_load_invalid_config() -> Result<(), Error> { + tracing::info!("Starting test_load_invalid_config..."); + + let dir = tempdir()?; + let drop_in_path = dir.path().join("drop_in_path"); + let file_path = dir.path().join("invalid_config.toml"); + + tracing::info!("Writing an invalid configuration file..."); + let mut file = fs::File::create(&file_path)?; + writeln!( + file, + r#" + [ssh] + authorized_keys_path = ".ssh/authorized_keys" + query_sshd_config = "not_a_boolean" + "# + )?; + + tracing::info!("Attempting to load configuration from file..."); + let result: Result = + Config::load_from(file_path, drop_in_path, None); + + assert!(result.is_err(), "Expected an error due to invalid config"); + + tracing::info!( + "test_load_invalid_config completed with expected error." + ); + + Ok(()) + } + + #[test] + fn test_load_invalid_hostname_provisioner_config() -> Result<(), Error> { + tracing::info!( + "Starting test_load_invalid_hostname_provisioner_config..." 
+ ); + + let dir = tempdir()?; + let drop_in_path = dir.path().join("drop_in_path"); + let file_path = + dir.path().join("invalid_hostname_provisioner_config.toml"); + + tracing::info!( + "Writing an invalid hostname provisioner configuration file..." + ); + let mut file = fs::File::create(&file_path)?; + writeln!( + file, + r#" + [hostname_provisioners] + backends = ["invalid_backend"] + "# + )?; + + tracing::info!("Attempting to load hostname provisioner configuration from file..."); + let result: Result = + Config::load_from(file_path, drop_in_path, None); + assert!( + result.is_err(), + "Expected an error due to invalid hostname provisioner config" + ); + + tracing::info!("test_load_invalid_hostname_provisioner_config completed with expected error."); + + Ok(()) + } + + #[test] + fn test_load_invalid_user_provisioner_config() -> Result<(), Error> { + tracing::info!("Starting test_load_invalid_user_provisioner_config..."); + + let dir = tempdir()?; + let drop_in_path = dir.path().join("drop_in_path"); + let file_path = dir.path().join("invalid_user_provisioner_config.toml"); + + tracing::info!( + "Writing an invalid user provisioner configuration file..." + ); + let mut file = fs::File::create(&file_path)?; + writeln!( + file, + r#" + [user_provisioners] + backends = ["invalid_user_backend"] + "# + )?; + + tracing::info!( + "Attempting to load user provisioner configuration from file..." + ); + let result: Result = + Config::load_from(file_path, drop_in_path, None); + assert!( + result.is_err(), + "Expected an error due to invalid user provisioner config" + ); + + tracing::info!("test_load_invalid_user_provisioner_config completed with expected error."); + + Ok(()) + } + + #[test] + fn test_load_invalid_password_provisioner_config() -> Result<(), Error> { + tracing::info!( + "Starting test_load_invalid_password_provisioner_config..." 
+ ); + + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + let file_path = + dir.path().join("invalid_password_provisioner_config.toml"); + + tracing::info!( + "Writing an invalid password provisioner configuration file..." + ); + let mut file = fs::File::create(&file_path)?; + writeln!( + file, + r#" + [password_provisioners] + backends = ["invalid_password_backend"] + "# + )?; + + tracing::info!("Attempting to load password provisioner configuration from file..."); + let result: Result = + Config::load_from(file_path, drop_in_path, None); + assert!( + result.is_err(), + "Expected an error due to invalid password provisioner config" + ); + + tracing::info!("test_load_invalid_password_provisioner_config completed with expected error."); + + Ok(()) + } + + #[test] + fn test_empty_config_file() -> Result<(), Error> { + tracing::info!( + "Starting test_empty_config_file_uses_defaults_when_merged..." + ); + + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + let empty_file_path = dir.path().join("empty_config.toml"); + + tracing::info!("Creating an empty configuration file..."); + fs::File::create(&empty_file_path)?; + + tracing::info!("Loading configuration with empty file..."); + let config = Config::load_from(empty_file_path, drop_in_path, None)?; + + tracing::info!("Verifying configuration matches defaults..."); + assert_eq!( + config.ssh.authorized_keys_path.to_str().unwrap(), + ".ssh/authorized_keys" + ); + + assert!(config.ssh.query_sshd_config); + + assert_eq!( + config.hostname_provisioners.backends, + vec![HostnameProvisioner::Hostnamectl] + ); + + assert_eq!( + config.user_provisioners.backends, + vec![UserProvisioner::Useradd] + ); + + assert_eq!( + config.password_provisioners.backends, + vec![PasswordProvisioner::Passwd] + ); + + assert_eq!(config.imds.connection_timeout_secs, 2.0); + assert_eq!(config.imds.read_timeout_secs, 60.0); + assert_eq!(config.imds.total_retry_timeout_secs, 
300.0); + + assert!(config.provisioning_media.enable); + + assert!(config.azure_proxy_agent.enable); + + assert_eq!(config.wireserver.connection_timeout_secs, 2.0); + assert_eq!(config.wireserver.read_timeout_secs, 60.0); + assert_eq!(config.wireserver.total_retry_timeout_secs, 1200.0); + + assert!(config.telemetry.kvp_diagnostics); + + tracing::info!("test_empty_config_file_uses_defaults_when_merged completed successfully."); + + Ok(()) + } + + #[test] + fn test_custom_config() -> Result<(), Error> { + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + let override_file_path = dir.path().join("override_config.toml"); + + tracing::info!( + "Writing an override configuration file with custom values..." + ); + let mut override_file = fs::File::create(&override_file_path)?; + writeln!( + override_file, + r#"[ssh] + authorized_keys_path = ".ssh/authorized_keys" + query_sshd_config = false + [user_provisioners] + backends = ["useradd"] + [password_provisioners] + backends = ["passwd"] + [imds] + connection_timeout_secs = 5.0 + read_timeout_secs = 120.0 + [provisioning_media] + enable = false + [azure_proxy_agent] + enable = false + [telemetry] + kvp_diagnostics = false + "# + )?; + + tracing::info!("Loading override configuration from file..."); + let config = Config::load_from(override_file_path, drop_in_path, None) + .map_err(|e| { + tracing::error!( + "Failed to load override configuration file: {:?}", + e + ); + e + })?; + + tracing::info!("Verifying merged SSH configuration values..."); + assert_eq!( + config.ssh.authorized_keys_path.to_str().unwrap(), + ".ssh/authorized_keys" + ); + assert!(!config.ssh.query_sshd_config); + + tracing::info!( + "Verifying default hostname provisioner configuration..." 
+ ); + assert_eq!( + config.hostname_provisioners.backends, + vec![HostnameProvisioner::Hostnamectl] + ); + + tracing::info!("Verifying merged user provisioner configuration..."); + assert_eq!( + config.user_provisioners.backends, + vec![UserProvisioner::Useradd] + ); + + tracing::info!( + "Verifying merged password provisioner configuration..." + ); + assert_eq!( + config.password_provisioners.backends, + vec![PasswordProvisioner::Passwd] + ); + + tracing::info!("Verifying merged IMDS configuration..."); + assert_eq!(config.imds.connection_timeout_secs, 5.0); + assert_eq!(config.imds.read_timeout_secs, 120.0); + assert_eq!(config.imds.total_retry_timeout_secs, 300.0); + + tracing::info!("Verifying merged provisioning media configuration..."); + assert!(!config.provisioning_media.enable); + + tracing::info!("Verifying merged Azure proxy agent configuration..."); + assert!(!config.azure_proxy_agent.enable); + + tracing::info!("Verifying merged telemetry configuration..."); + assert!(!config.telemetry.kvp_diagnostics); + + tracing::info!( + "test_load_and_merge_with_default_config completed successfully." 
+ ); + + Ok(()) + } + + #[test] + fn test_default_config() -> Result<(), Error> { + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + let base_path = dir.path().join("base_path"); + + tracing::info!("Starting test_default_config..."); + + tracing::info!("Loading default configuration without overrides..."); + let config = Config::load_from(base_path, drop_in_path, None)?; + + tracing::info!("Verifying default SSH configuration values..."); + assert_eq!( + config.ssh.authorized_keys_path.to_str().unwrap(), + ".ssh/authorized_keys" + ); + assert!(config.ssh.query_sshd_config); + + tracing::info!("Verifying default hostname provisioner..."); + assert_eq!( + config.hostname_provisioners.backends, + vec![HostnameProvisioner::Hostnamectl] + ); + + tracing::info!("Verifying default user provisioner..."); + assert_eq!( + config.user_provisioners.backends, + vec![UserProvisioner::Useradd] + ); + + tracing::info!("Verifying default password provisioner..."); + assert_eq!( + config.password_provisioners.backends, + vec![PasswordProvisioner::Passwd] + ); + + tracing::info!("Verifying default IMDS configuration..."); + assert_eq!(config.imds.connection_timeout_secs, 2.0); + assert_eq!(config.imds.read_timeout_secs, 60.0); + assert_eq!(config.imds.total_retry_timeout_secs, 300.0); + + tracing::info!("Verifying default provisioning media configuration..."); + assert!(config.provisioning_media.enable); + + tracing::info!("Verifying default Azure proxy agent configuration..."); + assert!(config.azure_proxy_agent.enable); + + tracing::info!("Verifying default wireserver configuration..."); + assert_eq!(config.wireserver.connection_timeout_secs, 2.0); + assert_eq!(config.wireserver.read_timeout_secs, 60.0); + assert_eq!(config.wireserver.total_retry_timeout_secs, 1200.0); + + tracing::info!("Verifying default telemetry configuration..."); + assert!(config.telemetry.kvp_diagnostics); + + tracing::info!("test_default_config completed 
successfully."); + + Ok(()) + } + + #[test] + fn test_custom_config_via_cli() -> Result<(), Error> { + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + let base_path = dir.path().join("base_path"); + let override_file_path = dir.path().join("override_config.toml"); + + fs::write( + &override_file_path, + r#"[ssh] + authorized_keys_path = ".ssh/authorized_keys" + query_sshd_config = false + [user_provisioners] + backends = ["useradd"] + [password_provisioners] + backends = ["passwd"] + [imds] + connection_timeout_secs = 5.0 + read_timeout_secs = 120.0 + [provisioning_media] + enable = false + [azure_proxy_agent] + enable = false + [telemetry] + kvp_diagnostics = false + "#, + )?; + + let args = vec![ + "azure-init", + "--config", + override_file_path.to_str().unwrap(), + ]; + + let opts = MockCli::parse_from(args); + + assert_eq!(opts.config, Some(override_file_path.clone())); + + let config = Config::load_from( + base_path, + drop_in_path, + Some(override_file_path), + )?; + + assert_eq!( + config.ssh.authorized_keys_path.to_str().unwrap(), + ".ssh/authorized_keys" + ); + assert!(!config.ssh.query_sshd_config); + + assert_eq!( + config.user_provisioners.backends, + vec![UserProvisioner::Useradd] + ); + + assert_eq!( + config.password_provisioners.backends, + vec![PasswordProvisioner::Passwd] + ); + + assert_eq!(config.imds.connection_timeout_secs, 5.0); + assert_eq!(config.imds.read_timeout_secs, 120.0); + assert_eq!(config.imds.total_retry_timeout_secs, 300.0); + + assert!(!config.provisioning_media.enable); + assert!(!config.azure_proxy_agent.enable); + assert!(!config.telemetry.kvp_diagnostics); + + Ok(()) + } + + #[test] + fn test_directory_config_via_cli() -> Result<(), Error> { + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + let base_path = dir.path().join("base_path"); + + let args = vec!["azure-init", "--config", dir.path().to_str().unwrap()]; + + let opts = 
MockCli::parse_from(args); + + assert_eq!(opts.config, Some(dir.path().to_path_buf())); + + let config = Config::load_from(base_path, drop_in_path, None)?; + + assert!(config.ssh.authorized_keys_path.is_relative()); + assert_eq!( + config.ssh.authorized_keys_path.to_str().unwrap(), + ".ssh/authorized_keys" + ); + + Ok(()) + } + + #[test] + fn test_merge_toml_basic_and_progressive() -> Result<(), Error> { + tracing::info!("Starting test_merge_toml_basic_and_progressive..."); + + let dir = tempdir()?; + let drop_in_path: PathBuf = dir.path().join("drop_in_path"); + fs::create_dir_all(&drop_in_path)?; + + let base_file_path = dir.path().join("base_config.toml"); + let override_file_path_1 = drop_in_path.join("override_config_1.toml"); + let override_file_path_2 = drop_in_path.join("override_config_2.toml"); + + tracing::info!("Writing base configuration..."); + let mut base_file = fs::File::create(&base_file_path)?; + writeln!( + base_file, + r#" + [ssh] + query_sshd_config = true + [telemetry] + kvp_diagnostics = true + "# + )?; + + tracing::info!("Writing first override configuration..."); + let mut override_file_1 = fs::File::create(&override_file_path_1)?; + writeln!( + override_file_1, + r#" + [ssh] + authorized_keys_path = "/custom/.ssh/authorized_keys" + "# + )?; + + tracing::info!("Writing second override configuration..."); + let mut override_file_2 = fs::File::create(&override_file_path_2)?; + writeln!( + override_file_2, + r#" + [ssh] + query_sshd_config = false + [telemetry] + kvp_diagnostics = false + "# + )?; + + tracing::info!("Loading and merging configurations..."); + let config = Config::load_from(base_file_path, drop_in_path, None)?; + + tracing::info!("Verifying merged configuration..."); + assert_eq!( + config.ssh.authorized_keys_path.to_str().unwrap(), + "/custom/.ssh/authorized_keys", + ); + assert!(!config.ssh.query_sshd_config); + assert!(!config.telemetry.kvp_diagnostics); + + tracing::info!( + "test_merge_toml_basic_and_progressive completed 
successfully." + ); + Ok(()) + } +} diff --git a/libazureinit/src/lib.rs b/libazureinit/src/lib.rs index 2fe8a5de..47d39699 100644 --- a/libazureinit/src/lib.rs +++ b/libazureinit/src/lib.rs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. - +pub mod config; +pub use config::{HostnameProvisioner, PasswordProvisioner, UserProvisioner}; pub mod error; pub mod goalstate; pub(crate) mod http; @@ -8,12 +9,7 @@ pub mod imds; pub mod media; mod provision; -pub use provision::{ - hostname::Provisioner as HostnameProvisioner, - password::Provisioner as PasswordProvisioner, - user::{Provisioner as UserProvisioner, User}, - Provision, -}; +pub use provision::{user::User, Provision}; #[cfg(test)] mod unittest; diff --git a/libazureinit/src/provision/hostname.rs b/libazureinit/src/provision/hostname.rs index 5f665151..840a2f99 100644 --- a/libazureinit/src/provision/hostname.rs +++ b/libazureinit/src/provision/hostname.rs @@ -7,17 +7,9 @@ use tracing::instrument; use crate::error::Error; -/// Available tools to set the host's hostname. -#[derive(strum::EnumIter, Debug, Clone)] -#[non_exhaustive] -pub enum Provisioner { - /// Use the `hostnamectl` command from `systemd`. 
- Hostnamectl, - #[cfg(test)] - FakeHostnamectl, -} +use crate::provision::HostnameProvisioner; -impl Provisioner { +impl HostnameProvisioner { pub(crate) fn set(&self, hostname: impl AsRef) -> Result<(), Error> { match self { Self::Hostnamectl => hostnamectl(hostname.as_ref()), diff --git a/libazureinit/src/provision/mod.rs b/libazureinit/src/provision/mod.rs index 8273a08b..cbf22f57 100644 --- a/libazureinit/src/provision/mod.rs +++ b/libazureinit/src/provision/mod.rs @@ -5,11 +5,12 @@ pub mod password; pub(crate) mod ssh; pub mod user; -use strum::IntoEnumIterator; -use tracing::instrument; - +use crate::config::{ + Config, HostnameProvisioner, PasswordProvisioner, UserProvisioner, +}; use crate::error::Error; use crate::User; +use tracing::instrument; /// The interface for applying the desired configuration to the host. /// @@ -23,156 +24,119 @@ use crate::User; pub struct Provision { hostname: String, user: User, - hostname_backends: Option>, - user_backends: Option>, - password_backends: Option>, + config: Config, } impl Provision { - pub fn new(hostname: impl Into, user: User) -> Self { + pub fn new( + hostname: impl Into, + user: User, + config: Config, + ) -> Self { Self { hostname: hostname.into(), user, - hostname_backends: None, - user_backends: None, - password_backends: None, + config, } } - /// Specify the ways to set the virtual machine's hostname. - /// - /// By default, all known methods will be attempted. Use this function to - /// restrict which methods are attempted. These will be attempted in the - /// order provided until one succeeds. - pub fn hostname_provisioners( - mut self, - backends: impl Into>, - ) -> Self { - self.hostname_backends = Some(backends.into()); - self - } - - /// Specify the ways to create a user in the virtual machine - /// - /// By default, all known methods will be attempted. Use this function to - /// restrict which methods are attempted. These will be attempted in the - /// order provided until one succeeds. 
- pub fn user_provisioners( - mut self, - backends: impl Into>, - ) -> Self { - self.user_backends = Some(backends.into()); - self - } - - /// Specify the ways to set a users password. - /// - /// By default, all known methods will be attempted. Use this function to - /// restrict which methods are attempted. These will be attempted in the - /// order provided until one succeeds. Only relevant if a password has been - /// provided with the [`User`]. - pub fn password_provisioners( - mut self, - backend: impl Into>, - ) -> Self { - self.password_backends = Some(backend.into()); - self - } - - /// Provision the host. - /// /// Provisioning can fail if the host lacks the necessary tools. For example, - /// if there is no `useradd` command on the system's `PATH`, or if the command + /// if there is no useradd command on the system's PATH, or if the command /// returns an error, this will return an error. It does not attempt to undo /// partial provisioning. #[instrument(skip_all)] pub fn provision(self) -> Result<(), Error> { - self.user_backends - .unwrap_or_else(|| user::Provisioner::iter().collect()) + self.config + .hostname_provisioners + .backends .iter() - .find_map(|backend| { - backend - .create(&self.user) - .map_err(|e| { - tracing::info!( - error=?e, - backend=?backend, - resource="user", - "Provisioning did not succeed" - ); - e - }) - .ok() + .find_map(|backend| match backend { + HostnameProvisioner::Hostnamectl => { + HostnameProvisioner::Hostnamectl.set(&self.hostname).ok() + } + #[cfg(test)] + HostnameProvisioner::FakeHostnamectl => Some(()), + }) + .ok_or(Error::NoHostnameProvisioner)?; + + self.config + .user_provisioners + .backends + .iter() + .find_map(|backend| match backend { + UserProvisioner::Useradd => { + UserProvisioner::Useradd.create(&self.user).ok() + } + #[cfg(test)] + UserProvisioner::FakeUseradd => Some(()), }) .ok_or(Error::NoUserProvisioner)?; - self.password_backends - .unwrap_or_else(|| password::Provisioner::iter().collect()) + 
self.config + .password_provisioners + .backends .iter() - .find_map(|backend| { - backend - .set(&self.user) - .map_err(|e| { - tracing::info!( - error=?e, - backend=?backend, - resource="password", - "Provisioning did not succeed" - ); - e - }) - .ok() + .find_map(|backend| match backend { + PasswordProvisioner::Passwd => { + PasswordProvisioner::Passwd.set(&self.user).ok() + } + #[cfg(test)] + PasswordProvisioner::FakePasswd => Some(()), }) .ok_or(Error::NoPasswordProvisioner)?; if !self.user.ssh_keys.is_empty() { + let authorized_keys_path = self.config.ssh.authorized_keys_path; + let query_sshd_config = self.config.ssh.query_sshd_config; + let user = nix::unistd::User::from_name(&self.user.name)?.ok_or( Error::UserMissing { user: self.user.name, }, )?; - ssh::provision_ssh(&user, &self.user.ssh_keys, None)?; + ssh::provision_ssh( + &user, + &self.user.ssh_keys, + authorized_keys_path, + query_sshd_config, + )?; } - self.hostname_backends - .unwrap_or_else(|| hostname::Provisioner::iter().collect()) - .iter() - .find_map(|backend| { - backend - .set(&self.hostname) - .map_err(|e| { - tracing::info!( - error=?e, - backend=?backend, - resource="hostname", - "Provisioning did not succeed" - ); - e - }) - .ok() - }) - .ok_or(Error::NoHostnameProvisioner)?; - Ok(()) } } #[cfg(test)] mod tests { - + use super::{Config, Provision}; + use crate::config::{ + HostnameProvisioner, PasswordProvisioner, UserProvisioner, + }; + use crate::config::{ + HostnameProvisioners, PasswordProvisioners, UserProvisioners, + }; use crate::User; - use super::{hostname, password, user, Provision}; - #[test] fn test_successful_provision() { + let mock_config = Config { + hostname_provisioners: HostnameProvisioners { + backends: vec![HostnameProvisioner::FakeHostnamectl], + }, + user_provisioners: UserProvisioners { + backends: vec![UserProvisioner::FakeUseradd], + }, + password_provisioners: PasswordProvisioners { + backends: vec![PasswordProvisioner::FakePasswd], + }, + 
..Config::default() + }; + let _p = Provision::new( "my-hostname".to_string(), User::new("azureuser", vec![]), + mock_config, ) - .hostname_provisioners([hostname::Provisioner::FakeHostnamectl]) - .user_provisioners([user::Provisioner::FakeUseradd]) - .password_provisioners([password::Provisioner::FakePasswd]) .provision() .unwrap(); } diff --git a/libazureinit/src/provision/password.rs b/libazureinit/src/provision/password.rs index a778be0e..a8f3c043 100644 --- a/libazureinit/src/provision/password.rs +++ b/libazureinit/src/provision/password.rs @@ -9,17 +9,9 @@ use crate::{error::Error, User}; use super::ssh::update_sshd_config; -/// Available tools to set the user's password (if a password is provided). -#[derive(strum::EnumIter, Debug, Clone)] -#[non_exhaustive] -pub enum Provisioner { - /// Use the `passwd` command from `shadow-utils`. - Passwd, - #[cfg(test)] - FakePasswd, -} +use crate::provision::PasswordProvisioner; -impl Provisioner { +impl PasswordProvisioner { pub(crate) fn set(&self, user: &User) -> Result<(), Error> { match self { Self::Passwd => passwd(user), diff --git a/libazureinit/src/provision/ssh.rs b/libazureinit/src/provision/ssh.rs index 3055fc85..e66c512d 100644 --- a/libazureinit/src/provision/ssh.rs +++ b/libazureinit/src/provision/ssh.rs @@ -6,18 +6,20 @@ //! It includes functions to create the necessary `.ssh` directory, set the appropriate //! permissions, and write the provided public keys to the `authorized_keys` file. 
+use crate::error::Error; +use crate::imds::PublicKeys; +use lazy_static::lazy_static; +use nix::unistd::{chown, User}; +use regex::Regex; use std::{ - fs::{self, OpenOptions, Permissions}, + fs::{ + self, OpenOptions, {File, Permissions}, + }, io::{self, Read, Write}, os::unix::fs::{DirBuilderExt, PermissionsExt}, path::PathBuf, process::{Command, Output}, }; - -use crate::error::Error; -use crate::imds::PublicKeys; -use lazy_static::lazy_static; -use regex::Regex; use tempfile::NamedTempFile; use tracing::{error, info, instrument}; @@ -52,36 +54,47 @@ lazy_static! { /// or write to the `authorized_keys` file. #[instrument(skip_all, name = "ssh")] pub(crate) fn provision_ssh( - user: &nix::unistd::User, + user: &User, keys: &[PublicKeys], - authorized_keys_path_string: Option, + authorized_keys_path: PathBuf, + query_sshd_config: bool, ) -> Result<(), Error> { + let authorized_keys_path = if query_sshd_config { + tracing::info!( + "Attempting to get authorized keys path via sshd -G as configured." + ); + + match get_authorized_keys_path_from_sshd(|| { + Command::new("sshd").arg("-G").output() + }) { + Some(path) => user.dir.join(path), + None => { + tracing::warn!("sshd -G failed; using configured authorized_keys_path as fallback."); + user.dir.join(authorized_keys_path) + } + } + } else { + user.dir.join(authorized_keys_path) + }; + let ssh_dir = user.dir.join(".ssh"); std::fs::DirBuilder::new() .recursive(true) .mode(0o700) .create(&ssh_dir)?; - nix::unistd::chown(&ssh_dir, Some(user.uid), Some(user.gid))?; - // It's possible the directory already existed if it's created with the user; make sure - // the permissions are correct. 
std::fs::set_permissions(&ssh_dir, Permissions::from_mode(0o700))?; - let authorized_keys_path = user.dir.join( - authorized_keys_path_string - .or_else(|| { - get_authorized_keys_path_from_sshd(|| { - Command::new("sshd").arg("-G").output() - }) - }) - .unwrap_or_else(|| ".ssh/authorized_keys".to_string()), - ); - info!("Using authorized_keys path: {:?}", authorized_keys_path); + chown(&ssh_dir, Some(user.uid), Some(user.gid))?; + + tracing::info!("Using authorized_keys path: {:?}", authorized_keys_path); - let mut authorized_keys = std::fs::File::create(&authorized_keys_path)?; + let mut authorized_keys = File::create(&authorized_keys_path)?; authorized_keys.set_permissions(Permissions::from_mode(0o600))?; + keys.iter() .try_for_each(|key| writeln!(authorized_keys, "{}", key.key_data))?; - nix::unistd::chown(&authorized_keys_path, Some(user.uid), Some(user.gid))?; + + chown(&authorized_keys_path, Some(user.uid), Some(user.gid))?; Ok(()) } @@ -409,8 +422,9 @@ mod tests { }, ]; - provision_ssh(&user, &keys, Some(".ssh/xauthorized_keys".to_string())) - .unwrap(); + let authorized_keys_path = user.dir.join(".ssh/xauthorized_keys"); + + provision_ssh(&user, &keys, authorized_keys_path, false).unwrap(); let ssh_path = user.dir.join(".ssh"); let ssh_dir = std::fs::File::open(&ssh_path).unwrap(); @@ -447,8 +461,9 @@ mod tests { }, ]; - provision_ssh(&user, &keys, Some(".ssh/xauthorized_keys".to_string())) - .unwrap(); + let authorized_keys_path = user.dir.join(".ssh/xauthorized_keys"); + + provision_ssh(&user, &keys, authorized_keys_path, false).unwrap(); let ssh_dir = std::fs::File::open(user.dir.join(".ssh")).unwrap(); assert_eq!( @@ -472,18 +487,13 @@ mod tests { }, ]; - provision_ssh( - &user, - &keys[..1], - Some(".ssh/xauthorized_keys".to_string()), - ) - .unwrap(); - provision_ssh( - &user, - &keys[1..], - Some(".ssh/xauthorized_keys".to_string()), - ) - .unwrap(); + let authorized_keys_path = user.dir.join(".ssh/xauthorized_keys"); + + provision_ssh(&user, 
&keys[1..], authorized_keys_path.clone(), false) + .unwrap(); + + provision_ssh(&user, &keys[1..], authorized_keys_path.clone(), false) + .unwrap(); let mut auth_file = std::fs::File::open(user.dir.join(".ssh/xauthorized_keys")) diff --git a/libazureinit/src/provision/user.rs b/libazureinit/src/provision/user.rs index 7bcf1e67..3472f9f6 100644 --- a/libazureinit/src/provision/user.rs +++ b/libazureinit/src/provision/user.rs @@ -9,6 +9,8 @@ use tracing::instrument; use crate::{error::Error, imds::PublicKeys}; +use crate::config::UserProvisioner; + /// The user and its related configuration to create on the host. /// /// A bare minimum user includes a name and a set of SSH public keys to allow @@ -105,17 +107,7 @@ impl User { } } -/// Available tools to create the user. -#[derive(strum::EnumIter, Debug, Clone)] -#[non_exhaustive] -pub enum Provisioner { - /// Use the `useradd` command from `shadow-utils`. - Useradd, - #[cfg(test)] - FakeUseradd, -} - -impl Provisioner { +impl UserProvisioner { pub(crate) fn create(&self, user: &User) -> Result<(), Error> { match self { Self::Useradd => { diff --git a/src/main.rs b/src/main.rs index f412636c..58e431e3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,13 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
- +use std::path::PathBuf; mod kvp; mod logging; pub use logging::{initialize_tracing, setup_layers}; use anyhow::Context; use clap::Parser; +use libazureinit::config::Config; use libazureinit::imds::InstanceMetadata; use libazureinit::User; use libazureinit::{ @@ -14,7 +15,7 @@ use libazureinit::{ goalstate, imds, media, media::{get_mount_device, Environment}, reqwest::{header, Client}, - HostnameProvisioner, PasswordProvisioner, Provision, UserProvisioner, + Provision, }; use std::process::ExitCode; use std::time::Duration; @@ -42,6 +43,13 @@ struct Cli { default_value = "" )] groups: Vec, + + #[arg( + long, + help = "Path to the configuration file", + env = "AZURE_INIT_CONFIG" + )] + config: Option, } #[instrument] @@ -94,9 +102,18 @@ async fn main() -> ExitCode { eprintln!("Warning: Failed to set up tracing layers: {:?}", e); } - let result = provision().await; + let opts = Cli::parse(); + + let config = match Config::load(opts.config.clone()) { + Ok(config) => config, + Err(error) => { + eprintln!("Failed to load configuration: {error:?}"); + eprintln!("Example configuration:\n\n{}", Config::default()); + return ExitCode::FAILURE; + } + }; - match result { + match provision(config, opts).await { Ok(_) => ExitCode::SUCCESS, Err(e) => { tracing::error!("Provisioning failed with error: {:?}", e); @@ -116,7 +133,7 @@ async fn main() -> ExitCode { } #[instrument(name = "root")] -async fn provision() -> Result<(), anyhow::Error> { +async fn provision(config: Config, opts: Cli) -> Result<(), anyhow::Error> { let system = System::new(); let kernel_version = system .kernel_version() @@ -133,8 +150,6 @@ async fn provision() -> Result<(), anyhow::Error> { azure_init_version ); - let opts = Cli::parse(); - let mut default_headers = header::HeaderMap::new(); let user_agent = header::HeaderValue::from_str( format!("azure-init v{VERSION}").as_str(), @@ -176,19 +191,7 @@ async fn provision() -> Result<(), anyhow::Error> { let user = User::new(username, 
im.compute.public_keys).with_groups(opts.groups); - Provision::new(im.compute.os_profile.computer_name, user) - .hostname_provisioners([ - #[cfg(feature = "hostnamectl")] - HostnameProvisioner::Hostnamectl, - ]) - .user_provisioners([ - #[cfg(feature = "useradd")] - UserProvisioner::Useradd, - ]) - .password_provisioners([ - #[cfg(feature = "passwd")] - PasswordProvisioner::Passwd, - ]) + Provision::new(im.compute.os_profile.computer_name, user, config) .provision()?; let vm_goalstate = goalstate::get_goalstate( diff --git a/tests/functional_tests.rs b/tests/functional_tests.rs index 2e15cbcf..174776eb 100644 --- a/tests/functional_tests.rs +++ b/tests/functional_tests.rs @@ -1,19 +1,21 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +use libazureinit::config::Config; use libazureinit::imds::PublicKeys; use libazureinit::User; use libazureinit::{ goalstate, imds, reqwest::{header, Client}, - HostnameProvisioner, PasswordProvisioner, Provision, UserProvisioner, + Provision, }; - use std::env; use std::time::Duration; #[tokio::main] async fn main() { + let config = Config::default(); + let cli_args: Vec = env::args().collect(); let mut default_headers = header::HeaderMap::new(); let user_agent = header::HeaderValue::from_str("azure-init").unwrap(); @@ -98,12 +100,13 @@ async fn main() { }, ]; - Provision::new("my-hostname".to_string(), User::new(username, keys)) - .hostname_provisioners([HostnameProvisioner::Hostnamectl]) - .user_provisioners([UserProvisioner::Useradd]) - .password_provisioners([PasswordProvisioner::Passwd]) - .provision() - .expect("Failed to provision host"); + Provision::new( + "my-hostname".to_string(), + User::new(username, keys), + config, + ) + .provision() + .expect("Failed to provision host"); println!("VM successfully provisioned"); println!(); From 71cd9ff53f65a91b21b9043fda30866d99fbff1f Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Thu, 12 Dec 2024 14:56:43 -0800 Subject: [PATCH 08/13] Resolving cargofmt 
issue --- src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 752ff95b..6c719f8b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -103,7 +103,7 @@ async fn main() -> ExitCode { } let opts = Cli::parse(); - + let config = match Config::load(opts.config.clone()) { Ok(config) => config, Err(error) => { @@ -149,12 +149,12 @@ async fn provision(config: Config, opts: Cli) -> Result<(), anyhow::Error> { os_version, azure_init_version ); - + let mut default_headers = header::HeaderMap::new(); let user_agent = header::HeaderValue::from_str( format!("azure-init v{VERSION}").as_str(), )?; - + default_headers.insert(header::USER_AGENT, user_agent); let client = Client::builder() .timeout(std::time::Duration::from_secs(30)) From 8b2c8299dd1319da0b9dd08a056ab4f9a5e73611 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Tue, 17 Dec 2024 15:52:24 -0800 Subject: [PATCH 09/13] Auditing tracing::info! calls in an attempt to clean up KVP file for readability purposes. 
--- libazureinit/src/config.rs | 134 ++++++++++++++++++++----------------- libazureinit/src/http.rs | 2 +- 2 files changed, 73 insertions(+), 63 deletions(-) diff --git a/libazureinit/src/config.rs b/libazureinit/src/config.rs index b5edef6a..68627073 100644 --- a/libazureinit/src/config.rs +++ b/libazureinit/src/config.rs @@ -292,7 +292,7 @@ impl Config { Figment::from(Serialized::defaults(Config::default())); if base_path.exists() { - tracing::info!(path=?base_path, "Loading base configuration file"); + tracing::debug!(path=?base_path, "Loading base configuration file"); figment = figment.merge(Toml::file(base_path)); } else { tracing::warn!( @@ -307,7 +307,7 @@ impl Config { if cli_path.is_dir() { figment = Self::merge_toml_directory(figment, cli_path)?; } else { - tracing::info!( + tracing::debug!( "Merging configuration file from CLI: {:?}", cli_path ); @@ -315,13 +315,19 @@ impl Config { } } - figment.extract::().map_err(|e| { - tracing::error!("Failed to extract configuration: {:?}", e); - Error::Io(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Configuration error: {:?}", e), - )) - }) + figment + .extract::() + .map(|config| { + tracing::info!("Configuration successfully loaded."); + config + }) + .map_err(|e| { + tracing::error!("Failed to extract configuration: {:?}", e); + Error::Io(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Configuration error: {:?}", e), + )) + }) } /// Helper function to merge `.toml` files from a directory into the Figment configuration. 
@@ -349,12 +355,12 @@ impl Config { entries.sort(); for path_entry in entries { - tracing::info!("Merging configuration file: {:?}", path_entry); + tracing::debug!("Merging configuration file: {:?}", path_entry); figment = figment.merge(Toml::file(path_entry)); } Ok(figment) } else { - tracing::info!("Directory {:?} not found, skipping.", dir_path); + tracing::debug!("Directory {:?} not found, skipping.", dir_path); Ok(figment.clone()) } } @@ -396,13 +402,13 @@ mod tests { #[test] fn test_load_invalid_config() -> Result<(), Error> { - tracing::info!("Starting test_load_invalid_config..."); + tracing::debug!("Starting test_load_invalid_config..."); let dir = tempdir()?; let drop_in_path = dir.path().join("drop_in_path"); let file_path = dir.path().join("invalid_config.toml"); - tracing::info!("Writing an invalid configuration file..."); + tracing::debug!("Writing an invalid configuration file..."); let mut file = fs::File::create(&file_path)?; writeln!( file, @@ -413,13 +419,13 @@ mod tests { "# )?; - tracing::info!("Attempting to load configuration from file..."); + tracing::debug!("Attempting to load configuration from file..."); let result: Result = Config::load_from(file_path, drop_in_path, None); assert!(result.is_err(), "Expected an error due to invalid config"); - tracing::info!( + tracing::debug!( "test_load_invalid_config completed with expected error." ); @@ -428,7 +434,7 @@ mod tests { #[test] fn test_load_invalid_hostname_provisioner_config() -> Result<(), Error> { - tracing::info!( + tracing::debug!( "Starting test_load_invalid_hostname_provisioner_config..." ); @@ -437,7 +443,7 @@ mod tests { let file_path = dir.path().join("invalid_hostname_provisioner_config.toml"); - tracing::info!( + tracing::debug!( "Writing an invalid hostname provisioner configuration file..." 
); let mut file = fs::File::create(&file_path)?; @@ -449,7 +455,7 @@ mod tests { "# )?; - tracing::info!("Attempting to load hostname provisioner configuration from file..."); + tracing::debug!("Attempting to load hostname provisioner configuration from file..."); let result: Result = Config::load_from(file_path, drop_in_path, None); assert!( @@ -457,20 +463,22 @@ mod tests { "Expected an error due to invalid hostname provisioner config" ); - tracing::info!("test_load_invalid_hostname_provisioner_config completed with expected error."); + tracing::debug!("test_load_invalid_hostname_provisioner_config completed with expected error."); Ok(()) } #[test] fn test_load_invalid_user_provisioner_config() -> Result<(), Error> { - tracing::info!("Starting test_load_invalid_user_provisioner_config..."); + tracing::debug!( + "Starting test_load_invalid_user_provisioner_config..." + ); let dir = tempdir()?; let drop_in_path = dir.path().join("drop_in_path"); let file_path = dir.path().join("invalid_user_provisioner_config.toml"); - tracing::info!( + tracing::debug!( "Writing an invalid user provisioner configuration file..." ); let mut file = fs::File::create(&file_path)?; @@ -482,7 +490,7 @@ mod tests { "# )?; - tracing::info!( + tracing::debug!( "Attempting to load user provisioner configuration from file..." ); let result: Result = @@ -492,14 +500,14 @@ mod tests { "Expected an error due to invalid user provisioner config" ); - tracing::info!("test_load_invalid_user_provisioner_config completed with expected error."); + tracing::debug!("test_load_invalid_user_provisioner_config completed with expected error."); Ok(()) } #[test] fn test_load_invalid_password_provisioner_config() -> Result<(), Error> { - tracing::info!( + tracing::debug!( "Starting test_load_invalid_password_provisioner_config..." 
); @@ -508,7 +516,7 @@ mod tests { let file_path = dir.path().join("invalid_password_provisioner_config.toml"); - tracing::info!( + tracing::debug!( "Writing an invalid password provisioner configuration file..." ); let mut file = fs::File::create(&file_path)?; @@ -520,7 +528,7 @@ mod tests { "# )?; - tracing::info!("Attempting to load password provisioner configuration from file..."); + tracing::debug!("Attempting to load password provisioner configuration from file..."); let result: Result = Config::load_from(file_path, drop_in_path, None); assert!( @@ -528,14 +536,14 @@ mod tests { "Expected an error due to invalid password provisioner config" ); - tracing::info!("test_load_invalid_password_provisioner_config completed with expected error."); + tracing::debug!("test_load_invalid_password_provisioner_config completed with expected error."); Ok(()) } #[test] fn test_empty_config_file() -> Result<(), Error> { - tracing::info!( + tracing::debug!( "Starting test_empty_config_file_uses_defaults_when_merged..." 
); @@ -543,13 +551,13 @@ mod tests { let drop_in_path: PathBuf = dir.path().join("drop_in_path"); let empty_file_path = dir.path().join("empty_config.toml"); - tracing::info!("Creating an empty configuration file..."); + tracing::debug!("Creating an empty configuration file..."); fs::File::create(&empty_file_path)?; - tracing::info!("Loading configuration with empty file..."); + tracing::debug!("Loading configuration with empty file..."); let config = Config::load_from(empty_file_path, drop_in_path, None)?; - tracing::info!("Verifying configuration matches defaults..."); + tracing::debug!("Verifying configuration matches defaults..."); assert_eq!( config.ssh.authorized_keys_path.to_str().unwrap(), ".ssh/authorized_keys" @@ -586,7 +594,7 @@ mod tests { assert!(config.telemetry.kvp_diagnostics); - tracing::info!("test_empty_config_file_uses_defaults_when_merged completed successfully."); + tracing::debug!("test_empty_config_file_uses_defaults_when_merged completed successfully."); Ok(()) } @@ -597,7 +605,7 @@ mod tests { let drop_in_path: PathBuf = dir.path().join("drop_in_path"); let override_file_path = dir.path().join("override_config.toml"); - tracing::info!( + tracing::debug!( "Writing an override configuration file with custom values..." ); let mut override_file = fs::File::create(&override_file_path)?; @@ -622,7 +630,7 @@ mod tests { "# )?; - tracing::info!("Loading override configuration from file..."); + tracing::debug!("Loading override configuration from file..."); let config = Config::load_from(override_file_path, drop_in_path, None) .map_err(|e| { tracing::error!( @@ -632,14 +640,14 @@ mod tests { e })?; - tracing::info!("Verifying merged SSH configuration values..."); + tracing::debug!("Verifying merged SSH configuration values..."); assert_eq!( config.ssh.authorized_keys_path.to_str().unwrap(), ".ssh/authorized_keys" ); assert!(!config.ssh.query_sshd_config); - tracing::info!( + tracing::debug!( "Verifying default hostname provisioner configuration..." 
); assert_eq!( @@ -647,13 +655,13 @@ mod tests { vec![HostnameProvisioner::Hostnamectl] ); - tracing::info!("Verifying merged user provisioner configuration..."); + tracing::debug!("Verifying merged user provisioner configuration..."); assert_eq!( config.user_provisioners.backends, vec![UserProvisioner::Useradd] ); - tracing::info!( + tracing::debug!( "Verifying merged password provisioner configuration..." ); assert_eq!( @@ -661,21 +669,21 @@ mod tests { vec![PasswordProvisioner::Passwd] ); - tracing::info!("Verifying merged IMDS configuration..."); + tracing::debug!("Verifying merged IMDS configuration..."); assert_eq!(config.imds.connection_timeout_secs, 5.0); assert_eq!(config.imds.read_timeout_secs, 120.0); assert_eq!(config.imds.total_retry_timeout_secs, 300.0); - tracing::info!("Verifying merged provisioning media configuration..."); + tracing::debug!("Verifying merged provisioning media configuration..."); assert!(!config.provisioning_media.enable); - tracing::info!("Verifying merged Azure proxy agent configuration..."); + tracing::debug!("Verifying merged Azure proxy agent configuration..."); assert!(!config.azure_proxy_agent.enable); - tracing::info!("Verifying merged telemetry configuration..."); + tracing::debug!("Verifying merged telemetry configuration..."); assert!(!config.telemetry.kvp_diagnostics); - tracing::info!( + tracing::debug!( "test_load_and_merge_with_default_config completed successfully." 
); @@ -688,56 +696,58 @@ mod tests { let drop_in_path: PathBuf = dir.path().join("drop_in_path"); let base_path = dir.path().join("base_path"); - tracing::info!("Starting test_default_config..."); + tracing::debug!("Starting test_default_config..."); - tracing::info!("Loading default configuration without overrides..."); + tracing::debug!("Loading default configuration without overrides..."); let config = Config::load_from(base_path, drop_in_path, None)?; - tracing::info!("Verifying default SSH configuration values..."); + tracing::debug!("Verifying default SSH configuration values..."); assert_eq!( config.ssh.authorized_keys_path.to_str().unwrap(), ".ssh/authorized_keys" ); assert!(config.ssh.query_sshd_config); - tracing::info!("Verifying default hostname provisioner..."); + tracing::debug!("Verifying default hostname provisioner..."); assert_eq!( config.hostname_provisioners.backends, vec![HostnameProvisioner::Hostnamectl] ); - tracing::info!("Verifying default user provisioner..."); + tracing::debug!("Verifying default user provisioner..."); assert_eq!( config.user_provisioners.backends, vec![UserProvisioner::Useradd] ); - tracing::info!("Verifying default password provisioner..."); + tracing::debug!("Verifying default password provisioner..."); assert_eq!( config.password_provisioners.backends, vec![PasswordProvisioner::Passwd] ); - tracing::info!("Verifying default IMDS configuration..."); + tracing::debug!("Verifying default IMDS configuration..."); assert_eq!(config.imds.connection_timeout_secs, 2.0); assert_eq!(config.imds.read_timeout_secs, 60.0); assert_eq!(config.imds.total_retry_timeout_secs, 300.0); - tracing::info!("Verifying default provisioning media configuration..."); + tracing::debug!( + "Verifying default provisioning media configuration..." 
+ ); assert!(config.provisioning_media.enable); - tracing::info!("Verifying default Azure proxy agent configuration..."); + tracing::debug!("Verifying default Azure proxy agent configuration..."); assert!(config.azure_proxy_agent.enable); - tracing::info!("Verifying default wireserver configuration..."); + tracing::debug!("Verifying default wireserver configuration..."); assert_eq!(config.wireserver.connection_timeout_secs, 2.0); assert_eq!(config.wireserver.read_timeout_secs, 60.0); assert_eq!(config.wireserver.total_retry_timeout_secs, 1200.0); - tracing::info!("Verifying default telemetry configuration..."); + tracing::debug!("Verifying default telemetry configuration..."); assert!(config.telemetry.kvp_diagnostics); - tracing::info!("test_default_config completed successfully."); + tracing::debug!("test_default_config completed successfully."); Ok(()) } @@ -838,7 +848,7 @@ mod tests { #[test] fn test_merge_toml_basic_and_progressive() -> Result<(), Error> { - tracing::info!("Starting test_merge_toml_basic_and_progressive..."); + tracing::debug!("Starting test_merge_toml_basic_and_progressive..."); let dir = tempdir()?; let drop_in_path: PathBuf = dir.path().join("drop_in_path"); @@ -848,7 +858,7 @@ mod tests { let override_file_path_1 = drop_in_path.join("override_config_1.toml"); let override_file_path_2 = drop_in_path.join("override_config_2.toml"); - tracing::info!("Writing base configuration..."); + tracing::debug!("Writing base configuration..."); let mut base_file = fs::File::create(&base_file_path)?; writeln!( base_file, @@ -860,7 +870,7 @@ mod tests { "# )?; - tracing::info!("Writing first override configuration..."); + tracing::debug!("Writing first override configuration..."); let mut override_file_1 = fs::File::create(&override_file_path_1)?; writeln!( override_file_1, @@ -870,7 +880,7 @@ mod tests { "# )?; - tracing::info!("Writing second override configuration..."); + tracing::debug!("Writing second override configuration..."); let mut 
override_file_2 = fs::File::create(&override_file_path_2)?; writeln!( override_file_2, @@ -882,10 +892,10 @@ mod tests { "# )?; - tracing::info!("Loading and merging configurations..."); + tracing::debug!("Loading and merging configurations..."); let config = Config::load_from(base_file_path, drop_in_path, None)?; - tracing::info!("Verifying merged configuration..."); + tracing::debug!("Verifying merged configuration..."); assert_eq!( config.ssh.authorized_keys_path.to_str().unwrap(), "/custom/.ssh/authorized_keys", @@ -893,7 +903,7 @@ mod tests { assert!(!config.ssh.query_sshd_config); assert!(!config.telemetry.kvp_diagnostics); - tracing::info!( + tracing::debug!( "test_merge_toml_basic_and_progressive completed successfully." ); Ok(()) diff --git a/libazureinit/src/http.rs b/libazureinit/src/http.rs index 2fcf5e9f..cc89f788 100644 --- a/libazureinit/src/http.rs +++ b/libazureinit/src/http.rs @@ -111,7 +111,7 @@ async fn request( let now = std::time::Instant::now(); let mut attempt = 0_u32; loop { - let span = tracing::info_span!("request", attempt, http_status = tracing::field::Empty); + let span = tracing::debug_span!("request", attempt, http_status = tracing::field::Empty); let req = request.try_clone().expect("The request body MUST be clone-able"); match client .execute(req) From e14d8ac81bed5b010203bfea405ec9f30899ff29 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Tue, 17 Dec 2024 22:43:11 -0800 Subject: [PATCH 10/13] Add filter --- src/logging.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/logging.rs b/src/logging.rs index eef10f42..31461962 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -31,7 +31,10 @@ pub fn setup_layers( let emit_kvp_layer = match EmitKVPLayer::new(std::path::PathBuf::from( "/var/lib/hyperv/.kvp_pool_1", )) { - Ok(layer) => Some(layer), + Ok(layer) => { + let filter = EnvFilter::new("INFO"); + Some(layer.with_filter(filter)) + } Err(e) => { event!(Level::ERROR, "Failed to initialize EmitKVPLayer: {}. 
Continuing without KVP logging.", e); None From 0536a3b68e27030668f7a0f85f0eaf70e7e0c97f Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Fri, 20 Dec 2024 14:40:02 -0800 Subject: [PATCH 11/13] Refactor emit_kvp_kayer and otel_layer with improved per-layer filtering, adjust key_buf logic to avoid indexing, refine tracing and function calls, and streamline start/end time declarations as per PR feedback. --- libazureinit/src/config.rs | 13 ++-- libazureinit/src/http.rs | 6 +- libazureinit/src/provision/ssh.rs | 8 +- libazureinit/src/provision/user.rs | 2 + src/kvp.rs | 113 +++++++++++++++-------------- src/logging.rs | 13 ++-- src/main.rs | 5 +- 7 files changed, 88 insertions(+), 72 deletions(-) diff --git a/libazureinit/src/config.rs b/libazureinit/src/config.rs index 68627073..d11db6d5 100644 --- a/libazureinit/src/config.rs +++ b/libazureinit/src/config.rs @@ -292,7 +292,7 @@ impl Config { Figment::from(Serialized::defaults(Config::default())); if base_path.exists() { - tracing::debug!(path=?base_path, "Loading base configuration file"); + tracing::info!(path=?base_path, "Loading base configuration file"); figment = figment.merge(Toml::file(base_path)); } else { tracing::warn!( @@ -307,7 +307,7 @@ impl Config { if cli_path.is_dir() { figment = Self::merge_toml_directory(figment, cli_path)?; } else { - tracing::debug!( + tracing::info!( "Merging configuration file from CLI: {:?}", cli_path ); @@ -318,7 +318,10 @@ impl Config { figment .extract::() .map(|config| { - tracing::info!("Configuration successfully loaded."); + tracing::info!( + target: "kvp", + "Configuration successfully loaded." 
+ ); config }) .map_err(|e| { @@ -355,12 +358,12 @@ impl Config { entries.sort(); for path_entry in entries { - tracing::debug!("Merging configuration file: {:?}", path_entry); + tracing::info!("Merging configuration file: {:?}", path_entry); figment = figment.merge(Toml::file(path_entry)); } Ok(figment) } else { - tracing::debug!("Directory {:?} not found, skipping.", dir_path); + tracing::info!("Directory {:?} not found, skipping.", dir_path); Ok(figment.clone()) } } diff --git a/libazureinit/src/http.rs b/libazureinit/src/http.rs index cc89f788..a48c30f6 100644 --- a/libazureinit/src/http.rs +++ b/libazureinit/src/http.rs @@ -111,7 +111,7 @@ async fn request( let now = std::time::Instant::now(); let mut attempt = 0_u32; loop { - let span = tracing::debug_span!("request", attempt, http_status = tracing::field::Empty); + let span = tracing::info_span!("request", attempt, http_status = tracing::field::Empty); let req = request.try_clone().expect("The request body MUST be clone-able"); match client .execute(req) @@ -121,12 +121,12 @@ async fn request( let _enter = span.enter(); let statuscode = response.status(); span.record("http_status", statuscode.as_u16()); - tracing::info!(url=response.url().as_str(), "HTTP response received"); + tracing::info!(target: "kvp", url=response.url().as_str(), "HTTP response received"); match response.error_for_status() { Ok(response) => { if statuscode == StatusCode::OK { - tracing::info!("HTTP response succeeded with status {}", statuscode); + tracing::info!(target: "kvp", "HTTP response succeeded with status {}", statuscode); return Ok((response, retry_for.saturating_sub(now.elapsed() + retry_interval))); } }, diff --git a/libazureinit/src/provision/ssh.rs b/libazureinit/src/provision/ssh.rs index e66c512d..bcf02706 100644 --- a/libazureinit/src/provision/ssh.rs +++ b/libazureinit/src/provision/ssh.rs @@ -86,7 +86,11 @@ pub(crate) fn provision_ssh( chown(&ssh_dir, Some(user.uid), Some(user.gid))?; - tracing::info!("Using 
authorized_keys path: {:?}", authorized_keys_path); + tracing::info!( + target: "kvp", + "Using authorized_keys path: {:?}", + authorized_keys_path + ); let mut authorized_keys = File::create(&authorized_keys_path)?; authorized_keys.set_permissions(Permissions::from_mode(0o600))?; @@ -139,6 +143,7 @@ fn run_sshd_command( match sshd_config_command_runner() { Ok(output) if output.status.success() => { info!( + target: "kvp", stdout_length = output.stdout.len(), "Executed sshd -G successfully", ); @@ -184,6 +189,7 @@ fn extract_authorized_keys_file_path(stdout: &[u8]) -> Option { if line.starts_with("authorizedkeysfile") { let keypath = line.split_whitespace().nth(1).map(|s| { info!( + target: "kvp", authorizedkeysfile = %s, "Using sshd's authorizedkeysfile path configuration" ); diff --git a/libazureinit/src/provision/user.rs b/libazureinit/src/provision/user.rs index 3472f9f6..1a3fe483 100644 --- a/libazureinit/src/provision/user.rs +++ b/libazureinit/src/provision/user.rs @@ -135,6 +135,7 @@ fn user_exists(username: &str) -> Result { fn useradd(user: &User) -> Result<(), Error> { if user_exists(&user.name)? { tracing::info!( + target: "kvp", "User '{}' already exists. Skipping user creation.", user.name ); @@ -142,6 +143,7 @@ fn useradd(user: &User) -> Result<(), Error> { let group_list = user.groups.join(","); tracing::info!( + target: "kvp", "User '{}' is being added to the following groups: {}", user.name, group_list diff --git a/src/kvp.rs b/src/kvp.rs index 49ae504a..35fd6feb 100644 --- a/src/kvp.rs +++ b/src/kvp.rs @@ -13,7 +13,7 @@ use std::{ fmt::{self as std_fmt, Write as std_write}, fs::{File, OpenOptions}, - io::{self, Error, ErrorKind, Write}, + io::{self, ErrorKind, Write}, os::unix::fs::MetadataExt, path::Path, time::{Duration, Instant, SystemTime, UNIX_EPOCH}, @@ -81,10 +81,10 @@ impl Visit for StringVisitor<'_> { /// * `value` - The debug value associated with the field. 
fn record_debug( &mut self, - _field: &tracing::field::Field, + field: &tracing::field::Field, value: &dyn std_fmt::Debug, ) { - write!(self.string, "{}={:?}; ", _field.name(), value) + write!(self.string, "{}={:?}; ", field.name(), value) .expect("Writing to a string should never fail"); } } @@ -134,7 +134,7 @@ impl EmitKVPLayer { /// # Arguments /// * `file_path` - The file path where the KVP data will be stored. /// - pub fn new(file_path: std::path::PathBuf) -> Result { + pub fn new(file_path: std::path::PathBuf) -> Result { truncate_guest_pool_file(&file_path)?; let file = OpenOptions::new() @@ -309,21 +309,11 @@ where let start_time = end_time.checked_sub(elapsed).unwrap_or(UNIX_EPOCH); - let start_time_dt = DateTime::::from( - UNIX_EPOCH - + start_time - .duration_since(UNIX_EPOCH) - .unwrap_or_default(), - ) - .format("%Y-%m-%dT%H:%M:%S%.3fZ"); + let start_time_dt = DateTime::::from(start_time) + .format("%Y-%m-%dT%H:%M:%S%.3fZ"); - let end_time_dt = DateTime::::from( - UNIX_EPOCH - + end_time - .duration_since(UNIX_EPOCH) - .unwrap_or_default(), - ) - .format("%Y-%m-%dT%H:%M:%S%.3fZ"); + let end_time_dt = DateTime::::from(end_time) + .format("%Y-%m-%dT%H:%M:%S%.3fZ"); let event_value = format!( "Start: {} | End: {} | Status: {}", @@ -362,41 +352,51 @@ fn generate_event_key( /// exceeds the allowed size, it is split into multiple slices for encoding. /// This is used for logging events to a KVP file. /// +/// # Note +/// - The key must be zero-padded to `HV_KVP_EXCHANGE_MAX_KEY_SIZE` to meet Hyper-V's expected formatting. +/// /// # Arguments /// * `key` - The key as a string slice. /// * `value` - The value associated with the key. 
fn encode_kvp_item(key: &str, value: &str) -> Vec> { - let key_bytes = key.as_bytes(); - let value_bytes = value.as_bytes(); - - let key_len = key_bytes.len().min(HV_KVP_EXCHANGE_MAX_KEY_SIZE); - let mut key_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_KEY_SIZE]; - key_buf[..key_len].copy_from_slice(&key_bytes[..key_len]); - - let mut kvp_slices = Vec::new(); + let key_buf = key + .as_bytes() + .iter() + .take(HV_KVP_EXCHANGE_MAX_KEY_SIZE) + .chain( + vec![0_u8; HV_KVP_EXCHANGE_MAX_KEY_SIZE.saturating_sub(key.len())] + .iter(), + ) + .copied() + .collect::>(); + + debug_assert!(key_buf.len() == HV_KVP_EXCHANGE_MAX_KEY_SIZE); + + let kvp_slices = value + .as_bytes() + .chunks(HV_KVP_AZURE_MAX_VALUE_SIZE) + .map(|chunk| { + let mut buffer = Vec::with_capacity( + HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE, + ); + buffer.extend_from_slice(&key_buf); + buffer.extend_from_slice(chunk); + while buffer.len() + < HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE + { + buffer.push(0); + } - for chunk in value_bytes.chunks(HV_KVP_AZURE_MAX_VALUE_SIZE) { - let mut value_buf = vec![0u8; HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; - value_buf[..chunk.len()].copy_from_slice(chunk); + buffer + }) + .collect::>>(); - kvp_slices.push(encode_kvp_slice(key_buf.clone(), value_buf)); - } + debug_assert!(kvp_slices.iter().all(|kvp| kvp.len() + == HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE)); kvp_slices } -/// Combines the key and value of a KVP into a single byte slice, ensuring -/// proper formatting for consumption by hv_kvp_daemon service, -/// which typically reads from /var/lib/hyperv/.kvp_pool_1. -fn encode_kvp_slice(key: Vec, value: Vec) -> Vec { - let mut buffer = Vec::with_capacity( - HV_KVP_EXCHANGE_MAX_KEY_SIZE + HV_KVP_EXCHANGE_MAX_VALUE_SIZE, - ); - buffer.extend_from_slice(&key); - buffer.extend_from_slice(&value); - buffer -} - /// Decodes a KVP byte slice into its corresponding key and value strings. 
/// This is useful for inspecting or logging raw KVP data. #[cfg(test)] @@ -412,18 +412,23 @@ pub fn decode_kvp_item( } let key = String::from_utf8( - record_data[0..HV_KVP_EXCHANGE_MAX_KEY_SIZE].to_vec(), + record_data + .iter() + .take(HV_KVP_EXCHANGE_MAX_KEY_SIZE) + .cloned() + .collect::>(), ) .unwrap_or_else(|_| String::new()) .trim_end_matches('\x00') .to_string(); let value = String::from_utf8( - record_data[HV_KVP_EXCHANGE_MAX_KEY_SIZE..] + record_data .iter() + .skip(HV_KVP_EXCHANGE_MAX_KEY_SIZE) .take(HV_KVP_AZURE_MAX_VALUE_SIZE) .cloned() - .collect(), + .collect::>(), ) .unwrap_or_else(|_| String::new()) .trim_end_matches('\x00') @@ -435,12 +440,9 @@ pub fn decode_kvp_item( /// Truncates the guest pool KVP file if it contains stale data (i.e., data /// older than the system's boot time). Logs whether the file was truncated /// or no action was needed. -fn truncate_guest_pool_file(kvp_file: &Path) -> Result<(), Error> { - let boot_time = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map_err(|e| Error::new(std::io::ErrorKind::Other, e))? - .as_secs() - - get_uptime()?.as_secs(); +fn truncate_guest_pool_file(kvp_file: &Path) -> Result<(), anyhow::Error> { + let boot_time = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() + - get_uptime().as_secs(); match kvp_file.metadata() { Ok(metadata) => { @@ -461,7 +463,8 @@ fn truncate_guest_pool_file(kvp_file: &Path) -> Result<(), Error> { return Ok(()); } Err(e) => { - return Err(e); + return Err(anyhow::Error::from(e) + .context("Failed to access file metadata")); } } @@ -471,12 +474,12 @@ fn truncate_guest_pool_file(kvp_file: &Path) -> Result<(), Error> { /// Retrieves the system's uptime using the `sysinfo` crate, returning the duration /// since the system booted. This can be useful for time-based calculations or checks, /// such as determining whether data is stale or calculating the approximate boot time. 
-fn get_uptime() -> Result { +fn get_uptime() -> Duration { let mut system = System::new(); system.refresh_system(); let uptime_seconds = system.uptime(); - Ok(Duration::from_secs(uptime_seconds)) + Duration::from_secs(uptime_seconds) } #[cfg(test)] diff --git a/src/logging.rs b/src/logging.rs index 31461962..8da6cfd2 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -7,6 +7,7 @@ use opentelemetry_sdk::trace::{ }; use tracing::{event, Level}; use tracing_opentelemetry::OpenTelemetryLayer; +use tracing_subscriber::filter::LevelFilter; use tracing_subscriber::fmt::format::FmtSpan; use tracing_subscriber::{ fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, @@ -26,15 +27,17 @@ pub fn initialize_tracing() -> sdktrace::Tracer { pub fn setup_layers( tracer: sdktrace::Tracer, ) -> Result<(), Box> { - let otel_layer = OpenTelemetryLayer::new(tracer); + let otel_layer = OpenTelemetryLayer::new(tracer) + .with_filter(EnvFilter::from_env("AZURE_INIT_LOG")); + + let kvp_filter = EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .parse_lossy("azure_init=INFO,libazureinit=WARN,kvp=INFO"); let emit_kvp_layer = match EmitKVPLayer::new(std::path::PathBuf::from( "/var/lib/hyperv/.kvp_pool_1", )) { - Ok(layer) => { - let filter = EnvFilter::new("INFO"); - Some(layer.with_filter(filter)) - } + Ok(layer) => Some(layer.with_filter(kvp_filter)), Err(e) => { event!(Level::ERROR, "Failed to initialize EmitKVPLayer: {}. 
Continuing without KVP logging.", e); None diff --git a/src/main.rs b/src/main.rs index 6c719f8b..27bd2869 100644 --- a/src/main.rs +++ b/src/main.rs @@ -132,7 +132,7 @@ async fn main() -> ExitCode { } } -#[instrument(name = "root")] +#[instrument(name = "root", skip_all)] async fn provision(config: Config, opts: Cli) -> Result<(), anyhow::Error> { let system = System::new(); let kernel_version = system @@ -141,13 +141,12 @@ async fn provision(config: Config, opts: Cli) -> Result<(), anyhow::Error> { let os_version = system .os_version() .unwrap_or("Unknown OS Version".to_string()); - let azure_init_version = env!("CARGO_PKG_VERSION"); tracing::info!( "Kernel Version: {}, OS Version: {}, Azure-Init Version: {}", kernel_version, os_version, - azure_init_version + VERSION ); let mut default_headers = header::HeaderMap::new(); From 6257a8158825d32a851f1d1e0a84503b6392a041 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Mon, 13 Jan 2025 15:49:56 -0800 Subject: [PATCH 12/13] Updating note to reflect that both the key and the value are null-byte padded and not just the key. --- src/kvp.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/kvp.rs b/src/kvp.rs index 35fd6feb..48b69749 100644 --- a/src/kvp.rs +++ b/src/kvp.rs @@ -353,7 +353,9 @@ fn generate_event_key( /// This is used for logging events to a KVP file. /// /// # Note -/// - The key must be zero-padded to `HV_KVP_EXCHANGE_MAX_KEY_SIZE` to meet Hyper-V's expected formatting. +/// - The key is zero-padded to `HV_KVP_EXCHANGE_MAX_KEY_SIZE`, and +/// the value is zero-padded to `HV_KVP_AZURE_MAX_VALUE_SIZE` to meet +/// Hyper-V's expected formatting. /// /// # Arguments /// * `key` - The key as a string slice. From 9e6820a6f1d4ac0b435c0def6e58c2d62a237556 Mon Sep 17 00:00:00 2001 From: peytonr18 Date: Thu, 30 Jan 2025 15:43:42 -0800 Subject: [PATCH 13/13] Refactor KVP logging to use module-based targets and update filtering logic. 
--- libazureinit/src/config.rs | 4 ++-- libazureinit/src/http.rs | 4 ++-- libazureinit/src/provision/ssh.rs | 6 +++--- libazureinit/src/provision/user.rs | 4 ++-- src/logging.rs | 17 +++++++++++++---- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/libazureinit/src/config.rs b/libazureinit/src/config.rs index d11db6d5..768bbb58 100644 --- a/libazureinit/src/config.rs +++ b/libazureinit/src/config.rs @@ -319,7 +319,7 @@ impl Config { .extract::() .map(|config| { tracing::info!( - target: "kvp", + target: "libazureinit::config::success", "Configuration successfully loaded." ); config @@ -351,7 +351,7 @@ impl Config { .filter_map(Result::ok) .map(|entry| entry.path()) .filter(|path| { - path.extension().map_or(false, |ext| ext == "toml") + path.extension().is_some_and(|ext| ext == "toml") }) .collect(); diff --git a/libazureinit/src/http.rs b/libazureinit/src/http.rs index a48c30f6..26cf6c1d 100644 --- a/libazureinit/src/http.rs +++ b/libazureinit/src/http.rs @@ -121,12 +121,12 @@ async fn request( let _enter = span.enter(); let statuscode = response.status(); span.record("http_status", statuscode.as_u16()); - tracing::info!(target: "kvp", url=response.url().as_str(), "HTTP response received"); + tracing::info!(target: "libazureinit::http::received", url=response.url().as_str(), "HTTP response received"); match response.error_for_status() { Ok(response) => { if statuscode == StatusCode::OK { - tracing::info!(target: "kvp", "HTTP response succeeded with status {}", statuscode); + tracing::info!(target: "libazureinit::http::success", "HTTP response succeeded with status {}", statuscode); return Ok((response, retry_for.saturating_sub(now.elapsed() + retry_interval))); } }, diff --git a/libazureinit/src/provision/ssh.rs b/libazureinit/src/provision/ssh.rs index bcf02706..3e13eb3c 100644 --- a/libazureinit/src/provision/ssh.rs +++ b/libazureinit/src/provision/ssh.rs @@ -87,7 +87,7 @@ pub(crate) fn provision_ssh( chown(&ssh_dir, Some(user.uid), 
Some(user.gid))?; tracing::info!( - target: "kvp", + target: "libazureinit::ssh::authorized_keys", "Using authorized_keys path: {:?}", authorized_keys_path ); @@ -143,7 +143,7 @@ fn run_sshd_command( match sshd_config_command_runner() { Ok(output) if output.status.success() => { info!( - target: "kvp", + target: "libazureinit::ssh::success", stdout_length = output.stdout.len(), "Executed sshd -G successfully", ); @@ -189,7 +189,7 @@ fn extract_authorized_keys_file_path(stdout: &[u8]) -> Option { if line.starts_with("authorizedkeysfile") { let keypath = line.split_whitespace().nth(1).map(|s| { info!( - target: "kvp", + target: "libazureinit::ssh::authorized_keys", authorizedkeysfile = %s, "Using sshd's authorizedkeysfile path configuration" ); diff --git a/libazureinit/src/provision/user.rs b/libazureinit/src/provision/user.rs index 1a3fe483..112d80b7 100644 --- a/libazureinit/src/provision/user.rs +++ b/libazureinit/src/provision/user.rs @@ -135,7 +135,7 @@ fn user_exists(username: &str) -> Result { fn useradd(user: &User) -> Result<(), Error> { if user_exists(&user.name)? { tracing::info!( - target: "kvp", + target: "libazureinit::user::add", "User '{}' already exists. 
Skipping user creation.", user.name ); @@ -143,7 +143,7 @@ fn useradd(user: &User) -> Result<(), Error> { let group_list = user.groups.join(","); tracing::info!( - target: "kvp", + target: "libazureinit:user::add", "User '{}' is being added to the following groups: {}", user.name, group_list diff --git a/src/logging.rs b/src/logging.rs index 8da6cfd2..73c1e126 100644 --- a/src/logging.rs +++ b/src/logging.rs @@ -7,7 +7,6 @@ use opentelemetry_sdk::trace::{ }; use tracing::{event, Level}; use tracing_opentelemetry::OpenTelemetryLayer; -use tracing_subscriber::filter::LevelFilter; use tracing_subscriber::fmt::format::FmtSpan; use tracing_subscriber::{ fmt, layer::SubscriberExt, EnvFilter, Layer, Registry, @@ -30,9 +29,19 @@ pub fn setup_layers( let otel_layer = OpenTelemetryLayer::new(tracer) .with_filter(EnvFilter::from_env("AZURE_INIT_LOG")); - let kvp_filter = EnvFilter::builder() - .with_default_directive(LevelFilter::INFO.into()) - .parse_lossy("azure_init=INFO,libazureinit=WARN,kvp=INFO"); + let kvp_filter = EnvFilter::builder().parse( + [ + "WARN", + "azure_init=INFO", + "libazureinit::config::success", + "libazureinit::http::received", + "libazureinit::http::success", + "libazureinit::ssh::authorized_keys", + "libazureinit::ssh::success", + "libazureinit::user::add", + ] + .join(","), + )?; let emit_kvp_layer = match EmitKVPLayer::new(std::path::PathBuf::from( "/var/lib/hyperv/.kvp_pool_1",