From 9d6b2a6da77c1df2a8fec159a87fdd431f271736 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 20:41:41 +0200 Subject: [PATCH 01/10] create privacy between SysLogWriter and its user --- src/log/syslog.rs | 99 +++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 42 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 10df1fcd9..7efd9102c 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -1,57 +1,72 @@ use core::fmt::{self, Write}; -use std::ffi::CStr; - use log::{Level, Log, Metadata}; -use crate::system::syslog; - pub struct Syslog; -const LIMIT: usize = 960; -const DOTDOTDOT_START: &[u8] = b"[...] "; -const DOTDOTDOT_END: &[u8] = b" [...]"; -const NULL_BYTE: usize = 1; // for C string compatibility -const BUFSZ: usize = LIMIT + DOTDOTDOT_END.len() + NULL_BYTE; -const FACILITY: libc::c_int = libc::LOG_AUTH; +mod internal { + use crate::system::syslog; + use std::ffi::CStr; -struct SysLogWriter { - buffer: [u8; BUFSZ], - cursor: usize, - facility: libc::c_int, - priority: libc::c_int, -} + const DOTDOTDOT_START: &[u8] = b"[...] "; + const DOTDOTDOT_END: &[u8] = b" [...]"; -impl SysLogWriter { - fn new(priority: libc::c_int, facility: libc::c_int) -> Self { - Self { - buffer: [0; BUFSZ], - cursor: 0, - priority, - facility, - } - } + const MAX_MSG_LEN: usize = 960; + const NULL_BYTE_LEN: usize = 1; // for C string compatibility + const BUFSZ: usize = MAX_MSG_LEN + DOTDOTDOT_END.len() + NULL_BYTE_LEN; - fn append(&mut self, bytes: &[u8]) { - let num_bytes = bytes.len(); - self.buffer[self.cursor..self.cursor + num_bytes].copy_from_slice(bytes); - self.cursor += num_bytes; + pub struct SysLogWriter { + buffer: [u8; BUFSZ], + cursor: usize, + facility: libc::c_int, + priority: libc::c_int, } - fn send_to_syslog(&mut self) { - self.append(&[0]); - let message = CStr::from_bytes_with_nul(&self.buffer[..self.cursor]).unwrap(); - syslog(self.priority, self.facility, message); - self.cursor = 0; + // the caller of the pub functions will have to take care never to `append` more bytes than + // are `available`, or a panic will occur. + impl SysLogWriter { + pub fn new(priority: libc::c_int, facility: libc::c_int) -> Self { + Self { + buffer: [0; BUFSZ], + cursor: 0, + priority, + facility, + } + } + + pub fn append(&mut self, bytes: &[u8]) { + let num_bytes = bytes.len(); + self.buffer[self.cursor..self.cursor + num_bytes].copy_from_slice(bytes); + self.cursor += num_bytes; + } + + pub fn line_break(&mut self) { + self.append(DOTDOTDOT_END); + self.commit_to_syslog(); + self.append(DOTDOTDOT_START); + } + + pub fn commit_to_syslog(&mut self) { + self.append(&[0]); + let message = CStr::from_bytes_with_nul(&self.buffer[..self.cursor]).unwrap(); + syslog(self.priority, self.facility, message); + self.cursor = 0; + } + + pub fn available(&self) -> usize { + MAX_MSG_LEN - self.cursor + } } } +use internal::SysLogWriter; + impl Write for SysLogWriter { fn write_str(&mut self, mut message: &str) -> fmt::Result { loop { - if self.cursor + message.len() > LIMIT { + if message.len() > self.available() { // floor_char_boundary is currently unstable - let mut truncate_boundary = LIMIT - self.cursor; - while truncate_boundary > 0 && !message.is_char_boundary(truncate_boundary) { + let mut truncate_boundary = self.available(); + while !message.is_char_boundary(truncate_boundary) { truncate_boundary -= 1; } @@ -62,17 +77,15 @@ impl Write for SysLogWriter { if truncate_boundary == 0 { // we failed to find a "nice" cut off point, abruptly cut off the msg - truncate_boundary = LIMIT - self.cursor; + truncate_boundary = self.available(); } let left = &message[..truncate_boundary]; let right = &message[truncate_boundary..]; self.append(left.as_bytes()); - self.append(DOTDOTDOT_END); - self.send_to_syslog(); + self.line_break(); - self.append(DOTDOTDOT_START); message = right; } else { self.append(message.as_bytes()); @@ -85,6 +98,8 @@ impl Write for SysLogWriter { } } +const FACILITY: libc::c_int = libc::LOG_AUTH; + impl Log for Syslog { fn enabled(&self, metadata: &Metadata) -> bool { metadata.level() <= log::max_level() && metadata.level() <= log::STATIC_MAX_LEVEL @@ -101,7 +116,7 @@ impl Log for Syslog { let mut writer = SysLogWriter::new(priority, FACILITY); let _ = write!(writer, "{}", record.args()); - writer.send_to_syslog(); + writer.commit_to_syslog(); } fn flush(&self) { From a102907cc0c9fea66615cae2ed4bbae16e281514 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 21:00:23 +0200 Subject: [PATCH 02/10] add Drop method to SysLogWriter --- src/log/syslog.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 7efd9102c..e58449d9d 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -45,7 +45,7 @@ mod internal { self.append(DOTDOTDOT_START); } - pub fn commit_to_syslog(&mut self) { + fn commit_to_syslog(&mut self) { self.append(&[0]); let message = CStr::from_bytes_with_nul(&self.buffer[..self.cursor]).unwrap(); syslog(self.priority, self.facility, message); @@ -56,6 +56,12 @@ mod internal { MAX_MSG_LEN - self.cursor } } + + impl Drop for SysLogWriter { + fn drop(&mut self) { + self.commit_to_syslog(); + } + } } use internal::SysLogWriter; @@ -116,7 +122,6 @@ impl Log for Syslog { let mut writer = SysLogWriter::new(priority, FACILITY); let _ = write!(writer, "{}", record.args()); - writer.commit_to_syslog(); } fn flush(&self) { From 3173860955cfad86d720106102e0f34c67edabb5 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 21:16:38 +0200 Subject: [PATCH 03/10] split boundary calculation away from loop --- src/log/syslog.rs | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index e58449d9d..3f18762de 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -66,25 +66,30 @@ mod internal { use internal::SysLogWriter; +fn suggested_break(message: &str, max_size: usize) -> usize { + // floor_char_boundary is currently unstable + let mut truncate_boundary = max_size; + while !message.is_char_boundary(truncate_boundary) { + truncate_boundary -= 1; + } + + // don't overzealously truncate log messages + truncate_boundary = message[..truncate_boundary] + .rfind(|c: char| c.is_ascii_whitespace()) + .unwrap_or(truncate_boundary); + + if truncate_boundary == 0 { + max_size + } else { + truncate_boundary + } +} + impl Write for SysLogWriter { fn write_str(&mut self, mut message: &str) -> fmt::Result { loop { if message.len() > self.available() { - // floor_char_boundary is currently unstable - let mut truncate_boundary = self.available(); - while !message.is_char_boundary(truncate_boundary) { - truncate_boundary -= 1; - } - - // don't overzealously truncate log messages - truncate_boundary = message[..truncate_boundary] - .rfind(|c: char| c.is_ascii_whitespace()) - .unwrap_or(truncate_boundary); - - if truncate_boundary == 0 { - // we failed to find a "nice" cut off point, abruptly cut off the msg - truncate_boundary = self.available(); - } + let truncate_boundary = suggested_break(message, self.available()); let left = &message[..truncate_boundary]; let right = &message[truncate_boundary..]; From 8c01520695c94496fd1eb39a0489d18e54d9b70d Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 21:32:26 +0200 Subject: [PATCH 04/10] now we can clearly see this is a while loop --- src/log/syslog.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 3f18762de..a596e3f56 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -87,24 +87,20 @@ fn suggested_break(message: &str, max_size: usize) -> usize { impl Write for SysLogWriter { fn write_str(&mut self, mut message: &str) -> fmt::Result { - loop { - if message.len() > self.available() { - let truncate_boundary = suggested_break(message, self.available()); + while message.len() > self.available() { + let truncate_boundary = suggested_break(message, self.available()); - let left = &message[..truncate_boundary]; - let right = &message[truncate_boundary..]; + let left = &message[..truncate_boundary]; + let right = &message[truncate_boundary..]; - self.append(left.as_bytes()); - self.line_break(); + self.append(left.as_bytes()); + self.line_break(); - message = right; - } else { - self.append(message.as_bytes()); - - break; - } + message = right; } + self.append(message.as_bytes()); + Ok(()) } } From e51cc44deb840290b0d71df027d9cfe51fd4f2ba Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 22:02:49 +0200 Subject: [PATCH 05/10] split off floor_char_boundary --- src/log/syslog.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index a596e3f56..99c8a6e5f 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -66,13 +66,21 @@ mod internal { use internal::SysLogWriter; -fn suggested_break(message: &str, max_size: usize) -> usize { - // floor_char_boundary is currently unstable - let mut truncate_boundary = max_size; - while !message.is_char_boundary(truncate_boundary) { - truncate_boundary -= 1; +/// `floor_char_boundary` is currently unstable in Rust +fn floor_char_boundary(data: &str, mut index: usize) -> usize { + if index >= data.len() { + return data.len(); + } + while !data.is_char_boundary(index) { + index -= 1; } + index +} + +fn suggested_break(message: &str, max_size: usize) -> usize { + let mut truncate_boundary = floor_char_boundary(message, max_size); + // don't overzealously truncate log messages truncate_boundary = message[..truncate_boundary] .rfind(|c: char| c.is_ascii_whitespace()) From d696886a61a89ee3cf8391aa001270d9f2e8abd8 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 22:23:21 +0200 Subject: [PATCH 06/10] clearly state the contracts for suggested_break and other functions --- src/log/syslog.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 99c8a6e5f..02ac1fed9 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -23,6 +23,8 @@ mod internal { // the caller of the pub functions will have to take care never to `append` more bytes than // are `available`, or a panic will occur. + // the impl guarantees that after `line_break()`, there will be enough room available for at + // least a single UTF8 character sequence (which is true since MAX_MSG_LEN >= 10) impl SysLogWriter { pub fn new(priority: libc::c_int, facility: libc::c_int) -> Self { Self { @@ -78,6 +80,9 @@ fn floor_char_boundary(data: &str, mut index: usize) -> usize { index } +/// This function REQUIRES that `message` is larger than `max_size` (or a panic will occur). +/// This function WILL return a non-zero result if `max_size` is large enough to fit +/// at least the first character of `message`. fn suggested_break(message: &str, max_size: usize) -> usize { let mut truncate_boundary = floor_char_boundary(message, max_size); @@ -104,6 +109,13 @@ impl Write for SysLogWriter { self.append(left.as_bytes()); self.line_break(); + // This loop while terminate, since either of the following is true: + // 1. truncate_boundary is strictly positive: + // message.len() has strictly decreased, and self.available() has not decreased + // 2. truncate_boundary is zero: + // message.len() has remained unchanged, but self.available() has strictly increased; + // this latter is true since, for truncate_boundary to be 0, self.available() must + // have been not large enough to fit a single UTF8 character message = right; } From 112ce8d3af84ca3ac00d30bdb8c6c73febfa3f19 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Sat, 24 Aug 2024 15:07:27 +0200 Subject: [PATCH 07/10] add test case against splitting UTF8 sequences (introduced by #857) --- src/log/syslog.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 02ac1fed9..0dc5fb74d 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -198,4 +198,11 @@ mod tests { logger.log(&record); } + + #[test] + fn will_not_break_utf8() { + let mut writer = SysLogWriter::new(libc::LOG_DEBUG, FACILITY); + + let _ = write!(writer, "{}¢", "x".repeat(959)); + } } From dd2ee96e2b35bb878bd89944dd46b88f4da71a56 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Sat, 24 Aug 2024 15:16:25 +0200 Subject: [PATCH 08/10] make test case pass --- src/log/syslog.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 0dc5fb74d..bc7fce478 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -92,7 +92,7 @@ fn suggested_break(message: &str, max_size: usize) -> usize { .unwrap_or(truncate_boundary); if truncate_boundary == 0 { - max_size + floor_char_boundary(message, max_size) } else { truncate_boundary } From 5267a2014a1c8a387c0011b4a04fec79278f9b46 Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 23 Aug 2024 22:44:10 +0200 Subject: [PATCH 09/10] overhaul of suggested_break --- src/log/syslog.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index bc7fce478..7f9b935aa 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -84,17 +84,16 @@ fn floor_char_boundary(data: &str, mut index: usize) -> usize { /// This function WILL return a non-zero result if `max_size` is large enough to fit /// at least the first character of `message`. fn suggested_break(message: &str, max_size: usize) -> usize { - let mut truncate_boundary = floor_char_boundary(message, max_size); - - // don't overzealously truncate log messages - truncate_boundary = message[..truncate_boundary] - .rfind(|c: char| c.is_ascii_whitespace()) - .unwrap_or(truncate_boundary); - - if truncate_boundary == 0 { - floor_char_boundary(message, max_size) + // method A: try to split the message in two non-empty parts on an ASCII white space character + // method B: split on the utf8 character boundary that consumes the most data + if let Some(pos) = message.as_bytes()[1..max_size] + .iter() + .rposition(|c| c.is_ascii_whitespace()) + { + // since pos+1 contains ASCII whitespace, it acts as a valid utf8 boundary as well + pos + 1 } else { - truncate_boundary + floor_char_boundary(message, max_size) } } From f2fcdb6f9d2115d00cf46b61bd9d816a2d326d9e Mon Sep 17 00:00:00 2001 From: Marc Schoolderman Date: Fri, 6 Sep 2024 12:01:02 +0200 Subject: [PATCH 10/10] incorporate comments by @rnijveld --- src/log/syslog.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/log/syslog.rs b/src/log/syslog.rs index 7f9b935aa..4033fa409 100644 --- a/src/log/syslog.rs +++ b/src/log/syslog.rs @@ -14,18 +14,20 @@ mod internal { const NULL_BYTE_LEN: usize = 1; // for C string compatibility const BUFSZ: usize = MAX_MSG_LEN + DOTDOTDOT_END.len() + NULL_BYTE_LEN; - pub struct SysLogWriter { + pub struct SysLogMessageWriter { buffer: [u8; BUFSZ], cursor: usize, facility: libc::c_int, priority: libc::c_int, } - // the caller of the pub functions will have to take care never to `append` more bytes than + // - whenever a SysLogMessageWriter has been constructed, a syslog message WILL be created + // for one specific event; this struct functions as a low-level interface for that message + // - the caller of the pub functions will have to take care never to `append` more bytes than // are `available`, or a panic will occur. - // the impl guarantees that after `line_break()`, there will be enough room available for at + // - the impl guarantees that after `line_break()`, there will be enough room available for at // least a single UTF8 character sequence (which is true since MAX_MSG_LEN >= 10) - impl SysLogWriter { + impl SysLogMessageWriter { pub fn new(priority: libc::c_int, facility: libc::c_int) -> Self { Self { buffer: [0; BUFSZ], @@ -59,14 +61,14 @@ mod internal { } } - impl Drop for SysLogWriter { + impl Drop for SysLogMessageWriter { fn drop(&mut self) { self.commit_to_syslog(); } } } -use internal::SysLogWriter; +use internal::SysLogMessageWriter; /// `floor_char_boundary` is currently unstable in Rust fn floor_char_boundary(data: &str, mut index: usize) -> usize { @@ -97,7 +99,7 @@ fn suggested_break(message: &str, max_size: usize) -> usize { } } -impl Write for SysLogWriter { +impl Write for SysLogMessageWriter { fn write_str(&mut self, mut message: &str) -> fmt::Result { while message.len() > self.available() { let truncate_boundary = suggested_break(message, self.available()); @@ -140,7 +142,7 @@ impl Log for Syslog { Level::Trace => libc::LOG_DEBUG, }; - let mut writer = SysLogWriter::new(priority, FACILITY); + let mut writer = SysLogMessageWriter::new(priority, FACILITY); let _ = write!(writer, "{}", record.args()); } @@ -154,7 +156,7 @@ mod tests { use log::Log; use std::fmt::Write; - use super::{SysLogWriter, Syslog, FACILITY}; + use super::{SysLogMessageWriter, Syslog, FACILITY}; #[test] fn can_write_to_syslog() { @@ -169,7 +171,7 @@ mod tests { #[test] fn can_handle_multiple_writes() { - let mut writer = SysLogWriter::new(libc::LOG_DEBUG, FACILITY); + let mut writer = SysLogMessageWriter::new(libc::LOG_DEBUG, FACILITY); for i in 1..20 { let _ = write!(writer, "{}", "Test 123 ".repeat(i)); @@ -200,7 +202,7 @@ mod tests { #[test] fn will_not_break_utf8() { - let mut writer = SysLogWriter::new(libc::LOG_DEBUG, FACILITY); + let mut writer = SysLogMessageWriter::new(libc::LOG_DEBUG, FACILITY); let _ = write!(writer, "{}¢", "x".repeat(959)); }