diff --git a/src/diff.rs b/src/diff.rs index f769a29..bbb725d 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -5,7 +5,7 @@ use crate::params::{parse_params, Format}; use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, unified_diff}; +use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; use std::env::ArgsOs; use std::ffi::OsString; use std::fs; @@ -79,6 +79,7 @@ pub fn main(opts: Peekable) -> ExitCode { eprintln!("{error}"); exit(2); }), + Format::SideBySide => side_diff::diff(&from_content, &to_content), }; if params.brief && !result.is_empty() { println!( diff --git a/src/lib.rs b/src/lib.rs index a20ac56..342b01c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod ed_diff; pub mod macros; pub mod normal_diff; pub mod params; +pub mod side_diff; pub mod unified_diff; pub mod utils; @@ -11,4 +12,5 @@ pub mod utils; pub use context_diff::diff as context_diff; pub use ed_diff::diff as ed_diff; pub use normal_diff::diff as normal_diff; +pub use side_diff::diff as side_by_side_diff; pub use unified_diff::diff as unified_diff; diff --git a/src/main.rs b/src/main.rs index 8194d00..badaaa0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,7 @@ mod ed_diff; mod macros; mod normal_diff; mod params; +mod side_diff; mod unified_diff; mod utils; diff --git a/src/params.rs b/src/params.rs index 9b3abc4..9f5c07d 100644 --- a/src/params.rs +++ b/src/params.rs @@ -11,6 +11,7 @@ pub enum Format { Unified, Context, Ed, + SideBySide, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -101,6 +102,13 @@ pub fn parse_params>(mut opts: Peekable) -> Resu format = Some(Format::Ed); continue; } + if param == "-y" || param == "--side-by-side" { + if format.is_some() && format != Some(Format::SideBySide) { + return Err("Conflicting output style option".to_string()); + } + format = Some(Format::SideBySide); + continue; + } if tabsize_re.is_match(param.to_string_lossy().as_ref()) { // Because param matches the 
regular expression, // it is safe to assume it is valid UTF-8. diff --git a/src/side_diff.rs b/src/side_diff.rs new file mode 100644 index 0000000..71bf4b7 --- /dev/null +++ b/src/side_diff.rs @@ -0,0 +1,86 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use crate::utils::limited_string; +use diff::Result; +use std::{ + io::{stdout, StdoutLock, Write}, + vec, +}; + +fn push_output( + output: &mut StdoutLock, + left_ln: &[u8], + right_ln: &[u8], + symbol: &[u8], + tab_size: usize, +) -> std::io::Result<()> { + // This function exists because we cannot assume an + // encoding for the left or right line, and the + // writeln!() macro would oblige us to do so. + + // side-by-side diff usually prints the output like: + // {left_line}{tab}{space_char}{symbol(|, < or >)}{space_char}{right_line}{EOL} + + // Recalculate how many spaces are necessary, because we need to take into + // consideration the length of the left line before printing it. 
+ let tab_size = (tab_size as isize - left_ln.len() as isize).max(0); + let ident = vec![b' '; tab_size as usize]; + output.write_all(left_ln)?; // {left_line} + output.write_all(&ident)?; // {tab} + output.write_all(b" ")?; // {space_char} + output.write_all(symbol)?; // {symbol} + output.write_all(b" ")?; // {space_char} + output.write_all(right_ln)?; // {right_line} + + writeln!(output)?; // {EOL} + + Ok(()) +} + +pub fn diff(from_file: &[u8], to_file: &[u8]) -> Vec { + // ^ The left file ^ The right file + + let mut output = stdout().lock(); + let left_lines: Vec<&[u8]> = from_file.split(|&c| c == b'\n').collect(); + let right_lines: Vec<&[u8]> = to_file.split(|&c| c == b'\n').collect(); + let tab_size = 61; // for some reason the tab spaces are 61 not 60 + for result in diff::slice(&left_lines, &right_lines) { + match result { + Result::Left(left_ln) => { + push_output( + &mut output, + limited_string(left_ln, tab_size), + &[], + b"<", + tab_size, + ) + .unwrap(); + } + Result::Right(right_ln) => { + push_output( + &mut output, + &[], + limited_string(right_ln, tab_size), + b">", + tab_size, + ) + .unwrap(); + } + Result::Both(left_ln, right_ln) => { + push_output( + &mut output, + limited_string(left_ln, tab_size), + limited_string(right_ln, tab_size), + b" ", + tab_size, + ) + .unwrap(); + } + } + } + + vec![] +} diff --git a/src/utils.rs b/src/utils.rs index 88b39ff..b0d0232 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,9 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use std::{ffi::OsString, io::Write}; - use regex::Regex; +use std::{ffi::OsString, io::Write}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -99,6 +98,15 @@ pub fn report_failure_to_read_input_file( ); } +/// Truncates a byte string to at most `limiter` bytes. This can break the +/// encoding of a char if the cut falls in the middle of it. 
+#[must_use] +pub fn limited_string(orig: &[u8], limiter: usize) -> &[u8] { + // TODO: Verify whether we broke the encoding of the char + // when we cut it. + &orig[..orig.len().min(limiter)] +} + #[cfg(test)] mod tests { use super::*; @@ -205,4 +213,64 @@ mod tests { assert!(m_time > current_time); } } + + mod limited_string { + use super::*; + use std::str; + + #[test] + fn empty_orig_returns_empty() { + let orig: &[u8] = b""; + let result = limited_string(&orig, 10); + assert!(result.is_empty()); + } + + #[test] + fn zero_limit_returns_empty() { + let orig: &[u8] = b"foo"; + let result = limited_string(&orig, 0); + assert!(result.is_empty()); + } + + #[test] + fn limit_longer_than_orig_returns_full() { + let orig: &[u8] = b"foo"; + let result = limited_string(&orig, 10); + assert_eq!(result, orig); + } + + #[test] + fn ascii_limit_in_middle() { + let orig: &[u8] = b"foobar"; + let result = limited_string(&orig, 3); + assert_eq!(result, b"foo"); + assert!(str::from_utf8(&result).is_ok()); // All are ASCII chars, so the encoding is not broken + } + + #[test] + fn utf8_multibyte_cut_invalidates() { + let orig = "áéíóú".as_bytes(); + let result = limited_string(&orig, 1); + // should contain only the first byte of the multi-byte char + assert_eq!(result, vec![0xC3]); + assert!(str::from_utf8(&result).is_err()); + } + + #[test] + fn utf8_limit_at_codepoint_boundary() { + let orig = "áéí".as_bytes(); + let bytes = &orig; + let result = limited_string(&orig, bytes.len()); + + assert_eq!(result, *bytes); + assert!(str::from_utf8(&result).is_ok()); + } + + #[test] + fn works_with_byte_vec_input() { + let orig_bytes = b"hello".to_vec(); + let result = limited_string(&orig_bytes, 3); + assert_eq!(result, b"hel"); + } + } }