Skip to content

Commit

Permalink
head: Fix bug printing large non-seekable files
Browse files Browse the repository at this point in the history
Fixes issue #7288.
Rewrite logic for print-all-but-last-n-bytes in non-seekable
files.
  • Loading branch information
karlmcdowall authored and sylvestre committed Feb 12, 2025
1 parent a3c5b8c commit 84b42a8
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 45 deletions.
64 changes: 19 additions & 45 deletions src/uu/head/src/head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::ffi::OsString;
use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write};
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::num::TryFromIntError;
use thiserror::Error;
use uucore::display::Quotable;
Expand Down Expand Up @@ -239,10 +239,7 @@ impl HeadOptions {
}
}

fn read_n_bytes<R>(input: R, n: u64) -> std::io::Result<()>
where
R: Read,
{
fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> {
// Read the first `n` bytes from the `input` reader.
let mut reader = input.take(n);

Expand Down Expand Up @@ -287,48 +284,22 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
}
}

/// Print to stdout all but the last `n` bytes from the given reader.
fn read_but_last_n_bytes(input: &mut impl std::io::BufRead, n: u64) -> std::io::Result<()> {
if n == 0 {
//prints everything
return read_n_bytes(input, u64::MAX);
}

fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<()> {
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();

let mut ring_buffer = Vec::new();

let mut buffer = [0u8; BUF_SIZE];
let mut total_read = 0;

loop {
let read = match input.read(&mut buffer) {
Ok(0) => break,
Ok(read) => read,
Err(e) => match e.kind() {
ErrorKind::Interrupted => continue,
_ => return Err(e),
},
};

total_read += read;

if total_read <= n {
// Fill the ring buffer without exceeding n bytes
let overflow = n - total_read;
ring_buffer.extend_from_slice(&buffer[..read - overflow]);
} else {
// Write the ring buffer and the part of the buffer that exceeds n
stdout.write_all(&ring_buffer)?;
stdout.write_all(&buffer[..read - n + ring_buffer.len()])?;
ring_buffer.clear();
ring_buffer.extend_from_slice(&buffer[read - n + ring_buffer.len()..read]);
}
let stdout = stdout.lock();
// Even though stdout is buffered, it will flush on each newline in the
// input stream. This can be costly, so add an extra layer of buffering
// over the top. This gives a significant speedup (approx 4x).
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
for byte in take_all_but(input.bytes(), n) {
writer.write_all(&[byte?])?;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
writer.flush()?;
}

Ok(())
}

Expand All @@ -343,6 +314,10 @@ fn read_but_last_n_lines(
for bytes in take_all_but(lines(input, separator), n) {
stdout.write_all(&bytes?)?;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush()?;
}
Ok(())
}
Expand Down Expand Up @@ -440,8 +415,7 @@ fn head_backwards_without_seek_file(
input: &mut std::fs::File,
options: &HeadOptions,
) -> std::io::Result<()> {
let reader = &mut std::io::BufReader::with_capacity(BUF_SIZE, &*input);

let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
match options.mode {
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n)?,
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into())?,
Expand Down
40 changes: 40 additions & 0 deletions tests/by-util/test_head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// file that was distributed with this source code.

// spell-checker:ignore (words) bogusfile emptyfile abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstu
// spell-checker:ignore (words) seekable

use crate::common::util::TestScenario;

Expand Down Expand Up @@ -392,6 +393,45 @@ fn test_presume_input_pipe_5_chars() {
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
}

#[test]
fn test_all_but_last_bytes_large_file_piped() {
// Validate print-all-but-last-n-bytes with a large piped-in (i.e. non-seekable) file.
let scene = TestScenario::new(util_name!());
let fixtures = &scene.fixtures;

// First, create all our fixtures.
let seq_30000_file_name = "seq_30000";
let seq_29000_file_name = "seq_29000";
let seq_29001_30000_file_name = "seq_29001_30000";
scene
.cmd("seq")
.arg("30000")
.set_stdout(fixtures.make_file(seq_30000_file_name))
.succeeds();
scene
.cmd("seq")
.arg("29000")
.set_stdout(fixtures.make_file(seq_29000_file_name))
.succeeds();
scene
.cmd("seq")
.args(&["29001", "30000"])
.set_stdout(fixtures.make_file(seq_29001_30000_file_name))
.succeeds();

let seq_29001_30000_file_length = fixtures
.open(seq_29001_30000_file_name)
.metadata()
.unwrap()
.len();
scene
.ucmd()
.args(&["-c", &format!("-{}", seq_29001_30000_file_length)])
.pipe_in_fixture(seq_30000_file_name)
.succeeds()
.stdout_only_fixture(seq_29000_file_name);
}

#[test]
fn test_read_backwards_lines_large_file() {
// Create our fixtures on the fly. We need the input file to be at least double
Expand Down

0 comments on commit 84b42a8

Please sign in to comment.