From 429dfa6cc4072158d1fefc41a46b437bb8559aee Mon Sep 17 00:00:00 2001
From: Alex Saveau
Date: Sun, 6 Nov 2022 11:39:42 -0800
Subject: [PATCH] Add zero-compromise directory iteration using getdents64

Signed-off-by: Alex Saveau
---
 Cargo.toml       |   4 +-
 src/dents.rs     | 187 +++++++++++++++++++++++++++++++++++++++++++++++
 src/dir.rs       |  42 ++--------
 src/file_type.rs |  37 ++++++++++
 src/lib.rs       |  10 +++
 test/test_dir.rs |  10 +-
 6 files changed, 251 insertions(+), 39 deletions(-)
 create mode 100644 src/dents.rs
 create mode 100644 src/file_type.rs

diff --git a/Cargo.toml b/Cargo.toml
index e09f758a25..873208833e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,10 +46,12 @@ default = [
 
 acct = []
 aio = ["pin-utils"]
-dir = ["fs"]
+dents = ["file_type"]
+dir = ["fs", "file_type"]
 env = []
 event = []
 feature = []
+file_type = []
 fs = []
 hostname = []
 inotify = []
diff --git a/src/dents.rs b/src/dents.rs
new file mode 100644
index 0000000000..62538792c5
--- /dev/null
+++ b/src/dents.rs
@@ -0,0 +1,187 @@
+//! Raw directory iteration using Linux's getdents syscall
+
+use crate::errno::Errno;
+use crate::file_type::FileType;
+use std::cmp::max;
+use std::ffi::CStr;
+use std::mem::MaybeUninit;
+use std::os::unix::io::{AsFd, AsRawFd};
+use std::{mem, slice};
+
+/// A directory iterator implemented with getdents.
+///
+/// This implementation:
+/// - Excludes deleted inodes (with ID 0).
+/// - Does not handle growing the buffer. If this functionality is necessary,
+///   you'll need to drop the current iterator, resize the buffer, and then
+///   re-create the iterator. The iterator is guaranteed to continue where it
+///   left off provided the file descriptor isn't changed. See the example in
+///   [`RawDir::new`].
+#[derive(Debug)]
+pub struct RawDir<'buf, Fd: AsFd> {
+    fd: Fd,
+    buf: &'buf mut [MaybeUninit<u8>],
+    initialized: usize,
+    offset: usize,
+}
+
+impl<'buf, Fd: AsFd> RawDir<'buf, Fd> {
+    /// Create a new iterator from the given file descriptor and buffer.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use std::mem::MaybeUninit;
+    /// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
+    /// # use nix::dents::RawDir;
+    /// # use nix::errno::Errno;
+    /// # use nix::fcntl::{OFlag, open, openat};
+    /// # use nix::sys::stat::Mode;
+    ///
+    /// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
+    /// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
+    ///
+    /// let mut buf = [MaybeUninit::uninit(); 2048];
+    ///
+    /// for entry in RawDir::new(fd, &mut buf) {
+    ///     let entry = entry.unwrap();
+    ///     dbg!(&entry);
+    /// }
+    /// ```
+    ///
+    /// Contrived example that demonstrates reading entries with arbitrarily large file paths:
+    ///
+    /// ```
+    /// # use std::cmp::max;
+    /// # use std::mem::MaybeUninit;
+    /// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
+    /// # use nix::dents::RawDir;
+    /// # use nix::errno::Errno;
+    /// # use nix::fcntl::{OFlag, open, openat};
+    /// # use nix::sys::stat::Mode;
+    ///
+    /// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
+    /// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
+    ///
+    /// // DO NOT DO THIS. Use `Vec::with_capacity` to at least start the buffer
+    /// // off with *some* space.
+    /// let mut buf = Vec::new();
+    ///
+    /// 'read: loop {
+    ///     'resize: {
+    ///         for entry in RawDir::new(&fd, buf.spare_capacity_mut()) {
+    ///             let entry = match entry {
+    ///                 Err(Errno::EINVAL) => break 'resize,
+    ///                 r => r.unwrap(),
+    ///             };
+    ///             dbg!(&entry);
+    ///         }
+    ///         break 'read;
+    ///     }
+    ///
+    ///     let new_capacity = max(buf.capacity() * 2, 1);
+    ///     buf.reserve(new_capacity);
+    /// }
+    /// ```
+    ///
+    /// Note that this is horribly inefficient as we'll most likely end up doing ~1 syscall per file.
+    pub fn new(fd: Fd, buf: &'buf mut [MaybeUninit<u8>]) -> Self {
+        Self {
+            fd,
+            buf,
+            initialized: 0,
+            offset: 0,
+        }
+    }
+}
+
+/// A raw directory entry, similar to `std::fs::DirEntry`.
+///
+/// Note that unlike the std version, this may represent the `.` or `..` entries.
+#[derive(Debug)]
+#[allow(missing_docs)]
+pub struct RawDirEntry<'a> {
+    pub inode_number: u64,
+    pub file_type: FileType,
+    pub name: &'a CStr,
+}
+
+#[repr(C, packed)]
+struct dirent64 {
+    d_ino: libc::ino64_t,
+    d_off: libc::off64_t,
+    d_reclen: libc::c_ushort,
+    d_type: libc::c_uchar,
+}
+
+impl<'buf, Fd: AsFd> Iterator for RawDir<'buf, Fd> {
+    type Item = Result<RawDirEntry<'buf>, Errno>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if self.offset < self.initialized {
+                let dirent_ptr =
+                    &self.buf[self.offset] as *const MaybeUninit<u8>;
+                // Trust the kernel to use proper alignment
+                #[allow(clippy::cast_ptr_alignment)]
+                let dirent = unsafe { &*dirent_ptr.cast::<dirent64>() };
+
+                self.offset += dirent.d_reclen as usize;
+                if dirent.d_ino == 0 {
+                    continue;
+                }
+
+                return Some(Ok(RawDirEntry {
+                    inode_number: dirent.d_ino,
+                    file_type: FileType::from(dirent.d_type),
+                    name: unsafe {
+                        let name_start =
+                            dirent_ptr.add(mem::size_of::<dirent64>());
+                        let mut name_end = {
+                            // The kernel pads each record to 8 bytes, so the NUL
+                            // terminator is within the record's last 8 bytes.
+                            // Start the search there, using an offset relative to
+                            // the record so the caller's buffer alignment doesn't
+                            // matter. Searching backwards from the end instead
+                            // would run into garbage left over in the padding.
+                            let reclen = dirent.d_reclen as usize;
+                            let start = max(
+                                mem::size_of::<dirent64>(),
+                                reclen.saturating_sub(mem::size_of::<u64>()),
+                            );
+                            dirent_ptr.add(start).cast::<u8>()
+                        };
+
+                        while *name_end != 0 {
+                            name_end = name_end.add(1);
+                        }
+
+                        CStr::from_bytes_with_nul_unchecked(
+                            slice::from_raw_parts(
+                                name_start.cast::<u8>(),
+                                // Add 1 for the NUL byte
+                                // TODO use .addr() once strict_provenance is stable
+                                name_end as usize - name_start as usize + 1,
+                            ),
+                        )
+                    },
+                }));
+            }
+            self.initialized = 0;
+            self.offset = 0;
+
+            match unsafe {
+                Errno::result(libc::syscall(
+                    libc::SYS_getdents64,
+                    self.fd.as_fd().as_raw_fd(),
+                    self.buf.as_mut_ptr(),
+                    self.buf.len(),
+                ))
+            } {
+                Ok(0) => return None,
+                Ok(bytes_read) => self.initialized = bytes_read as usize,
+                Err(e) => return Some(Err(e)),
+            }
+        }
+    }
+}
diff --git a/src/dir.rs b/src/dir.rs
index 5ce503644e..cfecc06e5f 100644
--- a/src/dir.rs
+++ b/src/dir.rs
@@ -2,6 +2,7 @@
 
 use crate::errno::Errno;
 use crate::fcntl::{self, OFlag};
+pub use crate::file_type::FileType as Type;
 use crate::sys;
 use crate::{Error, NixPath, Result};
 use cfg_if::cfg_if;
@@ -195,25 +196,6 @@ impl IntoIterator for Dir {
 #[repr(transparent)]
 pub struct Entry(dirent);
 
-/// Type of file referenced by a directory entry
-#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
-pub enum Type {
-    /// FIFO (Named pipe)
-    Fifo,
-    /// Character device
-    CharacterDevice,
-    /// Directory
-    Directory,
-    /// Block device
-    BlockDevice,
-    /// Regular file
-    File,
-    /// Symbolic link
-    Symlink,
-    /// Unix-domain socket
-    Socket,
-}
-
 impl Entry {
     /// Returns the inode number (`d_ino`) of the underlying `dirent`.
     #[allow(clippy::useless_conversion)] // Not useless on all OSes
@@ -240,7 +222,7 @@ impl Entry {
 
     /// Returns the bare file name of this directory entry without any other leading path component.
     pub fn file_name(&self) -> &ffi::CStr {
-        unsafe { ::std::ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
+        unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
     }
 
     /// Returns the type of this directory entry, if known.
@@ -248,29 +230,23 @@ impl Entry {
     /// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
     /// notably, some Linux filesystems don't implement this. The caller should use `stat` or
-    /// `fstat` if this returns `None`.
-    pub fn file_type(&self) -> Option<Type> {
+    /// `fstat` if this returns `Type::Unknown`.
+    pub fn file_type(&self) -> Type {
         #[cfg(not(any(
             target_os = "illumos",
             target_os = "solaris",
             target_os = "haiku"
         )))]
-        match self.0.d_type {
-            libc::DT_FIFO => Some(Type::Fifo),
-            libc::DT_CHR => Some(Type::CharacterDevice),
-            libc::DT_DIR => Some(Type::Directory),
-            libc::DT_BLK => Some(Type::BlockDevice),
-            libc::DT_REG => Some(Type::File),
-            libc::DT_LNK => Some(Type::Symlink),
-            libc::DT_SOCK => Some(Type::Socket),
-            /* libc::DT_UNKNOWN | */ _ => None,
+        {
+            Type::from(self.0.d_type)
         }
 
-        // illumos, Solaris, and Haiku systems do not have the d_type member at all:
         #[cfg(any(
             target_os = "illumos",
             target_os = "solaris",
             target_os = "haiku"
         ))]
-        None
+        {
+            Type::Unknown
+        }
     }
 }
diff --git a/src/file_type.rs b/src/file_type.rs
new file mode 100644
index 0000000000..202b0c679e
--- /dev/null
+++ b/src/file_type.rs
@@ -0,0 +1,37 @@
+//! File type conversion utilities
+
+/// Type of file referenced by a directory entry
+#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
+pub enum FileType {
+    /// FIFO (Named pipe)
+    Fifo,
+    /// Character device
+    CharacterDevice,
+    /// Directory
+    Directory,
+    /// Block device
+    BlockDevice,
+    /// Regular file
+    File,
+    /// Symbolic link
+    Symlink,
+    /// Unix-domain socket
+    Socket,
+    /// Unknown
+    Unknown,
+}
+
+impl From<libc::c_uchar> for FileType {
+    fn from(value: libc::c_uchar) -> Self {
+        match value {
+            libc::DT_FIFO => Self::Fifo,
+            libc::DT_CHR => Self::CharacterDevice,
+            libc::DT_DIR => Self::Directory,
+            libc::DT_BLK => Self::BlockDevice,
+            libc::DT_REG => Self::File,
+            libc::DT_LNK => Self::Symlink,
+            libc::DT_SOCK => Self::Socket,
+            /* libc::DT_UNKNOWN | */ _ => Self::Unknown,
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 6b82125761..1f3c594b26 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,6 +9,7 @@
 //! They may be enabled in any combination.
 //! * `acct` - Process accounting
 //! * `aio` - POSIX AIO
+//! * `dents` - Raw directory iteration using Linux's getdents syscall
 //! * `dir` - Stuff relating to directory iteration
 //! * `env` - Manipulate environment variables
 //! * `event` - Event-driven APIs, like `kqueue` and `epoll`
@@ -63,6 +64,11 @@ pub use libc;
 mod macros;
 
 // Public crates
+#[cfg(target_os = "linux")]
+feature! {
+    #![feature = "dents"]
+    pub mod dents;
+}
 #[cfg(not(target_os = "redox"))]
 feature! {
     #![feature = "dir"]
@@ -80,6 +86,10 @@ feature! {
     #[deny(missing_docs)]
     pub mod features;
 }
+feature! {
+    #![feature = "file_type"]
+    pub mod file_type;
+}
 #[allow(missing_docs)]
 pub mod fcntl;
 feature! {
diff --git a/test/test_dir.rs b/test/test_dir.rs
index 2af4aa5c0a..95cf773a0b 100644
--- a/test/test_dir.rs
+++ b/test/test_dir.rs
@@ -32,10 +32,10 @@ fn read() {
 
-    // Check file types. The system is allowed to return DT_UNKNOWN (aka None here) but if it does
+    // Check file types. The system may return DT_UNKNOWN (aka `Type::Unknown`) but if it does
     // return a type, ensure it's correct.
-    assert!(&[Some(Type::Directory), None].contains(&entries[0].file_type())); // .: dir
-    assert!(&[Some(Type::Directory), None].contains(&entries[1].file_type())); // ..: dir
-    assert!(&[Some(Type::Symlink), None].contains(&entries[2].file_type())); // bar: symlink
-    assert!(&[Some(Type::File), None].contains(&entries[3].file_type())); // foo: regular file
+    assert!(&[Type::Directory, Type::Unknown].contains(&entries[0].file_type())); // .: dir
+    assert!(&[Type::Directory, Type::Unknown].contains(&entries[1].file_type())); // ..: dir
+    assert!(&[Type::Symlink, Type::Unknown].contains(&entries[2].file_type())); // bar: symlink
+    assert!(&[Type::File, Type::Unknown].contains(&entries[3].file_type())); // foo: regular file
 }
 
 #[test]