Skip to content

Add zero-compromise directory iteration using getdents64 #1856

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ default = [

acct = []
aio = ["pin-utils"]
dir = ["fs"]
dents = ["file_type"]
dir = ["fs", "file_type"]
env = []
event = []
feature = []
file_type = []
fs = []
hostname = []
inotify = []
Expand Down
187 changes: 187 additions & 0 deletions src/dents.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
//! Raw directory iteration using Linux's getdents syscall

use crate::errno::Errno;
use crate::file_type::FileType;
use std::cmp::max;
use std::ffi::CStr;
use std::mem::MaybeUninit;
use std::os::unix::io::AsFd;
use std::{mem, slice};

/// A directory iterator implemented with getdents.
///
/// This implementation:
/// - Excludes deleted inodes (with ID 0).
/// - Does not handle growing the buffer. If this functionality is necessary,
/// you'll need to drop the current iterator, resize the buffer, and then
/// re-create the iterator. The iterator is guaranteed to continue where it
/// left off provided the file descriptor isn't changed. See the example in
/// [`RawDir::new`].
/// - Hands out entries whose names point directly into the caller's buffer.
/// The same buffer is overwritten each time the iterator asks the kernel for
/// more records, so an entry should be fully processed before the iterator is
/// advanced again.
#[derive(Debug)]
pub struct RawDir<'buf, Fd: AsFd> {
/// Directory file descriptor passed to the `getdents64` syscall.
fd: Fd,
/// Caller-provided scratch space that the kernel fills with directory records.
buf: &'buf mut [MaybeUninit<u8>],
/// Number of bytes at the front of `buf` written by the most recent syscall.
initialized: usize,
/// Byte offset into `buf` of the next record to hand out.
offset: usize,
}

impl<'buf, Fd: AsFd> RawDir<'buf, Fd> {
/// Create a new iterator from the given file descriptor and buffer.
///
/// # Examples
///
/// ```
/// # use std::mem::MaybeUninit;
/// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
/// # use nix::dents::RawDir;
/// # use nix::errno::Errno;
/// # use nix::fcntl::{OFlag, open, openat};
/// # use nix::sys::stat::Mode;
///
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
///
/// let mut buf = [MaybeUninit::uninit(); 2048];
///
/// for entry in RawDir::new(fd, &mut buf) {
/// let entry = entry.unwrap();
/// dbg!(&entry);
/// }
/// ```
///
/// Contrived example that demonstrates reading entries with arbitrarily large file paths:
///
/// ```
/// # use std::cmp::max;
/// # use std::mem::MaybeUninit;
/// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
/// # use nix::dents::RawDir;
/// # use nix::errno::Errno;
/// # use nix::fcntl::{OFlag, open, openat};
/// # use nix::sys::stat::Mode;
///
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
///
/// // DO NOT DO THIS. Use `Vec::with_capacity` to at least start the buffer
/// // off with *some* space.
/// let mut buf = Vec::new();
///
/// 'read: loop {
/// 'resize: {
/// for entry in RawDir::new(&fd, buf.spare_capacity_mut()) {
/// let entry = match entry {
/// Err(Errno::EINVAL) => break 'resize,
/// r => r.unwrap(),
/// };
/// dbg!(&entry);
/// }
/// break 'read;
/// }
///
/// let new_capacity = max(buf.capacity() * 2, 1);
/// buf.reserve(new_capacity);
/// }
/// ```
///
/// Note that this is horribly inefficient as we'll most likely end up doing ~1 syscall per file.
pub fn new(fd: Fd, buf: &'buf mut [MaybeUninit<u8>]) -> Self {
Self {
fd,
buf,
initialized: 0,
offset: 0,
}
}
}

/// A raw directory entry, similar to `std::fs::DirEntry`.
///
/// Note that unlike the std version, this may represent the `.` or `..` entries.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct RawDirEntry<'a> {
/// Inode number taken from the record's `d_ino` field.
pub inode_number: u64,
/// File type decoded from the record's `d_type` byte.
pub file_type: FileType,
/// NUL-terminated file name, borrowed from the buffer the record was read into.
pub name: &'a CStr,
}

/// Fixed-size header found at the start of every record that `getdents64`
/// writes into the buffer.
///
/// The struct is packed so that `size_of::<dirent64>()` is exactly the byte
/// offset at which the entry's name begins; the iterator relies on that when
/// locating the name.
#[repr(C, packed)]
struct dirent64 {
/// Inode number; records with a value of 0 are skipped by the iterator.
d_ino: libc::ino64_t,
/// Offset value supplied by the kernel; not read by the iterator.
d_off: libc::off64_t,
/// Total length of this record in bytes, used to step to the next record.
d_reclen: libc::c_ushort,
/// Raw file type byte, converted via `FileType::from`.
d_type: libc::c_uchar,
}

impl<'buf, Fd: AsFd> Iterator for RawDir<'buf, Fd> {
type Item = Result<RawDirEntry<'buf>, Errno>;

fn next(&mut self) -> Option<Self::Item> {
loop {
if self.offset < self.initialized {
let dirent_ptr =
&self.buf[self.offset] as *const MaybeUninit<u8>;
// Trust the kernel to use proper alignment
#[allow(clippy::cast_ptr_alignment)]
let dirent = unsafe { &*dirent_ptr.cast::<dirent64>() };

self.offset += dirent.d_reclen as usize;
if dirent.d_ino == 0 {
continue;
}

return Some(Ok(RawDirEntry {
inode_number: dirent.d_ino,
file_type: FileType::from(dirent.d_type),
name: unsafe {
let name_start =
dirent_ptr.add(mem::size_of::<dirent64>());
let mut name_end = {
// Find the last aligned byte of the file name so we can
// start searching for NUL bytes. If we started searching
// from the back, we would run into garbage left over from
// previous iterations.
// TODO use .map_addr() once strict_provenance is stable
let addr = max(
name_start as usize,
dirent_ptr.add(dirent.d_reclen as usize - 1)
as usize
& !(mem::size_of::<usize>() - 1),
);
addr as *const u8
};

while *name_end != 0 {
name_end = name_end.add(1);
}

CStr::from_bytes_with_nul_unchecked(
slice::from_raw_parts(
name_start.cast::<u8>(),
// Add 1 for the NUL byte
// TODO use .addr() once strict_provenance is stable
name_end as usize - name_start as usize + 1,
),
)
},
}));
}
self.initialized = 0;
self.offset = 0;

match unsafe {
Errno::result(libc::syscall(
libc::SYS_getdents64,
self.fd.as_fd(),
self.buf.as_mut_ptr(),
self.buf.len(),
))
} {
Ok(bytes_read) if bytes_read == 0 => return None,
Ok(bytes_read) => self.initialized = bytes_read as usize,
Err(e) => return Some(Err(e)),
}
}
}
}
40 changes: 8 additions & 32 deletions src/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use crate::errno::Errno;
use crate::fcntl::{self, OFlag};
pub use crate::file_type::FileType as Type;
use crate::sys;
use crate::{Error, NixPath, Result};
use cfg_if::cfg_if;
Expand Down Expand Up @@ -195,25 +196,6 @@ impl IntoIterator for Dir {
#[repr(transparent)]
pub struct Entry(dirent);

/// Type of file referenced by a directory entry
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum Type {
/// FIFO (Named pipe)
Fifo,
/// Character device
CharacterDevice,
/// Directory
Directory,
/// Block device
BlockDevice,
/// Regular file
File,
/// Symbolic link
Symlink,
/// Unix-domain socket
Socket,
}

impl Entry {
/// Returns the inode number (`d_ino`) of the underlying `dirent`.
#[allow(clippy::useless_conversion)] // Not useless on all OSes
Expand All @@ -240,37 +222,31 @@ impl Entry {

/// Returns the bare file name of this directory entry without any other leading path component.
pub fn file_name(&self) -> &ffi::CStr {
unsafe { ::std::ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
}

/// Returns the type of this directory entry, if known.
///
/// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
/// notably, some Linux filesystems don't implement this. The caller should use `stat` or
/// `fstat` if this returns `None`.
pub fn file_type(&self) -> Option<Type> {
pub fn file_type(&self) -> Type {
Copy link
Contributor Author

@SUPERCILEX SUPERCILEX Nov 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the policy on breaking changes like this? Option<Type> -> Type

I could undo this by matching on Unknown and returning None.

#[cfg(not(any(
target_os = "illumos",
target_os = "solaris",
target_os = "haiku"
)))]
match self.0.d_type {
libc::DT_FIFO => Some(Type::Fifo),
libc::DT_CHR => Some(Type::CharacterDevice),
libc::DT_DIR => Some(Type::Directory),
libc::DT_BLK => Some(Type::BlockDevice),
libc::DT_REG => Some(Type::File),
libc::DT_LNK => Some(Type::Symlink),
libc::DT_SOCK => Some(Type::Socket),
/* libc::DT_UNKNOWN | */ _ => None,
{
Type::from(self.0.d_type)
}

// illumos, Solaris, and Haiku systems do not have the d_type member at all:
#[cfg(any(
target_os = "illumos",
target_os = "solaris",
target_os = "haiku"
))]
None
{
Type::Unknown
}
}
}
37 changes: 37 additions & 0 deletions src/file_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//! File type conversion utilities

/// Type of file referenced by a directory entry
///
/// Values are produced from a raw `d_type` byte through the `From<libc::c_uchar>`
/// conversion in this module.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum FileType {
/// FIFO (Named pipe)
Fifo,
/// Character device
CharacterDevice,
/// Directory
Directory,
/// Block device
BlockDevice,
/// Regular file
File,
/// Symbolic link
Symlink,
/// Unix-domain socket
Socket,
/// The type could not be determined: the raw value matched none of the
/// kinds above.
Unknown,
}

impl From<libc::c_uchar> for FileType {
fn from(value: libc::c_uchar) -> Self {
match value {
libc::DT_FIFO => Self::Fifo,
libc::DT_CHR => Self::CharacterDevice,
libc::DT_DIR => Self::Directory,
libc::DT_BLK => Self::BlockDevice,
libc::DT_REG => Self::File,
libc::DT_LNK => Self::Symlink,
libc::DT_SOCK => Self::Socket,
/* libc::DT_UNKNOWN | */ _ => Self::Unknown,
}
}
}
10 changes: 10 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//! They may be enabled in any combination.
//! * `acct` - Process accounting
//! * `aio` - POSIX AIO
//! * `dents` - Raw directory iteration using Linux's getdents syscall
//! * `dir` - Stuff relating to directory iteration
//! * `env` - Manipulate environment variables
//! * `event` - Event-driven APIs, like `kqueue` and `epoll`
Expand Down Expand Up @@ -63,6 +64,11 @@ pub use libc;
mod macros;

// Public crates
#[cfg(target_os = "linux")]
feature! {
#![feature = "dents"]
pub mod dents;
}
#[cfg(not(target_os = "redox"))]
feature! {
#![feature = "dir"]
Expand All @@ -80,6 +86,10 @@ feature! {
#[deny(missing_docs)]
pub mod features;
}
feature! {
#![feature = "file_type"]
pub mod file_type;
}
#[allow(missing_docs)]
pub mod fcntl;
feature! {
Expand Down
8 changes: 4 additions & 4 deletions test/test_dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ fn read() {

// Check file types. The system is allowed to return DT_UNKNOWN (aka None here) but if it does
// return a type, ensure it's correct.
assert!(&[Some(Type::Directory), None].contains(&entries[0].file_type())); // .: dir
assert!(&[Some(Type::Directory), None].contains(&entries[1].file_type())); // ..: dir
assert!(&[Some(Type::Symlink), None].contains(&entries[2].file_type())); // bar: symlink
assert!(&[Some(Type::File), None].contains(&entries[3].file_type())); // foo: regular file
assert!(&[Type::Directory, Type::Unknown].contains(&entries[0].file_type())); // .: dir
assert!(&[Type::Directory, Type::Unknown].contains(&entries[1].file_type())); // ..: dir
assert!(&[Type::Symlink, Type::Unknown].contains(&entries[2].file_type())); // bar: symlink
assert!(&[Type::File, Type::Unknown].contains(&entries[3].file_type())); // foo: regular file
}

#[test]
Expand Down