nix-rust · SUPERCILEX · Nov 6, 2022 · SUPERCILEX · Nov 12, 2022
diff --git a/Cargo.toml b/Cargo.toml
@@ -46,10 +46,12 @@ default = [
 
 acct = []
 aio = ["pin-utils"]
-dir = ["fs"]
+dents = ["file_type"]
+dir = ["fs", "file_type"]
 env = []
 event = []
 feature = []
+file_type = []
 fs = []
 hostname = []
 inotify = []

diff --git a/src/dents.rs b/src/dents.rs
@@ -0,0 +1,187 @@
+//! Raw directory iteration using Linux's getdents syscall
+
+use crate::errno::Errno;
+use crate::file_type::FileType;
+use std::cmp::max;
+use std::ffi::CStr;
+use std::mem::MaybeUninit;
+use std::os::unix::io::AsFd;
+use std::{mem, slice};
+
+/// A directory iterator implemented with the `getdents64` syscall.
+///
+/// This implementation:
+/// - Skips entries whose inode number is 0 (deleted entries).
+/// - Does not handle growing the buffer. If this functionality is necessary,
+///   you'll need to drop the current iterator, resize the buffer, and then
+///   re-create the iterator. The iterator is guaranteed to continue where it
+///   left off provided the file descriptor isn't changed. See the example in
+///   [`RawDir::new`].
+#[derive(Debug)]
+pub struct RawDir<'buf, Fd: AsFd> {
+    fd: Fd, // directory that is being read
+    buf: &'buf mut [MaybeUninit<u8>], // caller-provided storage for records
+    initialized: usize, // bytes of `buf` filled by the last `getdents64` call
+    offset: usize, // position in `buf` of the next record to yield
+}
+
+impl<'buf, Fd: AsFd> RawDir<'buf, Fd> {
+    /// Creates a new iterator that reads the directory behind `fd` into `buf`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use std::mem::MaybeUninit;
+    /// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
+    /// # use nix::dents::RawDir;
+    /// # use nix::errno::Errno;
+    /// # use nix::fcntl::{OFlag, open, openat};
+    /// # use nix::sys::stat::Mode;
+    ///
+    /// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
+    /// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
+    ///
+    /// let mut buf = [MaybeUninit::uninit(); 2048];
+    ///
+    /// for entry in RawDir::new(fd, &mut buf) {
+    ///     let entry = entry.unwrap();
+    ///     dbg!(&entry);
+    /// }
+    /// ```
+    ///
+    /// Contrived example that demonstrates reading entries with arbitrarily long file names:
+    ///
+    /// ```
+    /// # use std::cmp::max;
+    /// # use std::mem::MaybeUninit;
+    /// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
+    /// # use nix::dents::RawDir;
+    /// # use nix::errno::Errno;
+    /// # use nix::fcntl::{OFlag, open, openat};
+    /// # use nix::sys::stat::Mode;
+    ///
+    /// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
+    /// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
+    ///
+    /// // DO NOT DO THIS. Use `Vec::with_capacity` to at least start the buffer
+    /// // off with *some* space.
+    /// let mut buf = Vec::new();
+    ///
+    /// 'read: loop {
+    ///     'resize: {
+    ///         for entry in RawDir::new(&fd, buf.spare_capacity_mut()) {
+    ///             let entry = match entry {
+    ///                 Err(Errno::EINVAL) => break 'resize,
+    ///                 r => r.unwrap(),
+    ///             };
+    ///             dbg!(&entry);
+    ///         }
+    ///         break 'read;
+    ///     }
+    ///
+    ///     let new_capacity = max(buf.capacity() * 2, 1);
+    ///     buf.reserve(new_capacity);
+    /// }
+    /// ```
+    ///
+    /// Note that starting from an empty buffer like this is horribly inefficient: we'll most likely end up doing ~1 syscall per file.
+    pub fn new(fd: Fd, buf: &'buf mut [MaybeUninit<u8>]) -> Self {
+        Self {
+            fd,
+            buf,
+            initialized: 0, // nothing has been read into `buf` yet
+            offset: 0,
+        }
+    }
+}
+
+/// A raw directory entry, similar to `std::fs::DirEntry`.
+///
+/// Note that unlike the std version, this may represent the `.` or `..` entries.
+#[derive(Debug)]
+#[allow(missing_docs)]
+pub struct RawDirEntry<'a> {
+    pub inode_number: u64, // `d_ino` of the record
+    pub file_type: FileType, // decoded from the record's `d_type`
+    pub name: &'a CStr, // file name, borrowed from the iterator's buffer
+}
+
+/// Fixed-size header of a `getdents64` record; the NUL-terminated name follows
+/// it. Packed: readable at any address, and its size is the name's offset.
+#[repr(C, packed)]
+struct dirent64 {
+    d_ino: libc::ino64_t,
+    d_off: libc::off64_t,
+    d_reclen: libc::c_ushort,
+    d_type: libc::c_uchar,
+}
+
+impl<'buf, Fd: AsFd> Iterator for RawDir<'buf, Fd> {
+    type Item = Result<RawDirEntry<'buf>, Errno>;
+
+    /// Yields the next buffered entry, calling `getdents64` to refill the
+    /// buffer once it is exhausted. A refill of 0 bytes ends the iteration.
+    // NOTE(review): items borrow the buffer for `'buf`, yet a refill overwrites
+    // it; confirm whether `Item` should borrow from the iterator instead.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if self.offset < self.initialized {
+                let dirent_ptr =
+                    &self.buf[self.offset] as *const MaybeUninit<u8>;
+                // SAFETY: `offset < initialized`, so the kernel wrote a record
+                // here; `dirent64` is packed, so every address is aligned.
+                #[allow(clippy::cast_ptr_alignment)]
+                let dirent = unsafe { &*dirent_ptr.cast::<dirent64>() };
+
+                let reclen = dirent.d_reclen as usize;
+                self.offset += reclen;
+                if dirent.d_ino == 0 {
+                    continue;
+                }
+
+                return Some(Ok(RawDirEntry {
+                    inode_number: dirent.d_ino,
+                    file_type: FileType::from(dirent.d_type),
+                    // SAFETY: the reads stay between the header and the NUL
+                    // that terminates the name inside this record.
+                    name: unsafe {
+                        let header_len = mem::size_of::<dirent64>();
+                        let name_start =
+                            dirent_ptr.add(header_len).cast::<u8>();
+                        // Records are padded to a multiple of 8 bytes, so the
+                        // NUL lies in the last 8 bytes; padding after it may
+                        // hold stale data. Search forwards from that tail,
+                        // located relative to the record: masking the absolute
+                        // address to a `usize` boundary overshoots the NUL when
+                        // the buffer is unaligned or `usize` is under 8 bytes.
+                        let tail = reclen.saturating_sub(8);
+                        let mut name_len = max(header_len, tail) - header_len;
+                        while *name_start.add(name_len) != 0 {
+                            name_len += 1;
+                        }
+                        // Add 1 so that the slice includes the NUL byte.
+                        CStr::from_bytes_with_nul_unchecked(
+                            slice::from_raw_parts(name_start, name_len + 1),
+                        )
+                    },
+                }));
+            }
+            self.initialized = 0;
+            self.offset = 0;
+
+            match unsafe {
+                Errno::result(libc::syscall(
+                    libc::SYS_getdents64,
+                    // Variadic C calls take plain integers: pass the raw fd.
+                    std::os::unix::io::AsRawFd::as_raw_fd(&self.fd.as_fd()),
+                    self.buf.as_mut_ptr(),
+                    self.buf.len(),
+                ))
+            } {
+                Ok(0) => return None,
+                Ok(bytes_read) => self.initialized = bytes_read as usize,
+                Err(e) => return Some(Err(e)),
+            }
+        }
+    }
+}
diff --git a/src/dir.rs b/src/dir.rs
@@ -2,6 +2,7 @@
 
 use crate::errno::Errno;
 use crate::fcntl::{self, OFlag};
+pub use crate::file_type::FileType as Type;
 use crate::sys;
 use crate::{Error, NixPath, Result};
 use cfg_if::cfg_if;
@@ -195,25 +196,6 @@ impl IntoIterator for Dir {
 #[repr(transparent)]
 pub struct Entry(dirent);
 
-/// Type of file referenced by a directory entry
-#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
-pub enum Type {
-    /// FIFO (Named pipe)
-    Fifo,
-    /// Character device
-    CharacterDevice,
-    /// Directory
-    Directory,
-    /// Block device
-    BlockDevice,
-    /// Regular file
-    File,
-    /// Symbolic link
-    Symlink,
-    /// Unix-domain socket
-    Socket,
-}
-
 impl Entry {
     /// Returns the inode number (`d_ino`) of the underlying `dirent`.
     #[allow(clippy::useless_conversion)] // Not useless on all OSes
@@ -240,37 +222,31 @@ impl Entry {
 
     /// Returns the bare file name of this directory entry without any other leading path component.
     pub fn file_name(&self) -> &ffi::CStr {
-        unsafe { ::std::ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
+        unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
     }
 
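-    /// Returns the type of this directory entry, if known.
+    /// Returns the type of this directory entry, or `Type::Unknown` if it is not known.
     ///
     /// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
     /// notably, some Linux filesystems don't implement this. The caller should use `stat` or
-    /// `fstat` if this returns `None`.
+    /// `fstat` if this returns `Type::Unknown`.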
-    pub fn file_type(&self) -> Option<Type> {
+    pub fn file_type(&self) -> Type {
         #[cfg(not(any(
             target_os = "illumos",
             target_os = "solaris",
             target_os = "haiku"
         )))]
-        match self.0.d_type {
-            libc::DT_FIFO => Some(Type::Fifo),
-            libc::DT_CHR => Some(Type::CharacterDevice),
-            libc::DT_DIR => Some(Type::Directory),
-            libc::DT_BLK => Some(Type::BlockDevice),
-            libc::DT_REG => Some(Type::File),
-            libc::DT_LNK => Some(Type::Symlink),
-            libc::DT_SOCK => Some(Type::Socket),
-            /* libc::DT_UNKNOWN | */ _ => None,
+        {
+            Type::from(self.0.d_type)
         }
 
-        // illumos, Solaris, and Haiku systems do not have the d_type member at all:
         #[cfg(any(
             target_os = "illumos",
             target_os = "solaris",
             target_os = "haiku"
         ))]
-        None
+        {
+            Type::Unknown
+        }
     }
 }
diff --git a/src/file_type.rs b/src/file_type.rs
@@ -0,0 +1,37 @@
+//! File type conversion utilities
+
+/// Type of file referenced by a directory entry (decoded from `DT_*` values)
+#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
+pub enum FileType {
+    /// FIFO (Named pipe)
+    Fifo,
+    /// Character device
+    CharacterDevice,
+    /// Directory
+    Directory,
+    /// Block device
+    BlockDevice,
+    /// Regular file
+    File,
+    /// Symbolic link
+    Symlink,
+    /// Unix-domain socket
+    Socket,
+    /// Type not reported (`DT_UNKNOWN`) or not recognized
+    Unknown,
+}
+
+impl From<libc::c_uchar> for FileType {
+    fn from(value: libc::c_uchar) -> Self {
+        match value {
+            libc::DT_FIFO => Self::Fifo,
+            libc::DT_CHR => Self::CharacterDevice,
+            libc::DT_DIR => Self::Directory,
+            libc::DT_BLK => Self::BlockDevice,
+            libc::DT_REG => Self::File,
+            libc::DT_LNK => Self::Symlink,
+            libc::DT_SOCK => Self::Socket,
+            /* libc::DT_UNKNOWN or any unlisted value */ _ => Self::Unknown,
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -9,6 +9,7 @@
 //! They may be enabled in any combination.
 //! * `acct` - Process accounting
 //! * `aio` - POSIX AIO
+//! * `dents` - Raw directory iteration using Linux's getdents syscall
 //! * `dir` - Stuff relating to directory iteration
 //! * `env` - Manipulate environment variables
 //! * `event` - Event-driven APIs, like `kqueue` and `epoll`
@@ -63,6 +64,11 @@ pub use libc;
 mod macros;
 
 // Public crates
+#[cfg(target_os = "linux")]
+feature! {
+    #![feature = "dents"]
+    pub mod dents;
+}
 #[cfg(not(target_os = "redox"))]
 feature! {
     #![feature = "dir"]
@@ -80,6 +86,10 @@ feature! {
     #[deny(missing_docs)]
     pub mod features;
 }
+feature! {
+    #![feature = "file_type"]
+    pub mod file_type;
+}
 #[allow(missing_docs)]
 pub mod fcntl;
 feature! {

diff --git a/test/test_dir.rs b/test/test_dir.rs
@@ -32,10 +32,10 @@ fn read() {
 
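-    // Check file types. The system is allowed to return DT_UNKNOWN (aka None here) but if it does
-    // return a type, ensure it's correct.
+    // Check file types. The system is allowed to return DT_UNKNOWN (aka `Type::Unknown` here) but
+    // if it does return a type, ensure it's correct.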
-    assert!(&[Some(Type::Directory), None].contains(&entries[0].file_type())); // .: dir
-    assert!(&[Some(Type::Directory), None].contains(&entries[1].file_type())); // ..: dir
-    assert!(&[Some(Type::Symlink), None].contains(&entries[2].file_type())); // bar: symlink
-    assert!(&[Some(Type::File), None].contains(&entries[3].file_type())); // foo: regular file
+    assert!(&[Type::Directory, Type::Unknown].contains(&entries[0].file_type())); // .: dir
+    assert!(&[Type::Directory, Type::Unknown].contains(&entries[1].file_type())); // ..: dir
+    assert!(&[Type::Symlink, Type::Unknown].contains(&entries[2].file_type())); // bar: symlink
+    assert!(&[Type::File, Type::Unknown].contains(&entries[3].file_type())); // foo: regular file
 }
 
 #[test]