Skip to content

Commit 4635801

Browse files
committed
Add zero-compromise directory iteration using getdents64
Signed-off-by: Alex Saveau <[email protected]>
1 parent 126112c commit 4635801

File tree

6 files changed

+289
-6
lines changed

6 files changed

+289
-6
lines changed

Cargo.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,12 @@ default = [
4646

4747
acct = []
4848
aio = ["pin-utils"]
49-
dir = ["fs"]
49+
dents = ["file_type"]
50+
dir = ["fs", "file_type"]
5051
env = []
5152
event = []
5253
feature = []
54+
file_type = []
5355
fs = []
5456
hostname = []
5557
inotify = []

src/dents.rs

+187
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
//! Raw directory iteration using Linux's getdents syscall
2+
3+
use crate::errno::Errno;
4+
use crate::file_type::Type;
5+
use std::cmp::max;
6+
use std::ffi::CStr;
7+
use std::mem::MaybeUninit;
8+
use std::os::fd::BorrowedFd;
9+
use std::{mem, slice};
10+
11+
/// A directory iterator implemented with getdents.
///
/// Things to know about this implementation:
/// - Entries for deleted inodes (inode number 0) are skipped.
/// - The buffer is never grown on your behalf. If a larger buffer is needed,
///   drop the current iterator, resize the buffer, and build a new iterator
///   over it. Iteration is guaranteed to resume where it left off as long as
///   the file descriptor isn't changed. See the example in
///   [`RawDir::new`].
#[derive(Debug)]
pub struct RawDir<'a, 'b> {
    // Directory descriptor that is handed to the getdents64 syscall.
    fd: BorrowedFd<'a>,
    // Caller-supplied scratch space that receives the raw records.
    buf: &'b mut [MaybeUninit<u8>],
    // Byte count reported by the most recent successful syscall; records
    // live in `buf[..initialized]`.
    initialized: usize,
    // Byte offset within `buf` of the next record to hand out.
    offset: usize,
}
27+
28+
impl<'a, 'b> RawDir<'a, 'b> {
29+
/// Create a new iterator from the given file descriptor and buffer.
30+
///
31+
/// # Examples
32+
///
33+
/// ```
34+
/// # use std::mem::MaybeUninit;
35+
/// # use std::os::fd::{AsFd, FromRawFd, OwnedFd};
36+
/// # use nix::dents::RawDir;
37+
/// # use nix::errno::Errno;
38+
/// # use nix::fcntl::{OFlag, open, openat};
39+
/// # use nix::sys::stat::Mode;
40+
///
41+
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
42+
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
43+
///
44+
/// let mut buf = [MaybeUninit::uninit(); 2048];
45+
///
46+
/// for entry in RawDir::new(fd.as_fd(), &mut buf) {
47+
/// let entry = entry.unwrap();
48+
/// dbg!(&entry);
49+
/// }
50+
/// ```
51+
///
52+
/// Contrived example that demonstrates reading entries with arbitrarily large file paths:
53+
///
54+
/// ```
55+
/// # use std::cmp::max;
56+
/// # use std::mem::MaybeUninit;
57+
/// # use std::os::fd::{AsFd, FromRawFd, OwnedFd};
58+
/// # use nix::dents::RawDir;
59+
/// # use nix::errno::Errno;
60+
/// # use nix::fcntl::{OFlag, open, openat};
61+
/// # use nix::sys::stat::Mode;
62+
///
63+
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
64+
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
65+
///
66+
/// // DO NOT DO THIS. Use `Vec::with_capacity` to at least start the buffer
67+
/// // off with *some* space.
68+
/// let mut buf = Vec::new();
69+
///
70+
/// 'read: loop {
71+
/// 'resize: {
72+
/// for entry in RawDir::new(fd.as_fd(), buf.spare_capacity_mut()) {
73+
/// let entry = match entry {
74+
/// Err(Errno::EINVAL) => break 'resize,
75+
/// r => r.unwrap(),
76+
/// };
77+
/// dbg!(&entry);
78+
/// }
79+
/// break 'read;
80+
/// }
81+
///
82+
/// let new_capacity = max(buf.capacity() * 2, 1);
83+
/// buf.reserve(new_capacity);
84+
/// }
85+
/// ```
86+
///
87+
/// Note that this is horribly inefficient as we'll most likely end up doing ~1 syscall per file.
88+
pub fn new(fd: BorrowedFd<'a>, buf: &'b mut [MaybeUninit<u8>]) -> Self {
89+
Self {
90+
fd,
91+
buf,
92+
initialized: 0,
93+
offset: 0,
94+
}
95+
}
96+
}
97+
98+
/// A raw directory entry, similar to `std::fs::DirEntry`.
99+
///
100+
/// Note that unlike the std version, this may represent the `.` or `..` entries.
101+
#[derive(Debug)]
102+
#[allow(missing_docs)]
103+
pub struct RawDirEntry<'a> {
104+
pub inode_number: u64,
105+
pub file_type: Type,
106+
pub name: &'a CStr,
107+
}
108+
109+
#[repr(C, packed)]
110+
struct dirent64 {
111+
d_ino: libc::ino64_t,
112+
d_off: libc::off64_t,
113+
d_reclen: libc::c_ushort,
114+
d_type: libc::c_uchar,
115+
}
116+
117+
impl<'a, 'b> Iterator for RawDir<'a, 'b> {
118+
type Item = Result<RawDirEntry<'a>, Errno>;
119+
120+
fn next(&mut self) -> Option<Self::Item> {
121+
loop {
122+
if self.offset < self.initialized {
123+
let dirent_ptr =
124+
&self.buf[self.offset] as *const MaybeUninit<u8>;
125+
// Trust the kernel to use proper alignment
126+
#[allow(clippy::cast_ptr_alignment)]
127+
let dirent = unsafe { &*dirent_ptr.cast::<dirent64>() };
128+
129+
self.offset += dirent.d_reclen as usize;
130+
if dirent.d_ino == 0 {
131+
continue;
132+
}
133+
134+
return Some(Ok(RawDirEntry {
135+
inode_number: dirent.d_ino,
136+
file_type: Type::from(dirent.d_type),
137+
name: unsafe {
138+
let name_start =
139+
dirent_ptr.add(mem::size_of::<dirent64>());
140+
let mut name_end = {
141+
// Find the last aligned byte of the file name so we can
142+
// start searching for NUL bytes. If we started searching
143+
// from the back, we would run into garbage left over from
144+
// previous iterations.
145+
// TODO use .map_addr() once strict_provenance is stable
146+
let addr = max(
147+
name_start as usize,
148+
dirent_ptr.add(dirent.d_reclen as usize - 1)
149+
as usize
150+
& !(mem::size_of::<usize>() - 1),
151+
);
152+
addr as *const u8
153+
};
154+
155+
while *name_end != 0 {
156+
name_end = name_end.add(1);
157+
}
158+
159+
CStr::from_bytes_with_nul_unchecked(
160+
slice::from_raw_parts(
161+
name_start.cast::<u8>(),
162+
// Add 1 for the NUL byte
163+
// TODO use .addr() once strict_provenance is stable
164+
name_end as usize - name_start as usize + 1,
165+
),
166+
)
167+
},
168+
}));
169+
}
170+
self.initialized = 0;
171+
self.offset = 0;
172+
173+
match unsafe {
174+
Errno::result(libc::syscall(
175+
libc::SYS_getdents64,
176+
self.fd,
177+
self.buf.as_mut_ptr(),
178+
self.buf.len(),
179+
))
180+
} {
181+
Ok(bytes_read) if bytes_read == 0 => return None,
182+
Ok(bytes_read) => self.initialized = bytes_read as usize,
183+
Err(e) => return Some(Err(e)),
184+
}
185+
}
186+
}
187+
}

src/dir.rs

+35-1
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,40 @@ impl Entry {
271271
target_os = "solaris",
272272
target_os = "haiku"
273273
))]
274-
None
274+
pub use crate::file_type::Type;
275+
use crate::sys;
276+
use crate::{Error, NixPath, Result};
277+
use cfg_if::cfg_if;
278+
use std::ffi;
279+
use std::os::unix::io::{AsRawFd, IntoRawFd, RawFd};
280+
use std::ptr;
281+
pub struct Dir(ptr::NonNull<libc::DIR>);
282+
pub fn open<P: ?Sized + NixPath>(
283+
path: &P,
284+
oflag: OFlag,
285+
mode: sys::stat::Mode,
286+
) -> Result<Self> {
287+
pub fn openat<P: ?Sized + NixPath>(
288+
dirfd: RawFd,
289+
path: &P,
290+
oflag: OFlag,
291+
mode: sys::stat::Mode,
292+
) -> Result<Self> {
293+
let d = ptr::NonNull::new(unsafe { libc::fdopendir(fd) }).ok_or_else(
294+
|| {
295+
let e = Error::last();
296+
unsafe { libc::close(fd) };
297+
e
298+
},
299+
)?;
300+
if let Err(e) = Errno::result(readdir_r(
301+
dir.0.as_ptr(),
302+
ent.as_mut_ptr(),
303+
&mut result,
304+
)) {
305+
#[allow(clippy::useless_conversion)] // Not useless on all OSes
306+
unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
307+
pub fn file_type(&self) -> Type {
308+
Type::from(self.0.d_type)
275309
}
276310
}

src/file_type.rs

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//! File type conversion utilities
2+
3+
/// The kind of file a directory entry refers to.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum Type {
    /// Named pipe (FIFO)
    Fifo,
    /// Character device
    CharacterDevice,
    /// Directory
    Directory,
    /// Block device
    BlockDevice,
    /// Regular file
    File,
    /// Symbolic link
    Symlink,
    /// Unix-domain socket
    Socket,
    /// The file type is not known
    Unknown,
}
23+
24+
impl From<libc::c_uchar> for Type {
    /// Converts a raw `d_type` byte into a [`Type`].
    ///
    /// Any value that is not one of the recognised `DT_*` constants maps to
    /// [`Type::Unknown`]. On illumos, Solaris, and Haiku the input is ignored
    /// and the result is always [`Type::Unknown`].
    // On the targets below the `match` is compiled out, so `value` is never
    // read; allow that there instead of emitting an unused-variable warning.
    #[cfg_attr(
        any(
            target_os = "illumos",
            target_os = "solaris",
            target_os = "haiku"
        ),
        allow(unused_variables)
    )]
    fn from(value: libc::c_uchar) -> Self {
        #[cfg(not(any(
            target_os = "illumos",
            target_os = "solaris",
            target_os = "haiku"
        )))]
        match value {
            libc::DT_FIFO => Self::Fifo,
            libc::DT_CHR => Self::CharacterDevice,
            libc::DT_DIR => Self::Directory,
            libc::DT_BLK => Self::BlockDevice,
            libc::DT_REG => Self::File,
            libc::DT_LNK => Self::Symlink,
            libc::DT_SOCK => Self::Socket,
            /* libc::DT_UNKNOWN | */ _ => Self::Unknown,
        }

        // illumos, Solaris, and Haiku systems do not have the d_type member at all:
        #[cfg(any(
            target_os = "illumos",
            target_os = "solaris",
            target_os = "haiku"
        ))]
        Self::Unknown
    }
}

src/lib.rs

+10
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//! They may be enabled in any combination.
1010
//! * `acct` - Process accounting
1111
//! * `aio` - POSIX AIO
12+
//! * `dents` - Raw directory iteration using Linux's getdents syscall
1213
//! * `dir` - Stuff relating to directory iteration
1314
//! * `env` - Manipulate environment variables
1415
//! * `event` - Event-driven APIs, like `kqueue` and `epoll`
@@ -63,6 +64,11 @@ pub use libc;
6364
mod macros;
6465

6566
// Public crates
67+
#[cfg(target_os = "linux")]
68+
feature! {
69+
#![feature = "dents"]
70+
pub mod dents;
71+
}
6672
#[cfg(not(target_os = "redox"))]
6773
feature! {
6874
#![feature = "dir"]
@@ -80,6 +86,10 @@ feature! {
8086
#[deny(missing_docs)]
8187
pub mod features;
8288
}
89+
feature! {
90+
#![feature = "file_type"]
91+
pub mod file_type;
92+
}
8393
#[allow(missing_docs)]
8494
pub mod fcntl;
8595
feature! {

test/test_dir.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ fn read() {
3232

3333
// Check file types. The system is allowed to return DT_UNKNOWN (aka `Type::Unknown` here) but if it does
3434
// return a type, ensure it's correct.
35-
assert!(&[Some(Type::Directory), None].contains(&entries[0].file_type())); // .: dir
36-
assert!(&[Some(Type::Directory), None].contains(&entries[1].file_type())); // ..: dir
37-
assert!(&[Some(Type::Symlink), None].contains(&entries[2].file_type())); // bar: symlink
38-
assert!(&[Some(Type::File), None].contains(&entries[3].file_type())); // foo: regular file
35+
assert!(&[Type::Directory, Type::Unknown].contains(&entries[0].file_type())); // .: dir
36+
assert!(&[Type::Directory, Type::Unknown].contains(&entries[1].file_type())); // ..: dir
37+
assert!(&[Type::Symlink, Type::Unknown].contains(&entries[2].file_type())); // bar: symlink
38+
assert!(&[Type::File, Type::Unknown].contains(&entries[3].file_type())); // foo: regular file
3939
}
4040

4141
#[test]

0 commit comments

Comments
 (0)