Skip to content

Commit 429dfa6

Browse files
committed
Add zero-compromise directory iteration using getdents64
Signed-off-by: Alex Saveau <[email protected]>
1 parent 126112c commit 429dfa6

File tree

6 files changed

+249
-37
lines changed

6 files changed

+249
-37
lines changed

Cargo.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,12 @@ default = [
4646

4747
acct = []
4848
aio = ["pin-utils"]
49-
dir = ["fs"]
49+
dents = ["file_type"]
50+
dir = ["fs", "file_type"]
5051
env = []
5152
event = []
5253
feature = []
54+
file_type = []
5355
fs = []
5456
hostname = []
5557
inotify = []

src/dents.rs

+187
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
//! Raw directory iteration using Linux's getdents syscall
2+
3+
use crate::errno::Errno;
4+
use crate::file_type::FileType;
5+
use std::cmp::max;
6+
use std::ffi::CStr;
7+
use std::mem::MaybeUninit;
8+
use std::os::unix::io::AsFd;
9+
use std::{mem, slice};
10+
11+
/// Iterator over the entries of a directory, backed directly by `getdents64`.
///
/// Behaviour worth knowing about:
/// - Entries whose inode number is 0 (deleted inodes) are skipped.
/// - The buffer is never grown. If it turns out to be too small, drop this
///   iterator, enlarge the buffer, and build a new iterator; iteration
///   resumes where it stopped as long as the file descriptor is left
///   untouched. [`RawDir::new`] has an example of doing so.
#[derive(Debug)]
pub struct RawDir<'buf, Fd: AsFd> {
    // Directory descriptor handed to the `getdents64` syscall.
    fd: Fd,
    // Caller-provided scratch space the syscall writes records into.
    buf: &'buf mut [MaybeUninit<u8>],
    // Byte count reported by the most recent successful syscall.
    initialized: usize,
    // Byte offset in `buf` of the next record to hand out.
    offset: usize,
}
27+
28+
impl<'buf, Fd: AsFd> RawDir<'buf, Fd> {
29+
/// Create a new iterator from the given file descriptor and buffer.
30+
///
31+
/// # Examples
32+
///
33+
/// ```
34+
/// # use std::mem::MaybeUninit;
35+
/// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
36+
/// # use nix::dents::RawDir;
37+
/// # use nix::errno::Errno;
38+
/// # use nix::fcntl::{OFlag, open, openat};
39+
/// # use nix::sys::stat::Mode;
40+
///
41+
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
42+
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
43+
///
44+
/// let mut buf = [MaybeUninit::uninit(); 2048];
45+
///
46+
/// for entry in RawDir::new(fd, &mut buf) {
47+
/// let entry = entry.unwrap();
48+
/// dbg!(&entry);
49+
/// }
50+
/// ```
51+
///
52+
/// Contrived example that demonstrates reading entries with arbitrarily large file paths:
53+
///
54+
/// ```
55+
/// # use std::cmp::max;
56+
/// # use std::mem::MaybeUninit;
57+
/// # use std::os::unix::io::{AsFd, FromRawFd, OwnedFd};
58+
/// # use nix::dents::RawDir;
59+
/// # use nix::errno::Errno;
60+
/// # use nix::fcntl::{OFlag, open, openat};
61+
/// # use nix::sys::stat::Mode;
62+
///
63+
/// let fd = open(".", OFlag::O_RDONLY | OFlag::O_DIRECTORY, Mode::empty()).unwrap();
64+
/// let fd = unsafe { OwnedFd::from_raw_fd(fd) };
65+
///
66+
/// // DO NOT DO THIS. Use `Vec::with_capacity` to at least start the buffer
67+
/// // off with *some* space.
68+
/// let mut buf = Vec::new();
69+
///
70+
/// 'read: loop {
71+
/// 'resize: {
72+
/// for entry in RawDir::new(&fd, buf.spare_capacity_mut()) {
73+
/// let entry = match entry {
74+
/// Err(Errno::EINVAL) => break 'resize,
75+
/// r => r.unwrap(),
76+
/// };
77+
/// dbg!(&entry);
78+
/// }
79+
/// break 'read;
80+
/// }
81+
///
82+
/// let new_capacity = max(buf.capacity() * 2, 1);
83+
/// buf.reserve(new_capacity);
84+
/// }
85+
/// ```
86+
///
87+
/// Note that this is horribly inefficient as we'll most likely end up doing ~1 syscall per file.
88+
pub fn new(fd: Fd, buf: &'buf mut [MaybeUninit<u8>]) -> Self {
89+
Self {
90+
fd,
91+
buf,
92+
initialized: 0,
93+
offset: 0,
94+
}
95+
}
96+
}
97+
98+
/// A raw directory entry, similar to `std::fs::DirEntry`.
99+
///
100+
/// Note that unlike the std version, this may represent the `.` or `..` entries.
101+
#[derive(Debug)]
102+
#[allow(missing_docs)]
103+
pub struct RawDirEntry<'a> {
104+
pub inode_number: u64,
105+
pub file_type: FileType,
106+
pub name: &'a CStr,
107+
}
108+
109+
#[repr(C, packed)]
110+
struct dirent64 {
111+
d_ino: libc::ino64_t,
112+
d_off: libc::off64_t,
113+
d_reclen: libc::c_ushort,
114+
d_type: libc::c_uchar,
115+
}
116+
117+
impl<'buf, Fd: AsFd> Iterator for RawDir<'buf, Fd> {
118+
type Item = Result<RawDirEntry<'buf>, Errno>;
119+
120+
fn next(&mut self) -> Option<Self::Item> {
121+
loop {
122+
if self.offset < self.initialized {
123+
let dirent_ptr =
124+
&self.buf[self.offset] as *const MaybeUninit<u8>;
125+
// Trust the kernel to use proper alignment
126+
#[allow(clippy::cast_ptr_alignment)]
127+
let dirent = unsafe { &*dirent_ptr.cast::<dirent64>() };
128+
129+
self.offset += dirent.d_reclen as usize;
130+
if dirent.d_ino == 0 {
131+
continue;
132+
}
133+
134+
return Some(Ok(RawDirEntry {
135+
inode_number: dirent.d_ino,
136+
file_type: FileType::from(dirent.d_type),
137+
name: unsafe {
138+
let name_start =
139+
dirent_ptr.add(mem::size_of::<dirent64>());
140+
let mut name_end = {
141+
// Find the last aligned byte of the file name so we can
142+
// start searching for NUL bytes. If we started searching
143+
// from the back, we would run into garbage left over from
144+
// previous iterations.
145+
// TODO use .map_addr() once strict_provenance is stable
146+
let addr = max(
147+
name_start as usize,
148+
dirent_ptr.add(dirent.d_reclen as usize - 1)
149+
as usize
150+
& !(mem::size_of::<usize>() - 1),
151+
);
152+
addr as *const u8
153+
};
154+
155+
while *name_end != 0 {
156+
name_end = name_end.add(1);
157+
}
158+
159+
CStr::from_bytes_with_nul_unchecked(
160+
slice::from_raw_parts(
161+
name_start.cast::<u8>(),
162+
// Add 1 for the NUL byte
163+
// TODO use .addr() once strict_provenance is stable
164+
name_end as usize - name_start as usize + 1,
165+
),
166+
)
167+
},
168+
}));
169+
}
170+
self.initialized = 0;
171+
self.offset = 0;
172+
173+
match unsafe {
174+
Errno::result(libc::syscall(
175+
libc::SYS_getdents64,
176+
self.fd.as_fd(),
177+
self.buf.as_mut_ptr(),
178+
self.buf.len(),
179+
))
180+
} {
181+
Ok(bytes_read) if bytes_read == 0 => return None,
182+
Ok(bytes_read) => self.initialized = bytes_read as usize,
183+
Err(e) => return Some(Err(e)),
184+
}
185+
}
186+
}
187+
}

src/dir.rs

+8-32
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use crate::errno::Errno;
44
use crate::fcntl::{self, OFlag};
5+
pub use crate::file_type::FileType as Type;
56
use crate::sys;
67
use crate::{Error, NixPath, Result};
78
use cfg_if::cfg_if;
@@ -195,25 +196,6 @@ impl IntoIterator for Dir {
195196
#[repr(transparent)]
196197
pub struct Entry(dirent);
197198

198-
/// Type of file referenced by a directory entry
199-
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
200-
pub enum Type {
201-
/// FIFO (Named pipe)
202-
Fifo,
203-
/// Character device
204-
CharacterDevice,
205-
/// Directory
206-
Directory,
207-
/// Block device
208-
BlockDevice,
209-
/// Regular file
210-
File,
211-
/// Symbolic link
212-
Symlink,
213-
/// Unix-domain socket
214-
Socket,
215-
}
216-
217199
impl Entry {
218200
/// Returns the inode number (`d_ino`) of the underlying `dirent`.
219201
#[allow(clippy::useless_conversion)] // Not useless on all OSes
@@ -240,37 +222,31 @@ impl Entry {
240222

241223
/// Returns the bare file name of this directory entry without any other leading path component.
242224
pub fn file_name(&self) -> &ffi::CStr {
243-
unsafe { ::std::ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
225+
unsafe { ffi::CStr::from_ptr(self.0.d_name.as_ptr()) }
244226
}
245227

246228
/// Returns the type of this directory entry, if known.
247229
///
248230
/// See platform `readdir(3)` or `dirent(5)` manpage for when the file type is known;
249231
/// notably, some Linux filesystems don't implement this. The caller should use `stat` or
250232
`fstat` if this returns `Type::Unknown`.
251-
pub fn file_type(&self) -> Option<Type> {
233+
pub fn file_type(&self) -> Type {
252234
#[cfg(not(any(
253235
target_os = "illumos",
254236
target_os = "solaris",
255237
target_os = "haiku"
256238
)))]
257-
match self.0.d_type {
258-
libc::DT_FIFO => Some(Type::Fifo),
259-
libc::DT_CHR => Some(Type::CharacterDevice),
260-
libc::DT_DIR => Some(Type::Directory),
261-
libc::DT_BLK => Some(Type::BlockDevice),
262-
libc::DT_REG => Some(Type::File),
263-
libc::DT_LNK => Some(Type::Symlink),
264-
libc::DT_SOCK => Some(Type::Socket),
265-
/* libc::DT_UNKNOWN | */ _ => None,
239+
{
240+
Type::from(self.0.d_type)
266241
}
267242

268-
// illumos, Solaris, and Haiku systems do not have the d_type member at all:
269243
#[cfg(any(
270244
target_os = "illumos",
271245
target_os = "solaris",
272246
target_os = "haiku"
273247
))]
274-
None
248+
{
249+
Type::Unknown
250+
}
275251
}
276252
}

src/file_type.rs

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//! File type conversion utilities
2+
3+
/// The kind of file a directory entry refers to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FileType {
    /// Named pipe (FIFO)
    Fifo,
    /// Character device
    CharacterDevice,
    /// Directory
    Directory,
    /// Block device
    BlockDevice,
    /// Regular file
    File,
    /// Symbolic link
    Symlink,
    /// Unix-domain socket
    Socket,
    /// The type could not be determined from the directory entry
    /// (`DT_UNKNOWN` or an unrecognised value)
    Unknown,
}
23+
24+
impl From<libc::c_uchar> for FileType {
25+
fn from(value: libc::c_uchar) -> Self {
26+
match value {
27+
libc::DT_FIFO => Self::Fifo,
28+
libc::DT_CHR => Self::CharacterDevice,
29+
libc::DT_DIR => Self::Directory,
30+
libc::DT_BLK => Self::BlockDevice,
31+
libc::DT_REG => Self::File,
32+
libc::DT_LNK => Self::Symlink,
33+
libc::DT_SOCK => Self::Socket,
34+
/* libc::DT_UNKNOWN | */ _ => Self::Unknown,
35+
}
36+
}
37+
}

src/lib.rs

+10
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//! They may be enabled in any combination.
1010
//! * `acct` - Process accounting
1111
//! * `aio` - POSIX AIO
12+
//! * `dents` - Raw directory iteration using Linux's getdents syscall
1213
//! * `dir` - Stuff relating to directory iteration
1314
//! * `env` - Manipulate environment variables
1415
//! * `event` - Event-driven APIs, like `kqueue` and `epoll`
@@ -63,6 +64,11 @@ pub use libc;
6364
mod macros;
6465

6566
// Public crates
67+
#[cfg(target_os = "linux")]
68+
feature! {
69+
#![feature = "dents"]
70+
pub mod dents;
71+
}
6672
#[cfg(not(target_os = "redox"))]
6773
feature! {
6874
#![feature = "dir"]
@@ -80,6 +86,10 @@ feature! {
8086
#[deny(missing_docs)]
8187
pub mod features;
8288
}
89+
feature! {
90+
#![feature = "file_type"]
91+
pub mod file_type;
92+
}
8393
#[allow(missing_docs)]
8494
pub mod fcntl;
8595
feature! {

test/test_dir.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ fn read() {
3232

3333
// Check file types. The system is allowed to return DT_UNKNOWN (aka `Type::Unknown` here) but if it does
3434
// return a type, ensure it's correct.
35-
assert!(&[Some(Type::Directory), None].contains(&entries[0].file_type())); // .: dir
36-
assert!(&[Some(Type::Directory), None].contains(&entries[1].file_type())); // ..: dir
37-
assert!(&[Some(Type::Symlink), None].contains(&entries[2].file_type())); // bar: symlink
38-
assert!(&[Some(Type::File), None].contains(&entries[3].file_type())); // foo: regular file
35+
assert!(&[Type::Directory, Type::Unknown].contains(&entries[0].file_type())); // .: dir
36+
assert!(&[Type::Directory, Type::Unknown].contains(&entries[1].file_type())); // ..: dir
37+
assert!(&[Type::Symlink, Type::Unknown].contains(&entries[2].file_type())); // bar: symlink
38+
assert!(&[Type::File, Type::Unknown].contains(&entries[3].file_type())); // foo: regular file
3939
}
4040

4141
#[test]

0 commit comments

Comments
 (0)