From e5627375739732bf4cbdd6b6c106190539e59ab4 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 24 Dec 2023 18:21:28 +0100 Subject: [PATCH 01/30] rust: upgrade to Rust 1.75.0 This is the next upgrade to the Rust toolchain, from 1.74.1 to 1.75.0 (i.e. the latest) [1]. See the upgrade policy [2] and the comments on the first upgrade in commit 3ed03f4da06e ("rust: upgrade to Rust 1.68.2"). # Unstable features The `const_maybe_uninit_zeroed` unstable feature [3] was stabilized in Rust 1.75.0, which we were using in the PHYLIB abstractions. The only unstable features allowed to be used outside the `kernel` crate are still `new_uninit,offset_of`, though other code to be upstreamed may increase the list. Please see [4] for details. # Other improvements Rust 1.75.0 stabilized `pointer_byte_offsets` [5] which we could potentially use as an alternative for `ptr_metadata` in the future. # Required changes For this upgrade, no changes were required (i.e. on our side). # `alloc` upgrade and reviewing The vast majority of changes are due to our `alloc` fork being upgraded at once. There are two kinds of changes to be aware of: the ones coming from upstream, which we should follow as closely as possible, and the updates needed in our added fallible APIs to keep them matching the newer infallible APIs coming from upstream. Instead of taking a look at the diff of this patch, an alternative approach is reviewing a diff of the changes between upstream `alloc` and the kernel's. This allows to easily inspect the kernel additions only, especially to check if the fallible methods we already have still match the infallible ones in the new version coming from upstream. Another approach is reviewing the changes introduced in the additions in the kernel fork between the two versions. This is useful to spot potentially unintended changes to our additions. To apply these approaches, one may follow steps similar to the following to generate a pair of patches that show the differences between upstream Rust and the kernel (for the subset of `alloc` we use) before and after applying this patch: # Get the difference with respect to the old version. git -C rust checkout $(linux/scripts/min-tool-version.sh rustc) git -C linux ls-tree -r --name-only HEAD -- rust/alloc | cut -d/ -f3- | grep -Fv README.md | xargs -IPATH cp rust/library/alloc/src/PATH linux/rust/alloc/PATH git -C linux diff --patch-with-stat --summary -R > old.patch git -C linux restore rust/alloc # Apply this patch. git -C linux am rust-upgrade.patch # Get the difference with respect to the new version. git -C rust checkout $(linux/scripts/min-tool-version.sh rustc) git -C linux ls-tree -r --name-only HEAD -- rust/alloc | cut -d/ -f3- | grep -Fv README.md | xargs -IPATH cp rust/library/alloc/src/PATH linux/rust/alloc/PATH git -C linux diff --patch-with-stat --summary -R > new.patch git -C linux restore rust/alloc Now one may check the `new.patch` to take a look at the additions (first approach) or at the difference between those two patches (second approach). For the latter, a side-by-side tool is recommended. Link: https://github.com/rust-lang/rust/blob/stable/RELEASES.md#version-1750-2023-12-28 [1] Link: https://rust-for-linux.com/rust-version-policy [2] Link: https://github.com/rust-lang/rust/issues/91850 [3] Link: https://github.com/Rust-for-Linux/linux/issues/2 [4] Link: https://github.com/rust-lang/rust/issues/96283 [5] Signed-off-by: Miguel Ojeda Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Vincenzo Palazzo Link: https://lore.kernel.org/r/20231224172128.271447-1-ojeda@kernel.org --- Documentation/process/changes.rst | 2 +- rust/alloc/alloc.rs | 9 ++++++++- rust/alloc/boxed.rs | 20 ++++++++++++-------- rust/alloc/lib.rs | 7 ++++--- rust/alloc/raw_vec.rs | 19 +++++++++++++++---- rust/alloc/vec/mod.rs | 16 ++++++++++------ rust/kernel/lib.rs | 1 - scripts/min-tool-version.sh | 2 +- 8 files changed, 51 insertions(+), 25 deletions(-) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 169f67773518ca..52284fdbaf23ee 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== GNU C 5.1 gcc --version Clang/LLVM (optional) 11.0.0 clang --version -Rust (optional) 1.74.1 rustc --version +Rust (optional) 1.75.0 rustc --version bindgen (optional) 0.65.1 bindgen --version GNU make 3.82 make --version bash 4.2 bash --version diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs index 150e13750ff70e..8a6be8c981731a 100644 --- a/rust/alloc/alloc.rs +++ b/rust/alloc/alloc.rs @@ -379,13 +379,20 @@ pub const fn handle_alloc_error(layout: Layout) -> ! { panic!("allocation failed"); } + #[inline] fn rt_error(layout: Layout) -> ! { unsafe { __rust_alloc_error_handler(layout.size(), layout.align()); } } - unsafe { core::intrinsics::const_eval_select((layout,), ct_error, rt_error) } + #[cfg(not(feature = "panic_immediate_abort"))] + unsafe { + core::intrinsics::const_eval_select((layout,), ct_error, rt_error) + } + + #[cfg(feature = "panic_immediate_abort")] + ct_error(layout) } // For alloc test `std::alloc::handle_alloc_error` can be used directly. diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs index 9620eba1726872..f5f40778a19373 100644 --- a/rust/alloc/boxed.rs +++ b/rust/alloc/boxed.rs @@ -161,7 +161,7 @@ use core::marker::Tuple; use core::marker::Unsize; use core::mem::{self, SizedTypeProperties}; use core::ops::{ - CoerceUnsized, Deref, DerefMut, DispatchFromDyn, Generator, GeneratorState, Receiver, + CoerceUnsized, Coroutine, CoroutineState, Deref, DerefMut, DispatchFromDyn, Receiver, }; use core::pin::Pin; use core::ptr::{self, NonNull, Unique}; @@ -211,7 +211,7 @@ impl Box { /// ``` /// let five = Box::new(5); /// ``` - #[cfg(all(not(no_global_oom_handling)))] + #[cfg(not(no_global_oom_handling))] #[inline(always)] #[stable(feature = "rust1", since = "1.0.0")] #[must_use] @@ -2110,28 +2110,28 @@ impl AsMut for Box { #[stable(feature = "pin", since = "1.33.0")] impl Unpin for Box where A: 'static {} -#[unstable(feature = "generator_trait", issue = "43122")] -impl + Unpin, R, A: Allocator> Generator for Box +#[unstable(feature = "coroutine_trait", issue = "43122")] +impl + Unpin, R, A: Allocator> Coroutine for Box where A: 'static, { type Yield = G::Yield; type Return = G::Return; - fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState { + fn resume(mut self: Pin<&mut Self>, arg: R) -> CoroutineState { G::resume(Pin::new(&mut *self), arg) } } -#[unstable(feature = "generator_trait", issue = "43122")] -impl, R, A: Allocator> Generator for Pin> +#[unstable(feature = "coroutine_trait", issue = "43122")] +impl, R, A: Allocator> Coroutine for Pin> where A: 'static, { type Yield = G::Yield; type Return = G::Return; - fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState { + fn resume(mut self: Pin<&mut Self>, arg: R) -> CoroutineState { G::resume((*self).as_mut(), arg) } } @@ -2448,4 +2448,8 @@ impl core::error::Error for Box { fn source(&self) -> Option<&(dyn core::error::Error + 'static)> { core::error::Error::source(&**self) } + + fn provide<'b>(&'b self, request: &mut core::error::Request<'b>) { + core::error::Error::provide(&**self, request); + } } diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs index 9c7ea73da10800..345cf5c9cf92d7 100644 --- a/rust/alloc/lib.rs +++ b/rust/alloc/lib.rs @@ -80,6 +80,8 @@ not(no_sync), target_has_atomic = "ptr" ))] +#![cfg_attr(not(bootstrap), doc(rust_logo))] +#![cfg_attr(not(bootstrap), feature(rustdoc_internals))] #![no_std] #![needs_allocator] // Lints: @@ -115,7 +117,6 @@ #![feature(const_eval_select)] #![feature(const_maybe_uninit_as_mut_ptr)] #![feature(const_maybe_uninit_write)] -#![feature(const_maybe_uninit_zeroed)] #![feature(const_pin)] #![feature(const_refs_to_cell)] #![feature(const_size_of_val)] @@ -141,7 +142,7 @@ #![feature(maybe_uninit_uninit_array)] #![feature(maybe_uninit_uninit_array_transpose)] #![feature(pattern)] -#![feature(pointer_byte_offsets)] +#![feature(ptr_addr_eq)] #![feature(ptr_internals)] #![feature(ptr_metadata)] #![feature(ptr_sub_ptr)] @@ -168,7 +169,7 @@ // // Language features: // tidy-alphabetical-start -#![cfg_attr(not(test), feature(generator_trait))] +#![cfg_attr(not(test), feature(coroutine_trait))] #![cfg_attr(test, feature(panic_update_hook))] #![cfg_attr(test, feature(test))] #![feature(allocator_internals)] diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs index a7425582a323f1..f1b8cec8cc6268 100644 --- a/rust/alloc/raw_vec.rs +++ b/rust/alloc/raw_vec.rs @@ -338,10 +338,13 @@ impl RawVec { /// The same as `reserve`, but returns on errors instead of panicking or aborting. pub fn try_reserve(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { if self.needs_to_grow(len, additional) { - self.grow_amortized(len, additional) - } else { - Ok(()) + self.grow_amortized(len, additional)?; + } + unsafe { + // Inform the optimizer that the reservation has succeeded or wasn't needed + core::intrinsics::assume(!self.needs_to_grow(len, additional)); } + Ok(()) } /// The same as `reserve_for_push`, but returns on errors instead of panicking or aborting. @@ -378,7 +381,14 @@ impl RawVec { len: usize, additional: usize, ) -> Result<(), TryReserveError> { - if self.needs_to_grow(len, additional) { self.grow_exact(len, additional) } else { Ok(()) } + if self.needs_to_grow(len, additional) { + self.grow_exact(len, additional)?; + } + unsafe { + // Inform the optimizer that the reservation has succeeded or wasn't needed + core::intrinsics::assume(!self.needs_to_grow(len, additional)); + } + Ok(()) } /// Shrinks the buffer down to the specified capacity. If the given amount @@ -569,6 +579,7 @@ fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> { // ensure that the code generation related to these panics is minimal as there's // only one location which panics rather than a bunch throughout the module. #[cfg(not(no_global_oom_handling))] +#[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] fn capacity_overflow() -> ! { panic!("capacity overflow"); } diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs index 41ca71805ef09e..0d95fd7ef3375f 100644 --- a/rust/alloc/vec/mod.rs +++ b/rust/alloc/vec/mod.rs @@ -1376,7 +1376,7 @@ impl Vec { /// [`as_mut_ptr`]: Vec::as_mut_ptr /// [`as_ptr`]: Vec::as_ptr #[stable(feature = "vec_as_ptr", since = "1.37.0")] - #[cfg_attr(not(bootstrap), rustc_never_returns_null_ptr)] + #[rustc_never_returns_null_ptr] #[inline] pub fn as_ptr(&self) -> *const T { // We shadow the slice method of the same name to avoid going through @@ -1436,7 +1436,7 @@ impl Vec { /// [`as_mut_ptr`]: Vec::as_mut_ptr /// [`as_ptr`]: Vec::as_ptr #[stable(feature = "vec_as_ptr", since = "1.37.0")] - #[cfg_attr(not(bootstrap), rustc_never_returns_null_ptr)] + #[rustc_never_returns_null_ptr] #[inline] pub fn as_mut_ptr(&mut self) -> *mut T { // We shadow the slice method of the same name to avoid going through @@ -1565,7 +1565,8 @@ impl Vec { #[stable(feature = "rust1", since = "1.0.0")] pub fn swap_remove(&mut self, index: usize) -> T { #[cold] - #[inline(never)] + #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] + #[track_caller] fn assert_failed(index: usize, len: usize) -> ! { panic!("swap_remove index (is {index}) should be < len (is {len})"); } @@ -1606,7 +1607,8 @@ impl Vec { #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(&mut self, index: usize, element: T) { #[cold] - #[inline(never)] + #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] + #[track_caller] fn assert_failed(index: usize, len: usize) -> ! { panic!("insertion index (is {index}) should be <= len (is {len})"); } @@ -1667,7 +1669,7 @@ impl Vec { #[track_caller] pub fn remove(&mut self, index: usize) -> T { #[cold] - #[inline(never)] + #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] #[track_caller] fn assert_failed(index: usize, len: usize) -> ! { panic!("removal index (is {index}) should be < len (is {len})"); @@ -2097,6 +2099,7 @@ impl Vec { } else { unsafe { self.len -= 1; + core::intrinsics::assume(self.len < self.capacity()); Some(ptr::read(self.as_ptr().add(self.len()))) } } @@ -2299,7 +2302,8 @@ impl Vec { A: Clone, { #[cold] - #[inline(never)] + #[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))] + #[track_caller] fn assert_failed(at: usize, len: usize) -> ! { panic!("`at` split index (is {at}) should be <= len (is {len})"); } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 7ac39874aeac36..cb2d024db51feb 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -14,7 +14,6 @@ #![no_std] #![feature(allocator_api)] #![feature(coerce_unsized)] -#![feature(const_maybe_uninit_zeroed)] #![feature(dispatch_from_dyn)] #![feature(new_uninit)] #![feature(offset_of)] diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index c62066825f538c..bcc7d4247290d4 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -31,7 +31,7 @@ llvm) fi ;; rustc) - echo 1.74.1 + echo 1.75.0 ;; bindgen) echo 0.65.1 From 39f11a3310a74b9f24d81c4ee57af2e4d38c7078 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 6 Dec 2023 11:59:46 +0000 Subject: [PATCH 02/30] rust: file: add Rust abstraction for `struct file` This abstraction makes it possible to manipulate the open files for a process. The new `File` struct wraps the C `struct file`. When accessing it using the smart pointer `ARef`, the pointer will own a reference count to the file. When accessing it as `&File`, then the reference does not own a refcount, but the borrow checker will ensure that the reference count does not hit zero while the `&File` is live. Since this is intended to manipulate the open files of a process, we introduce a `from_fd` constructor that corresponds to the C `fget` method. In future patches, it will become possible to create a new fd in a process and bind it to a `File`. Rust Binder will use these to send fds from one process to another. We also provide a method for accessing the file's flags. Rust Binder will use this to access the flags of the Binder fd to check whether the non-blocking flag is set, which affects what the Binder ioctl does. This introduces a struct for the EBADF error type, rather than just using the Error type directly. This has two advantages: * `File::from_fd` returns a `Result, BadFdError>`, which the compiler will represent as a single pointer, with null being an error. This is possible because the compiler understands that `BadFdError` has only one possible value, and it also understands that the `ARef` smart pointer is guaranteed non-null. * Additionally, we promise to users of the method that the method can only fail with EBADF, which means that they can rely on this promise without having to inspect its implementation. That said, there are also two disadvantages: * Defining additional error types involves boilerplate. * The question mark operator will only utilize the `From` trait once, which prevents you from using the question mark operator on `BadFdError` in methods that return some third error type that the kernel `Error` is convertible into. (However, it works fine in methods that return `Error`.) Signed-off-by: Wedson Almeida Filho Co-developed-by: Daniel Xu Signed-off-by: Daniel Xu Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-1-af617c0d9d94@google.com [boqun: Resolve conflicts against net-next] --- rust/bindings/bindings_helper.h | 2 + rust/helpers.c | 7 ++ rust/kernel/file.rs | 196 ++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 4 files changed, 206 insertions(+) create mode 100644 rust/kernel/file.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index c0cb4b05b9185b..09e16f54d8620f 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index 70e59efd92bc43..03141a3608a45a 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -157,6 +158,12 @@ void rust_helper_init_work_with_key(struct work_struct *work, work_func_t func, } EXPORT_SYMBOL_GPL(rust_helper_init_work_with_key); +struct file *rust_helper_get_file(struct file *f) +{ + return get_file(f); +} +EXPORT_SYMBOL_GPL(rust_helper_get_file); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs new file mode 100644 index 00000000000000..29e1aacacd0662 --- /dev/null +++ b/rust/kernel/file.rs @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Files and file descriptors. +//! +//! C headers: [`include/linux/fs.h`](../../../../include/linux/fs.h) and +//! [`include/linux/file.h`](../../../../include/linux/file.h) + +use crate::{ + bindings, + error::{code::*, Error, Result}, + types::{ARef, AlwaysRefCounted, Opaque}, +}; +use core::ptr; + +/// Flags associated with a [`File`]. +pub mod flags { + /// File is opened in append mode. + pub const O_APPEND: u32 = bindings::O_APPEND; + + /// Signal-driven I/O is enabled. + pub const O_ASYNC: u32 = bindings::FASYNC; + + /// Close-on-exec flag is set. + pub const O_CLOEXEC: u32 = bindings::O_CLOEXEC; + + /// File was created if it didn't already exist. + pub const O_CREAT: u32 = bindings::O_CREAT; + + /// Direct I/O is enabled for this file. + pub const O_DIRECT: u32 = bindings::O_DIRECT; + + /// File must be a directory. + pub const O_DIRECTORY: u32 = bindings::O_DIRECTORY; + + /// Like [`O_SYNC`] except metadata is not synced. + pub const O_DSYNC: u32 = bindings::O_DSYNC; + + /// Ensure that this file is created with the `open(2)` call. + pub const O_EXCL: u32 = bindings::O_EXCL; + + /// Large file size enabled (`off64_t` over `off_t`). + pub const O_LARGEFILE: u32 = bindings::O_LARGEFILE; + + /// Do not update the file last access time. + pub const O_NOATIME: u32 = bindings::O_NOATIME; + + /// File should not be used as process's controlling terminal. + pub const O_NOCTTY: u32 = bindings::O_NOCTTY; + + /// If basename of path is a symbolic link, fail open. + pub const O_NOFOLLOW: u32 = bindings::O_NOFOLLOW; + + /// File is using nonblocking I/O. + pub const O_NONBLOCK: u32 = bindings::O_NONBLOCK; + + /// Also known as `O_NDELAY`. + /// + /// This is effectively the same flag as [`O_NONBLOCK`] on all architectures + /// except SPARC64. + pub const O_NDELAY: u32 = bindings::O_NDELAY; + + /// Used to obtain a path file descriptor. + pub const O_PATH: u32 = bindings::O_PATH; + + /// Write operations on this file will flush data and metadata. + pub const O_SYNC: u32 = bindings::O_SYNC; + + /// This file is an unnamed temporary regular file. + pub const O_TMPFILE: u32 = bindings::O_TMPFILE; + + /// File should be truncated to length 0. + pub const O_TRUNC: u32 = bindings::O_TRUNC; + + /// Bitmask for access mode flags. + /// + /// # Examples + /// + /// ``` + /// use kernel::file; + /// # fn do_something() {} + /// # let flags = 0; + /// if (flags & file::flags::O_ACCMODE) == file::flags::O_RDONLY { + /// do_something(); + /// } + /// ``` + pub const O_ACCMODE: u32 = bindings::O_ACCMODE; + + /// File is read only. + pub const O_RDONLY: u32 = bindings::O_RDONLY; + + /// File is write only. + pub const O_WRONLY: u32 = bindings::O_WRONLY; + + /// File can be both read and written. + pub const O_RDWR: u32 = bindings::O_RDWR; +} + +/// Wraps the kernel's `struct file`. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_file` ensures that the +/// allocation remains valid at least until the matching call to `fput`. +#[repr(transparent)] +pub struct File(Opaque); + +// SAFETY: By design, the only way to access a `File` is via an immutable reference or an `ARef`. +// This means that the only situation in which a `File` can be accessed mutably is when the +// refcount drops to zero and the destructor runs. It is safe for that to happen on any thread, so +// it is ok for this type to be `Send`. +unsafe impl Send for File {} + +// SAFETY: All methods defined on `File` that take `&self` are safe to call even if other threads +// are concurrently accessing the same `struct file`, because those methods either access immutable +// properties or have proper synchronization to ensure that such accesses are safe. +unsafe impl Sync for File {} + +impl File { + /// Constructs a new `struct file` wrapper from a file descriptor. + /// + /// The file descriptor belongs to the current process. + pub fn fget(fd: u32) -> Result, BadFdError> { + // SAFETY: FFI call, there are no requirements on `fd`. + let ptr = ptr::NonNull::new(unsafe { bindings::fget(fd) }).ok_or(BadFdError)?; + + // SAFETY: `fget` either returns null or a valid pointer to a file, and we checked for null + // above. + // + // INVARIANT: `fget` increments the refcount before returning. + Ok(unsafe { ARef::from_raw(ptr.cast()) }) + } + + /// Creates a reference to a [`File`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` points at a valid file and that its refcount does not + /// reach zero during the lifetime 'a. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::file) -> &'a File { + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the + // duration of 'a. The cast is okay because `File` is `repr(transparent)`. + // + // INVARIANT: The safety requirements guarantee that the refcount does not hit zero during + // 'a. + unsafe { &*ptr.cast() } + } + + /// Returns a raw pointer to the inner C struct. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::file { + self.0.get() + } + + /// Returns the flags associated with the file. + /// + /// The flags are a combination of the constants in [`flags`]. + pub fn flags(&self) -> u32 { + // This `read_volatile` is intended to correspond to a READ_ONCE call. + // + // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. + // + // TODO: Replace with `read_once` when available on the Rust side. + unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } + } +} + +// SAFETY: The type invariants guarantee that `File` is always ref-counted. +unsafe impl AlwaysRefCounted for File { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_file(self.as_ptr()) }; + } + + unsafe fn dec_ref(obj: ptr::NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. + unsafe { bindings::fput(obj.cast().as_ptr()) } + } +} + +/// Represents the `EBADF` error code. +/// +/// Used for methods that can only fail with `EBADF`. +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct BadFdError; + +impl From for Error { + fn from(_: BadFdError) -> Error { + EBADF + } +} + +impl core::fmt::Debug for BadFdError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.pad("EBADF") + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index cb2d024db51feb..08872bac32d379 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -34,6 +34,7 @@ extern crate self as kernel; mod allocator; mod build_assert; pub mod error; +pub mod file; pub mod init; pub mod ioctl; #[cfg(CONFIG_KUNIT)] From 1d0fab60c1a3169dce30a5ec2e526899c995d851 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 6 Dec 2023 11:59:47 +0000 Subject: [PATCH 03/30] rust: cred: add Rust abstraction for `struct cred` Add a wrapper around `struct cred` called `Credential`, and provide functionality to get the `Credential` associated with a `File`. Rust Binder must check the credentials of processes when they attempt to perform various operations, and these checks usually take a `&Credential` as parameter. The security_binder_set_context_mgr function would be one example. This patch is necessary to access these security_* methods from Rust. Signed-off-by: Wedson Almeida Filho Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-2-af617c0d9d94@google.com --- rust/bindings/bindings_helper.h | 1 + rust/helpers.c | 13 +++++++ rust/kernel/cred.rs | 64 +++++++++++++++++++++++++++++++++ rust/kernel/file.rs | 16 +++++++++ rust/kernel/lib.rs | 1 + 5 files changed, 95 insertions(+) create mode 100644 rust/kernel/cred.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 09e16f54d8620f..354624096eeae1 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index 03141a3608a45a..10ed69f76424da 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -164,6 +165,18 @@ struct file *rust_helper_get_file(struct file *f) } EXPORT_SYMBOL_GPL(rust_helper_get_file); +const struct cred *rust_helper_get_cred(const struct cred *cred) +{ + return get_cred(cred); +} +EXPORT_SYMBOL_GPL(rust_helper_get_cred); + +void rust_helper_put_cred(const struct cred *cred) +{ + put_cred(cred); +} +EXPORT_SYMBOL_GPL(rust_helper_put_cred); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs new file mode 100644 index 00000000000000..497058ec89bbf8 --- /dev/null +++ b/rust/kernel/cred.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Credentials management. +//! +//! C header: [`include/linux/cred.h`](../../../../include/linux/cred.h) +//! +//! Reference: + +use crate::{ + bindings, + types::{AlwaysRefCounted, Opaque}, +}; + +/// Wraps the kernel's `struct cred`. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_cred` ensures that the +/// allocation remains valid at least until the matching call to `put_cred`. +#[repr(transparent)] +pub struct Credential(pub(crate) Opaque); + +// SAFETY: By design, the only way to access a `Credential` is via an immutable reference or an +// `ARef`. This means that the only situation in which a `Credential` can be accessed mutably is +// when the refcount drops to zero and the destructor runs. It is safe for that to happen on any +// thread, so it is ok for this type to be `Send`. +unsafe impl Send for Credential {} + +// SAFETY: It's OK to access `Credential` through shared references from other threads because +// we're either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for Credential {} + +impl Credential { + /// Creates a reference to a [`Credential`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the + /// returned [`Credential`] reference. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::cred) -> &'a Credential { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `Credential` type being transparent makes the cast ok. + unsafe { &*ptr.cast() } + } + + /// Returns the effective UID of the given credential. + pub fn euid(&self) -> bindings::kuid_t { + // SAFETY: By the type invariant, we know that `self.0` is valid. + unsafe { (*self.0.get()).euid } + } +} + +// SAFETY: The type invariants guarantee that `Credential` is always ref-counted. +unsafe impl AlwaysRefCounted for Credential { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_cred(self.0.get()) }; + } + + unsafe fn dec_ref(obj: core::ptr::NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. + unsafe { bindings::put_cred(obj.cast().as_ptr()) }; + } +} diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs index 29e1aacacd0662..a88140794a8deb 100644 --- a/rust/kernel/file.rs +++ b/rust/kernel/file.rs @@ -7,6 +7,7 @@ use crate::{ bindings, + cred::Credential, error::{code::*, Error, Result}, types::{ARef, AlwaysRefCounted, Opaque}, }; @@ -151,6 +152,21 @@ impl File { self.0.get() } + /// Returns the credentials of the task that originally opened the file. + pub fn cred(&self) -> &Credential { + // SAFETY: Since the caller holds a reference to the file, it is guaranteed that its + // refcount does not hit zero during this function call. + // + // It's okay to read the `f_cred` field without synchronization as `f_cred` is never + // changed after initialization of the file. + let ptr = unsafe { (*self.as_ptr()).f_cred }; + + // SAFETY: The signature of this function ensures that the caller will only access the + // returned credential while the file is still valid, and the C side ensures that the + // credential stays valid at least as long as the file. + unsafe { Credential::from_ptr(ptr) } + } + /// Returns the flags associated with the file. /// /// The flags are a combination of the constants in [`flags`]. diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 08872bac32d379..694108e66b24ae 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -33,6 +33,7 @@ extern crate self as kernel; #[cfg(not(testlib))] mod allocator; mod build_assert; +pub mod cred; pub mod error; pub mod file; pub mod init; From dab0fe4e8de09429a4aebd009794d8beb1a71b69 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:48 +0000 Subject: [PATCH 04/30] rust: security: add abstraction for secctx Adds an abstraction for viewing the string representation of a security context. This is needed by Rust Binder because it has feature where a process can view the string representation of the security context for incoming transactions. The process can use that to authenticate incoming transactions, and since the feature is provided by the kernel, the process can trust that the security context is legitimate. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-3-af617c0d9d94@google.com --- rust/bindings/bindings_helper.h | 1 + rust/helpers.c | 21 +++++++++ rust/kernel/cred.rs | 8 ++++ rust/kernel/lib.rs | 1 + rust/kernel/security.rs | 79 +++++++++++++++++++++++++++++++++ 5 files changed, 110 insertions(+) create mode 100644 rust/kernel/security.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 354624096eeae1..b5678c8b6f58f0 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index 10ed69f76424da..fd633d9db79a38 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -177,6 +178,26 @@ void rust_helper_put_cred(const struct cred *cred) } EXPORT_SYMBOL_GPL(rust_helper_put_cred); +#ifndef CONFIG_SECURITY +void rust_helper_security_cred_getsecid(const struct cred *c, u32 *secid) +{ + security_cred_getsecid(c, secid); +} +EXPORT_SYMBOL_GPL(rust_helper_security_cred_getsecid); + +int rust_helper_security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return security_secid_to_secctx(secid, secdata, seclen); +} +EXPORT_SYMBOL_GPL(rust_helper_security_secid_to_secctx); + +void rust_helper_security_release_secctx(char *secdata, u32 seclen) +{ + security_release_secctx(secdata, seclen); +} +EXPORT_SYMBOL_GPL(rust_helper_security_release_secctx); +#endif + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs index 497058ec89bbf8..3794937b52947d 100644 --- a/rust/kernel/cred.rs +++ b/rust/kernel/cred.rs @@ -43,6 +43,14 @@ impl Credential { unsafe { &*ptr.cast() } } + /// Get the id for this security context. + pub fn get_secid(&self) -> u32 { + let mut secid = 0; + // SAFETY: The invariants of this type ensures that the pointer is valid. + unsafe { bindings::security_cred_getsecid(self.0.get(), &mut secid) }; + secid + } + /// Returns the effective UID of the given credential. pub fn euid(&self) -> bindings::kuid_t { // SAFETY: By the type invariant, we know that `self.0` is valid. diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 694108e66b24ae..446107a970a821 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -44,6 +44,7 @@ pub mod kunit; pub mod net; pub mod prelude; pub mod print; +pub mod security; mod static_assert; #[doc(hidden)] pub mod std_vendor; diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs new file mode 100644 index 00000000000000..6545bfa2fd72e9 --- /dev/null +++ b/rust/kernel/security.rs @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Linux Security Modules (LSM). +//! +//! C header: [`include/linux/security.h`](../../../../include/linux/security.h). + +use crate::{ + bindings, + error::{to_result, Result}, +}; + +/// A security context string. +/// +/// # Invariants +/// +/// The `secdata` and `seclen` fields correspond to a valid security context as returned by a +/// successful call to `security_secid_to_secctx`, that has not yet been destroyed by calling +/// `security_release_secctx`. +pub struct SecurityCtx { + secdata: *mut core::ffi::c_char, + seclen: usize, +} + +impl SecurityCtx { + /// Get the security context given its id. + pub fn from_secid(secid: u32) -> Result { + let mut secdata = core::ptr::null_mut(); + let mut seclen = 0u32; + // SAFETY: Just a C FFI call. The pointers are valid for writes. + unsafe { + to_result(bindings::security_secid_to_secctx( + secid, + &mut secdata, + &mut seclen, + ))?; + } + + // INVARIANT: If the above call did not fail, then we have a valid security context. + Ok(Self { + secdata, + seclen: seclen as usize, + }) + } + + /// Returns whether the security context is empty. + pub fn is_empty(&self) -> bool { + self.seclen == 0 + } + + /// Returns the length of this security context. + pub fn len(&self) -> usize { + self.seclen + } + + /// Returns the bytes for this security context. + pub fn as_bytes(&self) -> &[u8] { + let ptr = self.secdata; + if ptr.is_null() { + // We can't pass a null pointer to `slice::from_raw_parts` even if the length is zero. + debug_assert_eq!(self.seclen, 0); + return &[]; + } + + // SAFETY: The call to `security_secid_to_secctx` guarantees that the pointer is valid for + // `seclen` bytes. Furthermore, if the length is zero, then we have ensured that the + // pointer is not null. + unsafe { core::slice::from_raw_parts(ptr.cast(), self.seclen) } + } +} + +impl Drop for SecurityCtx { + fn drop(&mut self) { + // SAFETY: This frees a pointer that came from a successful call to + // `security_secid_to_secctx` and has not yet been destroyed by `security_release_secctx`. + unsafe { + bindings::security_release_secctx(self.secdata, self.seclen as u32); + } + } +} From 5a5888c0fac591ac4e5dad2977e77b6a6d7844b4 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 6 Dec 2023 11:59:49 +0000 Subject: [PATCH 05/30] rust: file: add `FileDescriptorReservation` Allow for the creation of a file descriptor in two steps: first, we reserve a slot for it, then we commit or drop the reservation. The first step may fail (e.g., the current process ran out of available slots), but commit and drop never fail (and are mutually exclusive). This is needed by Rust Binder when fds are sent from one process to another. It has to be a two-step process to properly handle the case where multiple fds are sent: The operation must fail or succeed atomically, which we achieve by first reserving the fds we need, and only installing the files once we have reserved enough fds to send the files. Fd reservations assume that the value of `current` does not change between the call to get_unused_fd_flags and the call to fd_install (or put_unused_fd). By not implementing the Send trait, this abstraction ensures that the `FileDescriptorReservation` cannot be moved into a different process. Signed-off-by: Wedson Almeida Filho Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-4-af617c0d9d94@google.com --- rust/kernel/file.rs | 68 ++++++++++++++++++++++++++++++++++++++++++-- rust/kernel/types.rs | 10 +++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs index a88140794a8deb..2d036d4636a0fc 100644 --- a/rust/kernel/file.rs +++ b/rust/kernel/file.rs @@ -9,9 +9,9 @@ use crate::{ bindings, cred::Credential, error::{code::*, Error, Result}, - types::{ARef, AlwaysRefCounted, Opaque}, + types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, }; -use core::ptr; +use core::{marker::PhantomData, ptr}; /// Flags associated with a [`File`]. pub mod flags { @@ -193,6 +193,70 @@ unsafe impl AlwaysRefCounted for File { } } +/// A file descriptor reservation. +/// +/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it, +/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran +/// out of available slots), but commit and drop never fail (and are mutually exclusive). +/// +/// Dropping the reservation happens in the destructor of this type. +/// +/// # Invariants +/// +/// The fd stored in this struct must correspond to a reserved file descriptor of the current task. +pub struct FileDescriptorReservation { + fd: u32, + /// Prevent values of this type from being moved to a different task. + /// + /// The `fd_install` and `put_unused_fd` functions assume that the value of `current` is + /// unchanged since the call to `get_unused_fd_flags`. By adding this marker to this type, we + /// prevent it from being moved across task boundaries, which ensures that `current` does not + /// change while this value exists. + _not_send: NotThreadSafe, +} + +impl FileDescriptorReservation { + /// Creates a new file descriptor reservation. + pub fn get_unused_fd_flags(flags: u32) -> Result { + // SAFETY: FFI call, there are no safety requirements on `flags`. + let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) }; + if fd < 0 { + return Err(Error::from_errno(fd)); + } + Ok(Self { + fd: fd as u32, + _not_send: PhantomData, + }) + } + + /// Returns the file descriptor number that was reserved. + pub fn reserved_fd(&self) -> u32 { + self.fd + } + + /// Commits the reservation. + /// + /// The previously reserved file descriptor is bound to `file`. This method consumes the + /// [`FileDescriptorReservation`], so it will not be usable after this call. + pub fn fd_install(self, file: ARef) { + // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`, and `file.ptr` is + // guaranteed to have an owned ref count by its type invariants. + unsafe { bindings::fd_install(self.fd, file.0.get()) }; + + // `fd_install` consumes both the file descriptor and the file reference, so we cannot run + // the destructors. + core::mem::forget(self); + core::mem::forget(file); + } +} + +impl Drop for FileDescriptorReservation { + fn drop(&mut self) { + // SAFETY: `self.fd` was returned by a previous call to `get_unused_fd_flags`. + unsafe { bindings::put_unused_fd(self.fd) }; + } +} + /// Represents the `EBADF` error code. /// /// Used for methods that can only fail with `EBADF`. diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index d849e1979ac707..c0940af1239b1b 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -432,3 +432,13 @@ pub enum Either { /// Constructs an instance of [`Either`] containing a value of type `R`. Right(R), } + +/// Zero-sized type to mark types not [`Send`]. +/// +/// Add this type as a field to your struct if your type should not be sent to a different task. +/// Since [`Send`] is an auto trait, adding a single field that is `!Send` will ensure that the +/// whole type is `!Send`. +/// +/// If a type is `!Send` it is impossible to give control over an instance of the type to another +/// task. This is useful when a type stores task-local information for example file descriptors. +pub type NotThreadSafe = PhantomData<*mut ()>; From e98a9c75f288acb699d2c835d8e93b1c93e14cf5 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:50 +0000 Subject: [PATCH 06/30] rust: file: add `Kuid` wrapper Adds a wrapper around `kuid_t` called `Kuid`. This allows us to define various operations on kuids such as equality and current_euid. It also lets us provide conversions from kuid into userspace values. Rust Binder needs these operations because it needs to compare kuids for equality, and it needs to tell userspace about the pid and uid of incoming transactions. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-5-af617c0d9d94@google.com --- rust/bindings/bindings_helper.h | 1 + rust/helpers.c | 45 ++++++++++++++++++++++ rust/kernel/cred.rs | 5 ++- rust/kernel/task.rs | 68 ++++++++++++++++++++++++++++++++- 4 files changed, 116 insertions(+), 3 deletions(-) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index b5678c8b6f58f0..71c52d234a24ec 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index fd633d9db79a38..58e3a9dff3499d 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t) } EXPORT_SYMBOL_GPL(rust_helper_put_task_struct); +kuid_t rust_helper_task_uid(struct task_struct *task) +{ + return task_uid(task); +} +EXPORT_SYMBOL_GPL(rust_helper_task_uid); + +kuid_t rust_helper_task_euid(struct task_struct *task) +{ + return task_euid(task); +} +EXPORT_SYMBOL_GPL(rust_helper_task_euid); + +#ifndef CONFIG_USER_NS +uid_t rust_helper_from_kuid(struct user_namespace *to, kuid_t uid) +{ + return from_kuid(to, uid); +} +EXPORT_SYMBOL_GPL(rust_helper_from_kuid); +#endif /* CONFIG_USER_NS */ + +bool rust_helper_uid_eq(kuid_t left, kuid_t right) +{ + return uid_eq(left, right); +} +EXPORT_SYMBOL_GPL(rust_helper_uid_eq); + +kuid_t rust_helper_current_euid(void) +{ + return current_euid(); +} +EXPORT_SYMBOL_GPL(rust_helper_current_euid); + +struct user_namespace *rust_helper_current_user_ns(void) +{ + return current_user_ns(); +} +EXPORT_SYMBOL_GPL(rust_helper_current_user_ns); + +pid_t rust_helper_task_tgid_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) +{ + return task_tgid_nr_ns(tsk, ns); +} +EXPORT_SYMBOL_GPL(rust_helper_task_tgid_nr_ns); + struct kunit *rust_helper_kunit_get_current_test(void) { return kunit_get_current_test(); diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs index 3794937b52947d..fbc749788bfa3c 100644 --- a/rust/kernel/cred.rs +++ b/rust/kernel/cred.rs @@ -8,6 +8,7 @@ use crate::{ bindings, + task::Kuid, types::{AlwaysRefCounted, Opaque}, }; @@ -52,9 +53,9 @@ impl Credential { } /// Returns the effective UID of the given credential. - pub fn euid(&self) -> bindings::kuid_t { + pub fn euid(&self) -> Kuid { // SAFETY: By the type invariant, we know that `self.0` is valid. - unsafe { (*self.0.get()).euid } + Kuid::from_raw(unsafe { (*self.0.get()).euid }) } } diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 9451932d5d8674..d2f2615fe4a1b3 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -5,7 +5,12 @@ //! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h). use crate::{bindings, types::Opaque}; -use core::{marker::PhantomData, ops::Deref, ptr}; +use core::{ + cmp::{Eq, PartialEq}, + marker::PhantomData, + ops::Deref, + ptr, +}; /// Returns the currently running task. #[macro_export] @@ -78,6 +83,12 @@ unsafe impl Sync for Task {} /// The type of process identifiers (PIDs). type Pid = bindings::pid_t; +/// The type of user identifiers (UIDs). +#[derive(Copy, Clone)] +pub struct Kuid { + kuid: bindings::kuid_t, +} + impl Task { /// Returns a task reference for the currently executing task/thread. /// @@ -132,12 +143,32 @@ impl Task { unsafe { *ptr::addr_of!((*self.0.get()).pid) } } + /// Returns the UID of the given task. + pub fn uid(&self) -> Kuid { + // SAFETY: By the type invariant, we know that `self.0` is valid. + Kuid::from_raw(unsafe { bindings::task_uid(self.0.get()) }) + } + + /// Returns the effective UID of the given task. + pub fn euid(&self) -> Kuid { + // SAFETY: By the type invariant, we know that `self.0` is valid. + Kuid::from_raw(unsafe { bindings::task_euid(self.0.get()) }) + } + /// Determines whether the given task has pending signals. pub fn signal_pending(&self) -> bool { // SAFETY: By the type invariant, we know that `self.0` is valid. unsafe { bindings::signal_pending(self.0.get()) != 0 } } + /// Returns the given task's pid in the current pid namespace. + pub fn pid_in_current_ns(&self) -> Pid { + // SAFETY: Calling `task_active_pid_ns` with the current task is always safe. + let namespace = unsafe { bindings::task_active_pid_ns(bindings::get_current()) }; + // SAFETY: We know that `self.0.get()` is valid by the type invariant. + unsafe { bindings::task_tgid_nr_ns(self.0.get(), namespace) } + } + /// Wakes up the task. pub fn wake_up(&self) { // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid. @@ -147,6 +178,41 @@ impl Task { } } +impl Kuid { + /// Get the current euid. + pub fn current_euid() -> Kuid { + // SAFETY: Just an FFI call. + Self::from_raw(unsafe { bindings::current_euid() }) + } + + /// Create a `Kuid` given the raw C type. + pub fn from_raw(kuid: bindings::kuid_t) -> Self { + Self { kuid } + } + + /// Turn this kuid into the raw C type. + pub fn into_raw(self) -> bindings::kuid_t { + self.kuid + } + + /// Converts this kernel UID into a userspace UID. + /// + /// Uses the namespace of the current task. + pub fn into_uid_in_current_ns(self) -> bindings::uid_t { + // SAFETY: Just an FFI call. + unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) } + } +} + +impl PartialEq for Kuid { + fn eq(&self, other: &Kuid) -> bool { + // SAFETY: Just an FFI call. + unsafe { bindings::uid_eq(self.kuid, other.kuid) } + } +} + +impl Eq for Kuid {} + // SAFETY: The type invariants guarantee that `Task` is always ref-counted. unsafe impl crate::types::AlwaysRefCounted for Task { fn inc_ref(&self) { From befd1319adc48ef378b038f6ba2731a9f33bfc7e Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:51 +0000 Subject: [PATCH 07/30] rust: file: add `DeferredFdCloser` To close an fd from kernel space, we could call `ksys_close`. However, if we do this to an fd that is held using `fdget`, then we may trigger a use-after-free. Introduce a helper that can be used to close an fd even if the fd is currently held with `fdget`. This is done by grabbing an extra refcount to the file and dropping it in a task work once we return to userspace. This is necessary for Rust Binder because otherwise the user might try to have Binder close its fd for /dev/binder, which would cause problems as this happens inside an ioctl on /dev/binder, and ioctls hold the fd using `fdget`. Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to ksys_close() during fdget()") and in the comments on `binder_do_fd_close`. If there is some way to detect whether an fd is currently held with `fdget`, then this could be optimized to skip the allocation and task work when this is not the case. Another possible optimization would be to combine several fds into a single task work, since this is used with fd arrays that might hold several fds. That said, it might not be necessary to optimize it, because Rust Binder has two ways to send fds: BINDER_TYPE_FD and BINDER_TYPE_FDA. With BINDER_TYPE_FD, it is userspace's responsibility to close the fd, so this mechanism is used only by BINDER_TYPE_FDA, but fd arrays are used rarely these days. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-6-af617c0d9d94@google.com [boqun: Resolve conflicts against net-next] --- rust/bindings/bindings_helper.h | 2 + rust/helpers.c | 8 ++ rust/kernel/file.rs | 157 +++++++++++++++++++++++++++++++- 3 files changed, 166 insertions(+), 1 deletion(-) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 71c52d234a24ec..b5c22bad951d3a 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include /* `bindgen` gets confused at certain things. */ diff --git a/rust/helpers.c b/rust/helpers.c index 58e3a9dff3499d..d146bbf25aec4a 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -243,6 +244,13 @@ void rust_helper_security_release_secctx(char *secdata, u32 seclen) EXPORT_SYMBOL_GPL(rust_helper_security_release_secctx); #endif +void rust_helper_init_task_work(struct callback_head *twork, + task_work_func_t func) +{ + init_task_work(twork, func); +} +EXPORT_SYMBOL_GPL(rust_helper_init_task_work); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs index 2d036d4636a0fc..eba96af4bdf76c 100644 --- a/rust/kernel/file.rs +++ b/rust/kernel/file.rs @@ -11,7 +11,8 @@ use crate::{ error::{code::*, Error, Result}, types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, }; -use core::{marker::PhantomData, ptr}; +use alloc::boxed::Box; +use core::{alloc::AllocError, marker::PhantomData, mem, ptr}; /// Flags associated with a [`File`]. pub mod flags { @@ -257,6 +258,160 @@ impl Drop for FileDescriptorReservation { } } +/// Helper used for closing file descriptors in a way that is safe even if the file is currently +/// held using `fdget`. +/// +/// Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to +/// ksys_close() during fdget()") and in the comments on `binder_do_fd_close`. +pub struct DeferredFdCloser { + inner: Box, +} + +/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with +/// moving it across threads. +unsafe impl Send for DeferredFdCloser {} +unsafe impl Sync for DeferredFdCloser {} + +#[repr(C)] +struct DeferredFdCloserInner { + twork: mem::MaybeUninit, + file: *mut bindings::file, +} + +impl DeferredFdCloser { + /// Create a new [`DeferredFdCloser`]. + pub fn new() -> Result { + Ok(Self { + inner: Box::try_new(DeferredFdCloserInner { + twork: mem::MaybeUninit::uninit(), + file: core::ptr::null_mut(), + })?, + }) + } + + /// Schedule a task work that closes the file descriptor when this task returns to userspace. + /// + /// Fails if this is called from a context where we cannot run work when returning to + /// userspace. (E.g., from a kthread.) + pub fn close_fd(self, fd: u32) -> Result<(), DeferredFdCloseError> { + use bindings::task_work_notify_mode_TWA_RESUME as TWA_RESUME; + + // In this method, we schedule the task work before closing the file. This is because + // scheduling a task work is fallible, and we need to know whether it will fail before we + // attempt to close the file. + + // SAFETY: Getting a pointer to current is always safe. + let current = unsafe { bindings::get_current() }; + + // SAFETY: Accessing the `flags` field of `current` is always safe. + let is_kthread = (unsafe { (*current).flags } & bindings::PF_KTHREAD) != 0; + if is_kthread { + return Err(DeferredFdCloseError::TaskWorkUnavailable); + } + + // This disables the destructor of the box, so the allocation is not cleaned up + // automatically below. + let inner = Box::into_raw(self.inner); + + // The `callback_head` field is first in the struct, so this cast correctly gives us a + // pointer to the field. + let callback_head = inner.cast::(); + // SAFETY: This pointer offset operation does not go out-of-bounds. + let file_field = unsafe { core::ptr::addr_of_mut!((*inner).file) }; + + // SAFETY: The `callback_head` pointer is compatible with the `do_close_fd` method. + unsafe { bindings::init_task_work(callback_head, Some(Self::do_close_fd)) }; + // SAFETY: The `callback_head` pointer points at a valid and fully initialized task work + // that is ready to be scheduled. + // + // If the task work gets scheduled as-is, then it will be a no-op. However, we will update + // the file pointer below to tell it which file to fput. + let res = unsafe { bindings::task_work_add(current, callback_head, TWA_RESUME) }; + + if res != 0 { + // SAFETY: Scheduling the task work failed, so we still have ownership of the box, so + // we may destroy it. + unsafe { drop(Box::from_raw(inner)) }; + + return Err(DeferredFdCloseError::TaskWorkUnavailable); + } + + // SAFETY: Just an FFI call. This is safe no matter what `fd` is. + let file = unsafe { bindings::close_fd_get_file(fd) }; + if file.is_null() { + // We don't clean up the task work since that might be expensive if the task work queue + // is long. Just let it execute and let it clean up for itself. + return Err(DeferredFdCloseError::BadFd); + } + + // SAFETY: The `file` pointer points at a valid file. + unsafe { bindings::get_file(file) }; + + // SAFETY: Due to the above `get_file`, even if the current task holds an `fdget` to + // this file right now, the refcount will not drop to zero until after it is released + // with `fdput`. This is because when using `fdget`, you must always use `fdput` before + // returning to userspace, and our task work runs after any `fdget` users have returned + // to userspace. + // + // Note: fl_owner_t is currently a void pointer. + unsafe { bindings::filp_close(file, (*current).files as bindings::fl_owner_t) }; + + // We update the file pointer that the task work is supposed to fput. + // + // SAFETY: Task works are executed on the current thread once we return to userspace, so + // this write is guaranteed to happen before `do_close_fd` is called, which means that a + // race is not possible here. + // + // It's okay to pass this pointer to the task work, since we just acquired a refcount with + // the previous call to `get_file`. Furthermore, the refcount will not drop to zero during + // an `fdget` call, since we defer the `fput` until after returning to userspace. + unsafe { *file_field = file }; + + Ok(()) + } + + // SAFETY: This function is an implementation detail of `close_fd`, so its safety comments + // should be read in extension of that method. + unsafe extern "C" fn do_close_fd(inner: *mut bindings::callback_head) { + // SAFETY: In `close_fd` we use this method together with a pointer that originates from a + // `Box`, and we have just been given ownership of that allocation. + let inner = unsafe { Box::from_raw(inner as *mut DeferredFdCloserInner) }; + if !inner.file.is_null() { + // SAFETY: This drops a refcount we acquired in `close_fd`. Since this callback runs in + // a task work after we return to userspace, it is guaranteed that the current thread + // doesn't hold this file with `fdget`, as `fdget` must be released before returning to + // userspace. + unsafe { bindings::fput(inner.file) }; + } + // Free the allocation. + drop(inner); + } +} + +/// Represents a failure to close an fd in a deferred manner. +#[derive(Copy, Clone, Eq, PartialEq)] +pub enum DeferredFdCloseError { + /// Closing the fd failed because we were unable to schedule a task work. + TaskWorkUnavailable, + /// Closing the fd failed because the fd does not exist. + BadFd, +} + +impl From for Error { + fn from(err: DeferredFdCloseError) -> Error { + match err { + DeferredFdCloseError::TaskWorkUnavailable => ESRCH, + DeferredFdCloseError::BadFd => EBADF, + } + } +} + +impl core::fmt::Debug for DeferredFdCloseError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + Error::from(*self).fmt(f) + } +} + /// Represents the `EBADF` error code. /// /// Used for methods that can only fail with `EBADF`. From 0d0153373af9ecd5490820790057f50e0885518f Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:52 +0000 Subject: [PATCH 08/30] rust: file: add abstraction for `poll_table` The existing `CondVar` abstraction is a wrapper around `wait_list`, but it does not support all use-cases of the C `wait_list` type. To be specific, a `CondVar` cannot be registered with a `struct poll_table`. This limitation has the advantage that you do not need to call `synchronize_rcu` when destroying a `CondVar`. However, we need the ability to register a `poll_table` with a `wait_list` in Rust Binder. To enable this, introduce a type called `PollCondVar`, which is like `CondVar` except that you can register a `poll_table`. We also introduce `PollTable`, which is a safe wrapper around `poll_table` that is intended to be used with `PollCondVar`. The destructor of `PollCondVar` unconditionally calls `synchronize_rcu` to ensure that the removal of epoll waiters has fully completed before the `wait_list` is destroyed. That said, `synchronize_rcu` is rather expensive and is not needed in all cases: If we have never registered a `poll_table` with the `wait_list`, then we don't need to call `synchronize_rcu`. (And this is a common case in Binder - not all processes use Binder with epoll.) The current implementation does not account for this, but if we find that it is necessary to improve this, a future patch could change store a boolean next to the `wait_list` to keep track of whether a `poll_table` has ever been registered. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-7-af617c0d9d94@google.com [ boqun: Removes unused POLLFREE definition ] --- rust/bindings/bindings_helper.h | 1 + rust/kernel/sync.rs | 1 + rust/kernel/sync/poll.rs | 103 ++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 rust/kernel/sync/poll.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index b5c22bad951d3a..39680883768144 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index d219ee518eff15..84726f80c4069c 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -11,6 +11,7 @@ mod arc; mod condvar; pub mod lock; mod locked_by; +pub mod poll; pub use arc::{Arc, ArcBorrow, UniqueArc}; pub use condvar::CondVar; diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs new file mode 100644 index 00000000000000..e1dded9b7b9d69 --- /dev/null +++ b/rust/kernel/sync/poll.rs @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Utilities for working with `struct poll_table`. + +use crate::{ + bindings, + file::File, + prelude::*, + sync::{CondVar, LockClassKey}, + types::Opaque, +}; +use core::ops::Deref; + +/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class. +#[macro_export] +macro_rules! new_poll_condvar { + ($($name:literal)?) => { + $crate::file::PollCondVar::new($crate::optional_name!($($name)?), $crate::static_lock_class!()) + }; +} + +/// Wraps the kernel's `struct poll_table`. +#[repr(transparent)] +pub struct PollTable(Opaque); + +impl PollTable { + /// Creates a reference to a [`PollTable`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll + /// table, and that it is only accessed via the returned reference. + pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `PollTable` type being transparent makes the cast ok. + unsafe { &mut *ptr.cast() } + } + + fn get_qproc(&self) -> bindings::poll_queue_proc { + let ptr = self.0.get(); + // SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc` + // field is not modified concurrently with this call since we have an immutable reference. + unsafe { (*ptr)._qproc } + } + + /// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified + /// using the condition variable. + pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { + if let Some(qproc) = self.get_qproc() { + // SAFETY: The pointers to `self` and `file` are valid because they are references. + // + // Before the wait list is destroyed, the destructor of `PollCondVar` will clear + // everything in the wait list, so the wait list is not used after it is freed. + unsafe { qproc(file.as_ptr() as _, cv.wait_list.get(), self.0.get()) }; + } + } +} + +/// A wrapper around [`CondVar`] that makes it usable with [`PollTable`]. +/// +/// # Invariant +/// +/// If `needs_synchronize_rcu` is false, then there is nothing registered with `register_wait`. +/// +/// [`CondVar`]: crate::sync::CondVar +#[pin_data(PinnedDrop)] +pub struct PollCondVar { + #[pin] + inner: CondVar, +} + +impl PollCondVar { + /// Constructs a new condvar initialiser. + pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit { + pin_init!(Self { + inner <- CondVar::new(name, key), + }) + } +} + +// Make the `CondVar` methods callable on `PollCondVar`. +impl Deref for PollCondVar { + type Target = CondVar; + + fn deref(&self) -> &CondVar { + &self.inner + } +} + +#[pinned_drop] +impl PinnedDrop for PollCondVar { + fn drop(self: Pin<&mut Self>) { + // Clear anything registered using `register_wait`. + // + // SAFETY: The pointer points at a valid wait list. + unsafe { bindings::__wake_up_pollfree(self.inner.wait_list.get()) }; + + // Wait for epoll items to be properly removed. + // + // SAFETY: Just an FFI call. + unsafe { bindings::synchronize_rcu() }; + } +} From ba17aad29ad45c9b829c6e5d63c2a8d22149352d Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 13 Dec 2023 22:08:32 +0000 Subject: [PATCH 09/30] rust: macros: add `decl_generics` to `parse_generics()` The generic parameters on a type definition can specify default values. Currently `parse_generics()` cannot handle this though. For example when parsing the following generics: The `impl_generics` will be set to `T: Clone, const N: usize = 0` and `ty_generics` will be set to `T, N`. Now using the `impl_generics` on an impl block: impl<$($impl_generics)*> Foo {} will result in invalid Rust code, because default values are only available on type definitions. Therefore add parsing support for generic parameter default values using a new kind of generics called `decl_generics` and change the old behavior of `impl_generics` to not contain the generic parameter default values. Now `Generics` has three fields: - `impl_generics`: the generics with bounds (e.g. `T: Clone, const N: usize`) - `decl_generics`: the generics with bounds and default values (e.g. `T: Clone, const N: usize = 0`) - `ty_generics`: contains the generics without bounds and without default values (e.g. `T, N`) `impl_generics` is designed to be used on `impl<$impl_generics>`, `decl_generics` for the type definition, so `struct Foo<$decl_generics>` and `ty_generics` whenever you use the type, so `Foo<$ty_generics>`. Here is an example that uses all three different types of generics: let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input); quote! { struct Foo<$($decl_generics)*> { // ... } impl<$impl_generics> Foo<$ty_generics> { fn foo() { // ... } } } The next commit contains a fix to the `#[pin_data]` macro making it compatible with generic parameter default values by relying on this new behavior. Signed-off-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Link: https://lore.kernel.org/r/20231213220447.3613500-1-benno.lossin@proton.me --- rust/macros/helpers.rs | 122 ++++++++++++++++++++++++++++++---------- rust/macros/pin_data.rs | 1 + rust/macros/zeroable.rs | 1 + 3 files changed, 94 insertions(+), 30 deletions(-) diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs index afb0f2e3a36a9d..3f50a5c847c8cc 100644 --- a/rust/macros/helpers.rs +++ b/rust/macros/helpers.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -use proc_macro::{token_stream, Group, Punct, Spacing, TokenStream, TokenTree}; +use proc_macro::{token_stream, Group, TokenStream, TokenTree}; pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option { if let Some(TokenTree::Ident(ident)) = it.next() { @@ -70,8 +70,40 @@ pub(crate) fn expect_end(it: &mut token_stream::IntoIter) { } } +/// Parsed generics. +/// +/// See the field documentation for an explanation what each of the fields represents. +/// +/// # Examples +/// +/// ```rust,ignore +/// # let input = todo!(); +/// let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input); +/// quote! { +/// struct Foo<$($decl_generics)*> { +/// // ... +/// } +/// +/// impl<$impl_generics> Foo<$ty_generics> { +/// fn foo() { +/// // ... +/// } +/// } +/// } +/// ``` pub(crate) struct Generics { + /// The generics with bounds and default values (e.g. `T: Clone, const N: usize = 0`). + /// + /// Use this on type definitions e.g. `struct Foo<$decl_generics> ...` (or `union`/`enum`). + pub(crate) decl_generics: Vec, + /// The generics with bounds (e.g. `T: Clone, const N: usize`). + /// + /// Use this on `impl` blocks e.g. `impl<$impl_generics> Trait for ...`. pub(crate) impl_generics: Vec, + /// The generics without bounds and without default values (e.g. `T, N`). + /// + /// Use this when you use the type that is declared with these generics e.g. + /// `Foo<$ty_generics>`. pub(crate) ty_generics: Vec, } @@ -81,6 +113,8 @@ pub(crate) struct Generics { pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec) { // `impl_generics`, the declared generics with their bounds. let mut impl_generics = vec![]; + // The generics with bounds and default values. + let mut decl_generics = vec![]; // Only the names of the generics, without any bounds. let mut ty_generics = vec![]; // Tokens not related to the generics e.g. the `where` token and definition. @@ -90,10 +124,17 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec) { let mut toks = input.into_iter(); // If we are at the beginning of a generic parameter. let mut at_start = true; - for tt in &mut toks { + let mut skip_until_comma = false; + while let Some(tt) = toks.next() { + if nesting == 1 && matches!(&tt, TokenTree::Punct(p) if p.as_char() == '>') { + // Found the end of the generics. + break; + } else if nesting >= 1 { + decl_generics.push(tt.clone()); + } match tt.clone() { TokenTree::Punct(p) if p.as_char() == '<' => { - if nesting >= 1 { + if nesting >= 1 && !skip_until_comma { // This is inside of the generics and part of some bound. impl_generics.push(tt); } @@ -105,49 +146,70 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec) { break; } else { nesting -= 1; - if nesting >= 1 { + if nesting >= 1 && !skip_until_comma { // We are still inside of the generics and part of some bound. impl_generics.push(tt); } - if nesting == 0 { - break; - } } } - tt => { + TokenTree::Punct(p) if skip_until_comma && p.as_char() == ',' => { if nesting == 1 { - // Here depending on the token, it might be a generic variable name. - match &tt { - // Ignore const. - TokenTree::Ident(i) if i.to_string() == "const" => {} - TokenTree::Ident(_) if at_start => { - ty_generics.push(tt.clone()); - // We also already push the `,` token, this makes it easier to append - // generics. - ty_generics.push(TokenTree::Punct(Punct::new(',', Spacing::Alone))); - at_start = false; - } - TokenTree::Punct(p) if p.as_char() == ',' => at_start = true, - // Lifetimes begin with `'`. - TokenTree::Punct(p) if p.as_char() == '\'' && at_start => { - ty_generics.push(tt.clone()); - } - _ => {} - } + impl_generics.push(TokenTree::Punct(p.clone())); + ty_generics.push(TokenTree::Punct(p)); + skip_until_comma = false; } - if nesting >= 1 { - impl_generics.push(tt); - } else if nesting == 0 { + } + tt if !skip_until_comma => { + match nesting { // If we haven't entered the generics yet, we still want to keep these tokens. - rest.push(tt); + 0 => rest.push(tt), + 1 => { + // Here depending on the token, it might be a generic variable name. + match tt { + TokenTree::Ident(i) if at_start && i.to_string() == "const" => { + let Some(name) = toks.next() else { + // Parsing error. + break; + }; + impl_generics.push(TokenTree::Ident(i)); + impl_generics.push(name.clone()); + ty_generics.push(name.clone()); + decl_generics.push(name); + at_start = false; + } + tt @ TokenTree::Ident(_) if at_start => { + impl_generics.push(tt.clone()); + ty_generics.push(tt); + at_start = false; + } + TokenTree::Punct(p) if p.as_char() == ',' => { + impl_generics.push(TokenTree::Punct(p.clone())); + ty_generics.push(TokenTree::Punct(p)); + at_start = true; + } + // Lifetimes begin with `'`. + TokenTree::Punct(p) if p.as_char() == '\'' && at_start => { + ty_generics.push(TokenTree::Punct(p.clone())); + impl_generics.push(TokenTree::Punct(p)); + } + // Generics can have default values, we skip these. + TokenTree::Punct(p) if p.as_char() == '=' => { + skip_until_comma = true; + } + tt => impl_generics.push(tt), + } + } + _ => impl_generics.push(tt), } } + _ => {} } } rest.extend(toks); ( Generics { impl_generics, + decl_generics, ty_generics, }, rest, diff --git a/rust/macros/pin_data.rs b/rust/macros/pin_data.rs index 6d58cfda9872e6..022e68e9720d45 100644 --- a/rust/macros/pin_data.rs +++ b/rust/macros/pin_data.rs @@ -10,6 +10,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream { let ( Generics { impl_generics, + decl_generics: _, ty_generics, }, rest, diff --git a/rust/macros/zeroable.rs b/rust/macros/zeroable.rs index 0d605c46ab3b40..cfee2cec18d5f5 100644 --- a/rust/macros/zeroable.rs +++ b/rust/macros/zeroable.rs @@ -7,6 +7,7 @@ pub(crate) fn derive(input: TokenStream) -> TokenStream { let ( Generics { impl_generics, + decl_generics: _, ty_generics, }, mut rest, From 884a0c6b1fba2c8ba4fa34a5c86b8dfe64b0b3a4 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 10 Jul 2023 07:46:42 +0000 Subject: [PATCH 10/30] rust: add improved version of `ForeignOwnable::borrow_mut` Previously, the `ForeignOwnable` trait had a method called `borrow_mut` that was intended to provide mutable access to the inner value. However, the method accidentally made it possible to change the address of the object being modified, which usually isn't what we want. (And when we want that, it can be done by calling `from_foreign` and `into_foreign`, like how the old `borrow_mut` was implemented.) In this patch, we introduce an alternate definition of `borrow_mut` that solves the previous problem. Conceptually, given a pointer type `P` that implements `ForeignOwnable`, the `borrow_mut` method gives you the same kind of access as an `&mut P` would, except that it does not let you change the pointer `P` itself. This is analogous to how the existing `borrow` method provides the same kind of access to the inner value as an `&P`. Note that for types like `Arc`, having an `&mut Arc` only gives you immutable access to the inner `T`. This is because mutable references assume exclusive access, but there might be other handles to the same reference counted value, so the access isn't exclusive. The `Arc` type implements this by making `borrow_mut` return the same type as `borrow`. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Boqun Feng Link: https://lore.kernel.org/r/20230710074642.683831-1-aliceryhl@google.com --- rust/kernel/sync/arc.rs | 25 ++++++++---- rust/kernel/types.rs | 87 +++++++++++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 29 deletions(-) diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 77cdbcf7bd2eba..97e7146a7b1fa0 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -277,26 +277,35 @@ impl Arc { impl ForeignOwnable for Arc { type Borrowed<'a> = ArcBorrow<'a, T>; + // Mutable access to the `Arc` does not give any extra abilities over + // immutable access. + type BorrowedMut<'a> = ArcBorrow<'a, T>; fn into_foreign(self) -> *const core::ffi::c_void { ManuallyDrop::new(self).ptr.as_ptr() as _ } + unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self { + // SAFETY: By the safety requirement of this function, we know that `ptr` came from + // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and + // holds a reference count increment that is transferrable to us. + unsafe { Self::from_inner(NonNull::new_unchecked(ptr as _)) } + } + unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> ArcBorrow<'a, T> { // SAFETY: By the safety requirement of this function, we know that `ptr` came from // a previous call to `Arc::into_foreign`. - let inner = NonNull::new(ptr as *mut ArcInner).unwrap(); + let inner = unsafe { NonNull::new_unchecked(ptr as *mut ArcInner) }; - // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive - // for the lifetime of the returned value. + // SAFETY: The safety requirements ensure that we will not give up our + // foreign-owned refcount while the `ArcBorrow` is still live. unsafe { ArcBorrow::new(inner) } } - unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self { - // SAFETY: By the safety requirement of this function, we know that `ptr` came from - // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and - // holds a reference count increment that is transferrable to us. - unsafe { Self::from_inner(NonNull::new(ptr as _).unwrap()) } + unsafe fn borrow_mut<'a>(ptr: *const core::ffi::c_void) -> ArcBorrow<'a, T> { + // SAFETY: The safety requirements for `borrow_mut` are a superset of the safety + // requirements for `borrow`. + unsafe { Self::borrow(ptr) } } } diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index fdb778e65d79d3..d849e1979ac707 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -20,66 +20,111 @@ use core::{ /// This trait is meant to be used in cases when Rust objects are stored in C objects and /// eventually "freed" back to Rust. pub trait ForeignOwnable: Sized { - /// Type of values borrowed between calls to [`ForeignOwnable::into_foreign`] and - /// [`ForeignOwnable::from_foreign`]. + /// Type used to immutably borrow a value that is currently foreign-owned. type Borrowed<'a>; + /// Type used to mutably borrow a value that is currently foreign-owned. + type BorrowedMut<'a>; + /// Converts a Rust-owned object to a foreign-owned one. /// /// The foreign representation is a pointer to void. fn into_foreign(self) -> *const core::ffi::c_void; - /// Borrows a foreign-owned object. + /// Converts a foreign-owned object back to a Rust-owned one. + /// + /// # Safety + /// + /// The provided pointer must have been returned by a previous call to [`into_foreign`], and it + /// must not be passed to `from_foreign` more than once. + /// + /// [`into_foreign`]: Self::into_foreign + unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self; + + /// Borrows a foreign-owned object immutably. + /// + /// This method provides a way to access a foreign-owned value from Rust immutably. It provides + /// you with exactly the same abilities as an `&Self` when the value is Rust-owned. /// /// # Safety /// - /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for - /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet. + /// The provided pointer must have been returned by a previous call to [`into_foreign`], and if + /// the pointer is ever passed to [`from_foreign`], then that call must happen after the end of + /// the lifetime 'a. + /// + /// [`into_foreign`]: Self::into_foreign + /// [`from_foreign`]: Self::from_foreign unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>; - /// Converts a foreign-owned object back to a Rust-owned one. + /// Borrows a foreign-owned object mutably. + /// + /// This method provides a way to access a foreign-owned value from Rust mutably. It provides + /// you with exactly the same abilities as an `&mut Self` when the value is Rust-owned, except + /// that this method does not let you swap the foreign-owned object for another. (That is, it + /// does not let you change the address of the void pointer that the foreign code is storing.) + /// + /// Note that for types like [`Arc`], an `&mut Arc` only gives you immutable access to the + /// inner value, so this method also only provides immutable access in that case. + /// + /// In the case of `Box`, this method gives you the ability to modify the inner `T`, but it + /// does not let you change the box itself. That is, you cannot change which allocation the box + /// points at. /// /// # Safety /// - /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for - /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet. - /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] for - /// this object must have been dropped. - unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self; + /// The provided pointer must have been returned by a previous call to [`into_foreign`], and if + /// the pointer is ever passed to [`from_foreign`], then that call must happen after the end of + /// the lifetime 'a. + /// + /// The lifetime 'a must not overlap with the lifetime of any other call to [`borrow`] or + /// `borrow_mut` on the same object. + /// + /// [`into_foreign`]: Self::into_foreign + /// [`from_foreign`]: Self::from_foreign + /// [`borrow`]: Self::borrow + /// [`Arc`]: crate::sync::Arc + unsafe fn borrow_mut<'a>(ptr: *const core::ffi::c_void) -> Self::BorrowedMut<'a>; } impl ForeignOwnable for Box { type Borrowed<'a> = &'a T; + type BorrowedMut<'a> = &'a mut T; fn into_foreign(self) -> *const core::ffi::c_void { Box::into_raw(self) as _ } - unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> &'a T { - // SAFETY: The safety requirements for this function ensure that the object is still alive, - // so it is safe to dereference the raw pointer. - // The safety requirements of `from_foreign` also ensure that the object remains alive for - // the lifetime of the returned value. - unsafe { &*ptr.cast() } - } - unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self { // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous // call to `Self::into_foreign`. unsafe { Box::from_raw(ptr as _) } } + + unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> &'a T { + // SAFETY: The safety requirements of this method ensure that the object remains alive and + // immutable for the duration of 'a. + unsafe { &*ptr.cast() } + } + + unsafe fn borrow_mut<'a>(ptr: *const core::ffi::c_void) -> &'a mut T { + // SAFETY: The safety requirements of this method ensure that the pointer is valid and that + // nothing else will access the value for the duration of 'a. + unsafe { &mut *ptr.cast_mut().cast() } + } } impl ForeignOwnable for () { type Borrowed<'a> = (); + type BorrowedMut<'a> = (); fn into_foreign(self) -> *const core::ffi::c_void { core::ptr::NonNull::dangling().as_ptr() } - unsafe fn borrow<'a>(_: *const core::ffi::c_void) -> Self::Borrowed<'a> {} - unsafe fn from_foreign(_: *const core::ffi::c_void) -> Self {} + + unsafe fn borrow<'a>(_: *const core::ffi::c_void) -> Self::Borrowed<'a> {} + unsafe fn borrow_mut<'a>(_: *const core::ffi::c_void) -> Self::BorrowedMut<'a> {} } /// Runs a cleanup function/closure when dropped. From 1fd25cc39eb9069df78be992f38ae29ad7748710 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 13 Dec 2023 22:08:53 +0000 Subject: [PATCH 11/30] rust: macros: allow generic parameter default values in `#[pin_data]` Add support for generic parameters defaults in `#[pin_data]` by using the newly introduced `decl_generics` instead of the `impl_generics`. Before this would not compile: #[pin_data] struct Foo { // ... } because it would be expanded to this: struct Foo { // ... } const _: () = { struct __ThePinData { __phantom: ::core::marker::PhantomData) -> Foo>, } impl ::core::clone::Clone for __ThePinData { fn clone(&self) -> Self { *self } } // [...] rest of expansion omitted }; The problem is with the `impl`, since that is invalid Rust syntax. It should not mention the default value at all, since default values only make sense on type definitions. The new `impl_generics` do not contain the default values, thus generating correct Rust code. This is used by the next commit that puts `#[pin_data]` on `kernel::workqueue::Work`. Signed-off-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20231213220447.3613500-2-benno.lossin@proton.me --- rust/kernel/init/macros.rs | 19 ++++++++++++++++++- rust/macros/pin_data.rs | 3 ++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index cb6e61b6c50bda..624e9108e3b423 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -538,6 +538,7 @@ macro_rules! __pin_data { ), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @body({ $($fields:tt)* }), ) => { // We now use token munching to iterate through all of the fields. While doing this we @@ -560,6 +561,9 @@ macro_rules! __pin_data { @impl_generics($($impl_generics)*), // The 'ty generics', the generics that will need to be specified on the impl blocks. @ty_generics($($ty_generics)*), + // The 'decl generics', the generics that need to be specified on the struct + // definition. + @decl_generics($($decl_generics)*), // The where clause of any impl block and the declaration. @where($($($whr)*)?), // The remaining fields tokens that need to be processed. @@ -585,6 +589,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We found a PhantomPinned field, this should generally be pinned! @fields_munch($field:ident : $($($(::)?core::)?marker::)?PhantomPinned, $($rest:tt)*), @@ -607,6 +612,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)* $($accum)* $field: ::core::marker::PhantomPinned,), @@ -623,6 +629,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration. @fields_munch($field:ident : $type:ty, $($rest:tt)*), @@ -640,6 +647,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)* $($accum)* $field: $type,), @@ -656,6 +664,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration. @fields_munch($field:ident : $type:ty, $($rest:tt)*), @@ -673,6 +682,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)*), @@ -689,6 +699,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We found the `#[pin]` attr. @fields_munch(#[pin] $($rest:tt)*), @@ -705,6 +716,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), // We do not include `#[pin]` in the list of attributes, since it is not actually an @@ -724,6 +736,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration with visibility, for simplicity we only munch the // visibility and put it into `$accum`. @@ -741,6 +754,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($field $($rest)*), @pinned($($pinned)*), @@ -757,6 +771,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // Some other attribute, just put it into `$accum`. @fields_munch(#[$($attr:tt)*] $($rest:tt)*), @@ -773,6 +788,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)*), @@ -789,6 +805,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the end of the fields, plus an optional additional comma, since we added one // before and the user is also allowed to put a trailing comma. @@ -802,7 +819,7 @@ macro_rules! __pin_data { ) => { // Declare the struct with all fields in the correct order. $($struct_attrs)* - $vis struct $name <$($impl_generics)*> + $vis struct $name <$($decl_generics)*> where $($whr)* { $($fields)* diff --git a/rust/macros/pin_data.rs b/rust/macros/pin_data.rs index 022e68e9720d45..1d4a3547c684b3 100644 --- a/rust/macros/pin_data.rs +++ b/rust/macros/pin_data.rs @@ -10,7 +10,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream { let ( Generics { impl_generics, - decl_generics: _, + decl_generics, ty_generics, }, rest, @@ -77,6 +77,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream { @sig(#(#rest)*), @impl_generics(#(#impl_generics)*), @ty_generics(#(#ty_generics)*), + @decl_generics(#(#decl_generics)*), @body(#last), }); quoted.extend(errs); From 7ca8daf16dafe978e4de17c622b8916384b2b70e Mon Sep 17 00:00:00 2001 From: Jamie Cunliffe Date: Fri, 20 Oct 2023 16:50:55 +0100 Subject: [PATCH 12/30] rust: Refactor the build target to allow the use of builtin targets Eventually we want all architectures to be using the target as defined by rustc. However currently some architectures can't do that and are using the target.json specification. This puts in place the foundation to allow the use of the builtin target definition or a target.json specification. Signed-off-by: Jamie Cunliffe Link: https://lore.kernel.org/r/20231020155056.3495121-2-Jamie.Cunliffe@arm.com --- Makefile | 1 - arch/x86/Makefile | 1 + rust/Makefile | 5 ++++- scripts/Makefile | 4 +++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index e78ee7db0729b6..45cf497c6715c8 100644 --- a/Makefile +++ b/Makefile @@ -555,7 +555,6 @@ KBUILD_CFLAGS += -fno-strict-aliasing KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_RUSTFLAGS := $(rust_common_flags) \ - --target=$(objtree)/scripts/target.json \ -Cpanic=abort -Cembed-bitcode=n -Clto=n \ -Cforce-unwind-tables=n -Ccodegen-units=1 \ -Csymbol-mangling-version=v0 \ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a068de12a564f..86d347ff6b3362 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -68,6 +68,7 @@ export BITS # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 ifeq ($(CONFIG_X86_KERNEL_IBT),y) diff --git a/rust/Makefile b/rust/Makefile index 9d2a16cc91cb44..916a64c213d6b6 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -434,8 +434,11 @@ $(obj)/core.o: private skip_clippy = 1 $(obj)/core.o: private skip_flags = -Dunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) $(obj)/core.o: private rustc_target_flags = $(core-cfgs) -$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs scripts/target.json FORCE +$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE $(call if_changed_dep,rustc_library) +ifdef CONFIG_X86_64 +$(obj)/core.o: scripts/target.json +endif $(obj)/compiler_builtins.o: private rustc_objcopy = -w -W '__*' $(obj)/compiler_builtins.o: $(src)/compiler_builtins.rs $(obj)/core.o FORCE diff --git a/scripts/Makefile b/scripts/Makefile index 576cf64be6677c..c85d130a4125bd 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -11,12 +11,14 @@ hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen -always-$(CONFIG_RUST) += target.json +ifdef CONFIG_X86_64 +always-$(CONFIG_RUST) += target.json filechk_rust_target = $< < include/config/auto.conf $(obj)/target.json: scripts/generate_rust_target include/config/auto.conf FORCE $(call filechk,rust_target) +endif hostprogs += generate_rust_target generate_rust_target-rust := y From 8f7e3762652feb04a6ac6f204e46098d5e7664fe Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 13 Dec 2023 22:09:04 +0000 Subject: [PATCH 13/30] rust: workqueue: add `#[pin_data]` to `Work` The previous two patches made it possible to add `#[pin_data]` on structs with default generic parameter values. This patch makes `Work` use `#[pin_data]` and removes an invocation of `pin_init_from_closure`. This function is intended as a low level manual escape hatch, so it is better to rely on the safe `pin_init!` macro. Signed-off-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20231213220447.3613500-3-benno.lossin@proton.me --- rust/kernel/workqueue.rs | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 49839787737671..7f394e682f3feb 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -334,8 +334,10 @@ pub trait WorkItem { /// Wraps the kernel's C `struct work_struct`. /// /// This is a helper type used to associate a `work_struct` with the [`WorkItem`] that uses it. +#[pin_data] #[repr(transparent)] pub struct Work { + #[pin] work: Opaque, _inner: PhantomData, } @@ -357,21 +359,22 @@ impl Work { where T: WorkItem, { - // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as the work - // item function. - unsafe { - kernel::init::pin_init_from_closure(move |slot| { - let slot = Self::raw_get(slot); - bindings::init_work_with_key( - slot, - Some(T::Pointer::run), - false, - name.as_char_ptr(), - key.as_ptr(), - ); - Ok(()) - }) - } + pin_init!(Self { + work <- Opaque::ffi_init(|slot| { + // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as + // the work item function. + unsafe { + bindings::init_work_with_key( + slot, + Some(T::Pointer::run), + false, + name.as_char_ptr(), + key.as_ptr(), + ) + } + }), + _inner: PhantomData, + }) } /// Get a pointer to the inner `work_struct`. From df91439794878be31267bd3172bc146a004a0557 Mon Sep 17 00:00:00 2001 From: Jamie Cunliffe Date: Fri, 20 Oct 2023 16:50:56 +0100 Subject: [PATCH 14/30] arm64: rust: Enable Rust support for AArch64 This commit provides the build flags for Rust for AArch64. The core Rust support already in the kernel does the rest. This enables the PAC ret and BTI options in the Rust build flags to match the options that are used when building C. The Rust samples have been tested with this commit. Signed-off-by: Jamie Cunliffe Acked-by: Will Deacon Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20231020155056.3495121-3-Jamie.Cunliffe@arm.com --- Documentation/rust/arch-support.rst | 1 + arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 4 ++++ rust/Makefile | 1 + scripts/generate_rust_target.rs | 4 +++- 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index b91e9ef4d0c21e..6bcb3b97c5b610 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -15,6 +15,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file. ============ ================ ============================================== Architecture Level of support Constraints ============ ================ ============================================== +``arm64`` Maintained Little Endian only. ``um`` Maintained ``x86_64`` only. ``x86`` Maintained ``x86_64`` only. ============ ================ ============================================== diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b071a00425d2b..1aaa1d456d9d4b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -227,6 +227,7 @@ config ARM64 select HAVE_FUNCTION_ARG_ACCESS_API select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ + select HAVE_RUST if CPU_LITTLE_ENDIAN select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9a2d3723cd0fa9..7af127552f5302 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,8 @@ KBUILD_CFLAGS += -mgeneral-regs-only \ KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) +KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon" + KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) @@ -65,7 +67,9 @@ endif ifeq ($(CONFIG_ARM64_BTI_KERNEL),y) KBUILD_CFLAGS += -mbranch-protection=pac-ret+bti + KBUILD_RUSTFLAGS += -Zbranch-protection=bti,pac-ret else ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y) + KBUILD_RUSTFLAGS += -Zbranch-protection=pac-ret ifeq ($(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET),y) KBUILD_CFLAGS += -mbranch-protection=pac-ret else diff --git a/rust/Makefile b/rust/Makefile index 916a64c213d6b6..cb40a1ce133158 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -297,6 +297,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ # Derived from `scripts/Makefile.clang`. BINDGEN_TARGET_x86 := x86_64-linux-gnu +BINDGEN_TARGET_arm64 := aarch64-linux-gnu BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) # All warnings are inhibited since GCC builds are very experimental, diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 3c6cbe2b278d30..ec5ef35dbe52db 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -148,7 +148,9 @@ fn main() { let mut ts = TargetSpec::new(); // `llvm-target`s are taken from `scripts/Makefile.clang`. - if cfg.has("X86_64") { + if cfg.has("ARM64") { + panic!("arm64 uses the builtin rustc aarch64-unknown-none target"); + } else if cfg.has("X86_64") { ts.push("arch", "x86_64"); ts.push( "data-layout", From f3bc6f92824e069a73d121d9d92dbef51e9ab4be Mon Sep 17 00:00:00 2001 From: Antonio Hickey Date: Tue, 2 Jan 2024 17:14:17 -0500 Subject: [PATCH 15/30] rust: task: add `as_raw()` to `Task` Added new function `Task::as_raw()` which returns the raw pointer for the underlying task struct. I also refactored `Task` to instead use the newly created function instead of `self.0.get()` as I feel like `self.as_raw()` is more intuitive. Signed-off-by: Antonio Hickey --- rust/kernel/task.rs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index d2f2615fe4a1b3..72737f5d86abdd 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -124,11 +124,16 @@ impl Task { } } + /// Returns a raw pointer to the underlying C task struct. + pub fn as_raw(&self) -> *mut bindings::task_struct { + self.0.get() + } + /// Returns the group leader of the given task. pub fn group_leader(&self) -> &Task { - // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always + // SAFETY: By the type invariant, we know that `self.as_raw()` is a valid task. Valid tasks always // have a valid group_leader. - let ptr = unsafe { *ptr::addr_of!((*self.0.get()).group_leader) }; + let ptr = unsafe { *ptr::addr_of!((*self.as_raw()).group_leader) }; // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, // and given that a task has a reference to its group leader, we know it must be valid for @@ -138,43 +143,43 @@ impl Task { /// Returns the PID of the given task. pub fn pid(&self) -> Pid { - // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always + // SAFETY: By the type invariant, we know that `self.as_raw()` is a valid task. Valid tasks always // have a valid pid. - unsafe { *ptr::addr_of!((*self.0.get()).pid) } + unsafe { *ptr::addr_of!((*self.as_raw()).pid) } } /// Returns the UID of the given task. pub fn uid(&self) -> Kuid { - // SAFETY: By the type invariant, we know that `self.0` is valid. - Kuid::from_raw(unsafe { bindings::task_uid(self.0.get()) }) + // SAFETY: By the type invariant, we know that `self.as_raw()` is valid. + Kuid::from_raw(unsafe { bindings::task_uid(self.as_raw()) }) } /// Returns the effective UID of the given task. pub fn euid(&self) -> Kuid { - // SAFETY: By the type invariant, we know that `self.0` is valid. - Kuid::from_raw(unsafe { bindings::task_euid(self.0.get()) }) + // SAFETY: By the type invariant, we know that `self.as_raw()` is valid. + Kuid::from_raw(unsafe { bindings::task_euid(self.as_raw()) }) } /// Determines whether the given task has pending signals. pub fn signal_pending(&self) -> bool { - // SAFETY: By the type invariant, we know that `self.0` is valid. - unsafe { bindings::signal_pending(self.0.get()) != 0 } + // SAFETY: By the type invariant, we know that `self.as_raw()` is valid. + unsafe { bindings::signal_pending(self.as_raw()) != 0 } } /// Returns the given task's pid in the current pid namespace. pub fn pid_in_current_ns(&self) -> Pid { // SAFETY: Calling `task_active_pid_ns` with the current task is always safe. let namespace = unsafe { bindings::task_active_pid_ns(bindings::get_current()) }; - // SAFETY: We know that `self.0.get()` is valid by the type invariant. - unsafe { bindings::task_tgid_nr_ns(self.0.get(), namespace) } + // SAFETY: We know that `self.raw()` is valid by the type invariant. + unsafe { bindings::task_tgid_nr_ns(self.as_raw(), namespace) } } /// Wakes up the task. pub fn wake_up(&self) { - // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid. + // SAFETY: By the type invariant, we know that `self.raw()` is non-null and valid. // And `wake_up_process` is safe to be called for any valid task, even if the task is // running. - unsafe { bindings::wake_up_process(self.0.get()) }; + unsafe { bindings::wake_up_process(self.as_raw()) }; } } From b54bde522777347f877e1f35981acb58621fe5d8 Mon Sep 17 00:00:00 2001 From: Antonio Hickey Date: Thu, 4 Jan 2024 01:43:36 -0500 Subject: [PATCH 16/30] rust: task: use safe `current!` macro Refactor the `Task::pid_in_current_ns()` to use the safe abstraction `current!()` instead of the unsafe `bindings::get_current()` binding. Signed-off-by: Antonio Hickey --- rust/kernel/task.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 72737f5d86abdd..ffe08c348b7263 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -168,8 +168,9 @@ impl Task { /// Returns the given task's pid in the current pid namespace. pub fn pid_in_current_ns(&self) -> Pid { + let current = current!(); // SAFETY: Calling `task_active_pid_ns` with the current task is always safe. - let namespace = unsafe { bindings::task_active_pid_ns(bindings::get_current()) }; + let namespace = unsafe { bindings::task_active_pid_ns(current.as_raw()) }; // SAFETY: We know that `self.raw()` is valid by the type invariant. unsafe { bindings::task_tgid_nr_ns(self.as_raw(), namespace) } } From 0e52e55e67fc543a3cca30c8ab9ee98aae419436 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 13 Dec 2023 22:08:32 +0000 Subject: [PATCH 17/30] rust: macros: add `decl_generics` to `parse_generics()` The generic parameters on a type definition can specify default values. Currently `parse_generics()` cannot handle this though. For example when parsing the following generics: The `impl_generics` will be set to `T: Clone, const N: usize = 0` and `ty_generics` will be set to `T, N`. Now using the `impl_generics` on an impl block: impl<$($impl_generics)*> Foo {} will result in invalid Rust code, because default values are only available on type definitions. Therefore add parsing support for generic parameter default values using a new kind of generics called `decl_generics` and change the old behavior of `impl_generics` to not contain the generic parameter default values. Now `Generics` has three fields: - `impl_generics`: the generics with bounds (e.g. `T: Clone, const N: usize`) - `decl_generics`: the generics with bounds and default values (e.g. `T: Clone, const N: usize = 0`) - `ty_generics`: contains the generics without bounds and without default values (e.g. `T, N`) `impl_generics` is designed to be used on `impl<$impl_generics>`, `decl_generics` for the type definition, so `struct Foo<$decl_generics>` and `ty_generics` whenever you use the type, so `Foo<$ty_generics>`. Here is an example that uses all three different types of generics: let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input); quote! { struct Foo<$($decl_generics)*> { // ... } impl<$impl_generics> Foo<$ty_generics> { fn foo() { // ... } } } The next commit contains a fix to the `#[pin_data]` macro making it compatible with generic parameter default values by relying on this new behavior. Signed-off-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Link: https://lore.kernel.org/r/20231213220447.3613500-1-benno.lossin@proton.me --- rust/macros/helpers.rs | 122 ++++++++++++++++++++++++++++++---------- rust/macros/pin_data.rs | 1 + rust/macros/zeroable.rs | 1 + 3 files changed, 94 insertions(+), 30 deletions(-) diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs index afb0f2e3a36a9d..3f50a5c847c8cc 100644 --- a/rust/macros/helpers.rs +++ b/rust/macros/helpers.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -use proc_macro::{token_stream, Group, Punct, Spacing, TokenStream, TokenTree}; +use proc_macro::{token_stream, Group, TokenStream, TokenTree}; pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option { if let Some(TokenTree::Ident(ident)) = it.next() { @@ -70,8 +70,40 @@ pub(crate) fn expect_end(it: &mut token_stream::IntoIter) { } } +/// Parsed generics. +/// +/// See the field documentation for an explanation what each of the fields represents. +/// +/// # Examples +/// +/// ```rust,ignore +/// # let input = todo!(); +/// let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input); +/// quote! { +/// struct Foo<$($decl_generics)*> { +/// // ... +/// } +/// +/// impl<$impl_generics> Foo<$ty_generics> { +/// fn foo() { +/// // ... +/// } +/// } +/// } +/// ``` pub(crate) struct Generics { + /// The generics with bounds and default values (e.g. `T: Clone, const N: usize = 0`). + /// + /// Use this on type definitions e.g. `struct Foo<$decl_generics> ...` (or `union`/`enum`). + pub(crate) decl_generics: Vec, + /// The generics with bounds (e.g. `T: Clone, const N: usize`). + /// + /// Use this on `impl` blocks e.g. `impl<$impl_generics> Trait for ...`. pub(crate) impl_generics: Vec, + /// The generics without bounds and without default values (e.g. `T, N`). + /// + /// Use this when you use the type that is declared with these generics e.g. + /// `Foo<$ty_generics>`. pub(crate) ty_generics: Vec, } @@ -81,6 +113,8 @@ pub(crate) struct Generics { pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec) { // `impl_generics`, the declared generics with their bounds. let mut impl_generics = vec![]; + // The generics with bounds and default values. + let mut decl_generics = vec![]; // Only the names of the generics, without any bounds. let mut ty_generics = vec![]; // Tokens not related to the generics e.g. the `where` token and definition. @@ -90,10 +124,17 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec) { let mut toks = input.into_iter(); // If we are at the beginning of a generic parameter. let mut at_start = true; - for tt in &mut toks { + let mut skip_until_comma = false; + while let Some(tt) = toks.next() { + if nesting == 1 && matches!(&tt, TokenTree::Punct(p) if p.as_char() == '>') { + // Found the end of the generics. + break; + } else if nesting >= 1 { + decl_generics.push(tt.clone()); + } match tt.clone() { TokenTree::Punct(p) if p.as_char() == '<' => { - if nesting >= 1 { + if nesting >= 1 && !skip_until_comma { // This is inside of the generics and part of some bound. impl_generics.push(tt); } @@ -105,49 +146,70 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec) { break; } else { nesting -= 1; - if nesting >= 1 { + if nesting >= 1 && !skip_until_comma { // We are still inside of the generics and part of some bound. impl_generics.push(tt); } - if nesting == 0 { - break; - } } } - tt => { + TokenTree::Punct(p) if skip_until_comma && p.as_char() == ',' => { if nesting == 1 { - // Here depending on the token, it might be a generic variable name. - match &tt { - // Ignore const. - TokenTree::Ident(i) if i.to_string() == "const" => {} - TokenTree::Ident(_) if at_start => { - ty_generics.push(tt.clone()); - // We also already push the `,` token, this makes it easier to append - // generics. - ty_generics.push(TokenTree::Punct(Punct::new(',', Spacing::Alone))); - at_start = false; - } - TokenTree::Punct(p) if p.as_char() == ',' => at_start = true, - // Lifetimes begin with `'`. - TokenTree::Punct(p) if p.as_char() == '\'' && at_start => { - ty_generics.push(tt.clone()); - } - _ => {} - } + impl_generics.push(TokenTree::Punct(p.clone())); + ty_generics.push(TokenTree::Punct(p)); + skip_until_comma = false; } - if nesting >= 1 { - impl_generics.push(tt); - } else if nesting == 0 { + } + tt if !skip_until_comma => { + match nesting { // If we haven't entered the generics yet, we still want to keep these tokens. - rest.push(tt); + 0 => rest.push(tt), + 1 => { + // Here depending on the token, it might be a generic variable name. + match tt { + TokenTree::Ident(i) if at_start && i.to_string() == "const" => { + let Some(name) = toks.next() else { + // Parsing error. + break; + }; + impl_generics.push(TokenTree::Ident(i)); + impl_generics.push(name.clone()); + ty_generics.push(name.clone()); + decl_generics.push(name); + at_start = false; + } + tt @ TokenTree::Ident(_) if at_start => { + impl_generics.push(tt.clone()); + ty_generics.push(tt); + at_start = false; + } + TokenTree::Punct(p) if p.as_char() == ',' => { + impl_generics.push(TokenTree::Punct(p.clone())); + ty_generics.push(TokenTree::Punct(p)); + at_start = true; + } + // Lifetimes begin with `'`. + TokenTree::Punct(p) if p.as_char() == '\'' && at_start => { + ty_generics.push(TokenTree::Punct(p.clone())); + impl_generics.push(TokenTree::Punct(p)); + } + // Generics can have default values, we skip these. + TokenTree::Punct(p) if p.as_char() == '=' => { + skip_until_comma = true; + } + tt => impl_generics.push(tt), + } + } + _ => impl_generics.push(tt), } } + _ => {} } } rest.extend(toks); ( Generics { impl_generics, + decl_generics, ty_generics, }, rest, diff --git a/rust/macros/pin_data.rs b/rust/macros/pin_data.rs index 6d58cfda9872e6..022e68e9720d45 100644 --- a/rust/macros/pin_data.rs +++ b/rust/macros/pin_data.rs @@ -10,6 +10,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream { let ( Generics { impl_generics, + decl_generics: _, ty_generics, }, rest, diff --git a/rust/macros/zeroable.rs b/rust/macros/zeroable.rs index 0d605c46ab3b40..cfee2cec18d5f5 100644 --- a/rust/macros/zeroable.rs +++ b/rust/macros/zeroable.rs @@ -7,6 +7,7 @@ pub(crate) fn derive(input: TokenStream) -> TokenStream { let ( Generics { impl_generics, + decl_generics: _, ty_generics, }, mut rest, From 9a1d84ea6bbcd5fc8ad78a9f374f7605f395db9a Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 13 Dec 2023 22:08:53 +0000 Subject: [PATCH 18/30] rust: macros: allow generic parameter default values in `#[pin_data]` Add support for generic parameters defaults in `#[pin_data]` by using the newly introduced `decl_generics` instead of the `impl_generics`. Before this would not compile: #[pin_data] struct Foo { // ... } because it would be expanded to this: struct Foo { // ... } const _: () = { struct __ThePinData { __phantom: ::core::marker::PhantomData) -> Foo>, } impl ::core::clone::Clone for __ThePinData { fn clone(&self) -> Self { *self } } // [...] rest of expansion omitted }; The problem is with the `impl`, since that is invalid Rust syntax. It should not mention the default value at all, since default values only make sense on type definitions. The new `impl_generics` do not contain the default values, thus generating correct Rust code. This is used by the next commit that puts `#[pin_data]` on `kernel::workqueue::Work`. Signed-off-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20231213220447.3613500-2-benno.lossin@proton.me --- rust/kernel/init/macros.rs | 19 ++++++++++++++++++- rust/macros/pin_data.rs | 3 ++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index cb6e61b6c50bda..624e9108e3b423 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -538,6 +538,7 @@ macro_rules! __pin_data { ), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @body({ $($fields:tt)* }), ) => { // We now use token munching to iterate through all of the fields. While doing this we @@ -560,6 +561,9 @@ macro_rules! __pin_data { @impl_generics($($impl_generics)*), // The 'ty generics', the generics that will need to be specified on the impl blocks. @ty_generics($($ty_generics)*), + // The 'decl generics', the generics that need to be specified on the struct + // definition. + @decl_generics($($decl_generics)*), // The where clause of any impl block and the declaration. @where($($($whr)*)?), // The remaining fields tokens that need to be processed. @@ -585,6 +589,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We found a PhantomPinned field, this should generally be pinned! @fields_munch($field:ident : $($($(::)?core::)?marker::)?PhantomPinned, $($rest:tt)*), @@ -607,6 +612,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)* $($accum)* $field: ::core::marker::PhantomPinned,), @@ -623,6 +629,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration. @fields_munch($field:ident : $type:ty, $($rest:tt)*), @@ -640,6 +647,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)* $($accum)* $field: $type,), @@ -656,6 +664,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration. @fields_munch($field:ident : $type:ty, $($rest:tt)*), @@ -673,6 +682,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)*), @@ -689,6 +699,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We found the `#[pin]` attr. @fields_munch(#[pin] $($rest:tt)*), @@ -705,6 +716,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), // We do not include `#[pin]` in the list of attributes, since it is not actually an @@ -724,6 +736,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the field declaration with visibility, for simplicity we only munch the // visibility and put it into `$accum`. @@ -741,6 +754,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($field $($rest)*), @pinned($($pinned)*), @@ -757,6 +771,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // Some other attribute, just put it into `$accum`. @fields_munch(#[$($attr:tt)*] $($rest:tt)*), @@ -773,6 +788,7 @@ macro_rules! __pin_data { @name($name), @impl_generics($($impl_generics)*), @ty_generics($($ty_generics)*), + @decl_generics($($decl_generics)*), @where($($whr)*), @fields_munch($($rest)*), @pinned($($pinned)*), @@ -789,6 +805,7 @@ macro_rules! __pin_data { @name($name:ident), @impl_generics($($impl_generics:tt)*), @ty_generics($($ty_generics:tt)*), + @decl_generics($($decl_generics:tt)*), @where($($whr:tt)*), // We reached the end of the fields, plus an optional additional comma, since we added one // before and the user is also allowed to put a trailing comma. @@ -802,7 +819,7 @@ macro_rules! __pin_data { ) => { // Declare the struct with all fields in the correct order. $($struct_attrs)* - $vis struct $name <$($impl_generics)*> + $vis struct $name <$($decl_generics)*> where $($whr)* { $($fields)* diff --git a/rust/macros/pin_data.rs b/rust/macros/pin_data.rs index 022e68e9720d45..1d4a3547c684b3 100644 --- a/rust/macros/pin_data.rs +++ b/rust/macros/pin_data.rs @@ -10,7 +10,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream { let ( Generics { impl_generics, - decl_generics: _, + decl_generics, ty_generics, }, rest, @@ -77,6 +77,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream { @sig(#(#rest)*), @impl_generics(#(#impl_generics)*), @ty_generics(#(#ty_generics)*), + @decl_generics(#(#decl_generics)*), @body(#last), }); quoted.extend(errs); From b01a973c6acb26c910490d300ab4a9bbaf893dbc Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Wed, 13 Dec 2023 22:09:04 +0000 Subject: [PATCH 19/30] rust: workqueue: add `#[pin_data]` to `Work` The previous two patches made it possible to add `#[pin_data]` on structs with default generic parameter values. This patch makes `Work` use `#[pin_data]` and removes an invocation of `pin_init_from_closure`. This function is intended as a low level manual escape hatch, so it is better to rely on the safe `pin_init!` macro. Signed-off-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20231213220447.3613500-3-benno.lossin@proton.me --- rust/kernel/workqueue.rs | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 49839787737671..7f394e682f3feb 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -334,8 +334,10 @@ pub trait WorkItem { /// Wraps the kernel's C `struct work_struct`. /// /// This is a helper type used to associate a `work_struct` with the [`WorkItem`] that uses it. +#[pin_data] #[repr(transparent)] pub struct Work { + #[pin] work: Opaque, _inner: PhantomData, } @@ -357,21 +359,22 @@ impl Work { where T: WorkItem, { - // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as the work - // item function. - unsafe { - kernel::init::pin_init_from_closure(move |slot| { - let slot = Self::raw_get(slot); - bindings::init_work_with_key( - slot, - Some(T::Pointer::run), - false, - name.as_char_ptr(), - key.as_ptr(), - ); - Ok(()) - }) - } + pin_init!(Self { + work <- Opaque::ffi_init(|slot| { + // SAFETY: The `WorkItemPointer` implementation promises that `run` can be used as + // the work item function. + unsafe { + bindings::init_work_with_key( + slot, + Some(T::Pointer::run), + false, + name.as_char_ptr(), + key.as_ptr(), + ) + } + }), + _inner: PhantomData, + }) } /// Get a pointer to the inner `work_struct`. From d02944d5fe527f51360a882f4951038acfad4a1d Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 8 Jan 2024 14:49:57 +0000 Subject: [PATCH 20/30] rust: sync: add `CondVar::notify_sync` Wake up another thread synchronously. This method behaves like `notify_one`, except that it hints to the scheduler that the current thread is about to go to sleep, so it should schedule the target thread on the same CPU. This is used by Rust Binder as a performance optimization. When sending a transaction to a different process, we usually know which thread will handle it, so we can schedule that thread for execution next on this CPU for better cache locality. Reviewed-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Tiago Lam Reviewed-by: Boqun Feng Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240108-rb-new-condvar-methods-v4-1-88e0c871cc05@google.com --- rust/kernel/sync/condvar.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index f65e19d5a37c15..b323a0c26513fa 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -155,6 +155,16 @@ impl CondVar { }; } + /// Calls the kernel function to notify one thread synchronously. + /// + /// This method behaves like `notify_one`, except that it hints to the scheduler that the + /// current thread is about to go to sleep, so it should schedule the target thread on the same + /// CPU. + pub fn notify_sync(&self) { + // SAFETY: `wait_list` points to valid memory. + unsafe { bindings::__wake_up_sync(self.wait_list.get(), bindings::TASK_NORMAL) }; + } + /// Wakes a single waiter up, if any. /// /// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost From b13bfeb16e3838449a26c3a6e9d9fcfefa77419f Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 8 Jan 2024 14:49:58 +0000 Subject: [PATCH 21/30] rust: time: add msecs to jiffies conversion Defines type aliases and conversions for msecs and jiffies. This is used by Rust Binder for process freezing. There, we want to sleep until the freeze operation completes, but we want to be able to abort the process freezing if it doesn't complete within some timeout. The freeze timeout is supplied in msecs. Note that we need to convert to jiffies in Binder. It is not enough to introduce a variant of `CondVar::wait_timeout` that takes the timeout in msecs because we need to be able to restart the sleep with the remaining sleep duration if it is interrupted, and if the API takes msecs rather than jiffies, then that would require a conversion roundtrip jiffies-> msecs->jiffies that is best avoided. Suggested-by: Boqun Feng Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Tiago Lam Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240108-rb-new-condvar-methods-v4-2-88e0c871cc05@google.com --- rust/bindings/bindings_helper.h | 1 + rust/kernel/lib.rs | 1 + rust/kernel/time.rs | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 rust/kernel/time.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index c0cb4b05b9185b..936651110c39a5 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index cb2d024db51feb..b89ecf4e97a072 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -48,6 +48,7 @@ pub mod std_vendor; pub mod str; pub mod sync; pub mod task; +pub mod time; pub mod types; pub mod workqueue; diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs new file mode 100644 index 00000000000000..25a896eed4689f --- /dev/null +++ b/rust/kernel/time.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Time related primitives. +//! +//! This module contains the kernel APIs related to time and timers that +//! have been ported or wrapped for usage by Rust code in the kernel. + +/// The time unit of Linux kernel. One jiffy equals (1/HZ) second. +pub type Jiffies = core::ffi::c_ulong; + +/// The millisecond time unit. +pub type Msecs = core::ffi::c_uint; + +/// Converts milliseconds to jiffies. +#[inline] +pub fn msecs_to_jiffies(msecs: Msecs) -> Jiffies { + // SAFETY: The `__msecs_to_jiffies` function is always safe to call no + // matter what the argument is. + unsafe { bindings::__msecs_to_jiffies(msecs) } +} From af3c12d7922513d1fd5457b8ae68fa7fac66ffbd Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 8 Jan 2024 14:49:59 +0000 Subject: [PATCH 22/30] rust: sync: add `CondVar::wait_timeout` Sleep on a condition variable with a timeout. This is used by Rust Binder for process freezing. There, we want to sleep until the freeze operation completes, but we want to be able to abort the process freezing if it doesn't complete within some timeout. Note that it is not enough to avoid jiffies by introducing a variant of `CondVar::wait_timeout` that takes the timeout in msecs because we need to be able to restart the sleep with the remaining sleep duration if it is interrupted, and if the API takes msecs rather than jiffies, then that would require a conversion roundtrip jiffies->msecs->jiffies that is best avoided. Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Tiago Lam Reviewed-by: Boqun Feng Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240108-rb-new-condvar-methods-v4-3-88e0c871cc05@google.com --- rust/kernel/sync/condvar.rs | 60 +++++++++++++++++++++++++++++++++---- rust/kernel/sync/lock.rs | 4 +-- rust/kernel/task.rs | 5 +++- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index b323a0c26513fa..fa38c095cbe055 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -6,7 +6,11 @@ //! variable. use super::{lock::Backend, lock::Guard, LockClassKey}; -use crate::{bindings, init::PinInit, pin_init, str::CStr, types::Opaque}; +use crate::{ + bindings, init::PinInit, pin_init, str::CStr, task::MAX_SCHEDULE_TIMEOUT, time::Jiffies, + types::Opaque, +}; +use core::ffi::c_long; use core::marker::PhantomPinned; use macros::pin_data; @@ -102,7 +106,12 @@ impl CondVar { }) } - fn wait_internal(&self, wait_state: u32, guard: &mut Guard<'_, T, B>) { + fn wait_internal( + &self, + wait_state: u32, + guard: &mut Guard<'_, T, B>, + timeout_in_jiffies: c_long, + ) -> c_long { let wait = Opaque::::uninit(); // SAFETY: `wait` points to valid memory. @@ -113,11 +122,13 @@ impl CondVar { bindings::prepare_to_wait_exclusive(self.wait_list.get(), wait.get(), wait_state as _) }; - // SAFETY: No arguments, switches to another thread. - guard.do_unlocked(|| unsafe { bindings::schedule() }); + // SAFETY: Switches to another thread. The timeout can be any number. + let ret = guard.do_unlocked(|| unsafe { bindings::schedule_timeout(timeout_in_jiffies) }); // SAFETY: Both `wait` and `wait_list` point to valid memory. unsafe { bindings::finish_wait(self.wait_list.get(), wait.get()) }; + + ret } /// Releases the lock and waits for a notification in uninterruptible mode. @@ -127,7 +138,7 @@ impl CondVar { /// [`CondVar::notify_one`] or [`CondVar::notify_all`]. Note that it may also wake up /// spuriously. pub fn wait(&self, guard: &mut Guard<'_, T, B>) { - self.wait_internal(bindings::TASK_UNINTERRUPTIBLE, guard); + self.wait_internal(bindings::TASK_UNINTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT); } /// Releases the lock and waits for a notification in interruptible mode. @@ -138,10 +149,31 @@ impl CondVar { /// Returns whether there is a signal pending. #[must_use = "wait_interruptible returns if a signal is pending, so the caller must check the return value"] pub fn wait_interruptible(&self, guard: &mut Guard<'_, T, B>) -> bool { - self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard); + self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT); crate::current!().signal_pending() } + /// Releases the lock and waits for a notification in interruptible mode. + /// + /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the + /// thread to sleep. It wakes up when notified by [`CondVar::notify_one`] or + /// [`CondVar::notify_all`], or when a timeout occurs, or when the thread receives a signal. + #[must_use = "wait_interruptible_timeout returns if a signal is pending, so the caller must check the return value"] + pub fn wait_interruptible_timeout( + &self, + guard: &mut Guard<'_, T, B>, + jiffies: Jiffies, + ) -> CondVarTimeoutResult { + let jiffies = jiffies.try_into().unwrap_or(MAX_SCHEDULE_TIMEOUT); + let res = self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard, jiffies); + + match (res as Jiffies, crate::current!().signal_pending()) { + (jiffies, true) => CondVarTimeoutResult::Signal { jiffies }, + (0, false) => CondVarTimeoutResult::Timeout, + (jiffies, false) => CondVarTimeoutResult::Woken { jiffies }, + } + } + /// Calls the kernel function to notify the appropriate number of threads with the given flags. fn notify(&self, count: i32, flags: u32) { // SAFETY: `wait_list` points to valid memory. @@ -181,3 +213,19 @@ impl CondVar { self.notify(0, 0); } } + +/// The return type of `wait_timeout`. +pub enum CondVarTimeoutResult { + /// The timeout was reached. + Timeout, + /// Somebody woke us up. + Woken { + /// Remaining sleep duration. + jiffies: Jiffies, + }, + /// A signal occurred. + Signal { + /// Remaining sleep duration. + jiffies: Jiffies, + }, +} diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs index f12a684bc95794..149a5259d431a8 100644 --- a/rust/kernel/sync/lock.rs +++ b/rust/kernel/sync/lock.rs @@ -139,7 +139,7 @@ pub struct Guard<'a, T: ?Sized, B: Backend> { unsafe impl Sync for Guard<'_, T, B> {} impl Guard<'_, T, B> { - pub(crate) fn do_unlocked(&mut self, cb: impl FnOnce()) { + pub(crate) fn do_unlocked(&mut self, cb: impl FnOnce() -> U) -> U { // SAFETY: The caller owns the lock, so it is safe to unlock it. unsafe { B::unlock(self.lock.state.get(), &self.state) }; @@ -147,7 +147,7 @@ impl Guard<'_, T, B> { let _relock = ScopeGuard::new(|| unsafe { B::relock(self.lock.state.get(), &mut self.state) }); - cb(); + cb() } } diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 9451932d5d8674..17f14ebb8f488c 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -5,7 +5,10 @@ //! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h). use crate::{bindings, types::Opaque}; -use core::{marker::PhantomData, ops::Deref, ptr}; +use core::{ffi::c_long, marker::PhantomData, ops::Deref, ptr}; + +/// A sentinal value used for infinite timeouts. +pub const MAX_SCHEDULE_TIMEOUT: c_long = c_long::MAX; /// Returns the currently running task. #[macro_export] From 80a1985ef1d0b4b8640deea30e21eb2201dd61d4 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 8 Jan 2024 14:50:00 +0000 Subject: [PATCH 23/30] rust: sync: update integer types in CondVar Reduce the chances of compilation failures due to integer type mismatches in `CondVar`. When an integer is defined using a #define in C, bindgen doesn't know which integer type it is supposed to be, so it will just use `u32` by default (if it fits in an u32). Whenever the right type is something else, we insert a cast in Rust. However, this means that the code has a lot of extra casts, and sometimes the code will be missing casts if u32 happens to be correct on the developer's machine, even though the type might be something else on a different platform. This patch updates all uses of such constants in `rust/kernel/sync/condvar.rs` to use constants defined with the right type. This allows us to remove various unnecessary casts, while also future-proofing for the case where `unsigned int != u32`. I wrote this patch at the suggestion of Benno in [1]. Link: https://lore.kernel.org/all/nAEg-6vbtX72ZY3oirDhrSEf06TBWmMiTt73EklMzEAzN4FD4mF3TPEyAOxBZgZtjzoiaBYtYr3s8sa9wp1uYH9vEWRf2M-Lf4I0BY9rAgk=@proton.me/ [1] Suggested-by: Benno Lossin Reviewed-by: Tiago Lam Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Reviewed-by: Martin Rodriguez Reboredo Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20240108-rb-new-condvar-methods-v4-4-88e0c871cc05@google.com --- rust/kernel/sync/condvar.rs | 39 ++++++++++++++++++------------------- rust/kernel/task.rs | 15 +++++++++++++- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index fa38c095cbe055..7f2b78e4abc71a 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -7,11 +7,17 @@ use super::{lock::Backend, lock::Guard, LockClassKey}; use crate::{ - bindings, init::PinInit, pin_init, str::CStr, task::MAX_SCHEDULE_TIMEOUT, time::Jiffies, + bindings, + init::PinInit, + pin_init, + str::CStr, + task::{MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE, TASK_NORMAL, TASK_UNINTERRUPTIBLE}, + time::Jiffies, types::Opaque, }; -use core::ffi::c_long; +use core::ffi::{c_int, c_long}; use core::marker::PhantomPinned; +use core::ptr; use macros::pin_data; /// Creates a [`CondVar`] initialiser with the given name and a newly-created lock class. @@ -108,7 +114,7 @@ impl CondVar { fn wait_internal( &self, - wait_state: u32, + wait_state: c_int, guard: &mut Guard<'_, T, B>, timeout_in_jiffies: c_long, ) -> c_long { @@ -119,7 +125,7 @@ impl CondVar { // SAFETY: Both `wait` and `wait_list` point to valid memory. unsafe { - bindings::prepare_to_wait_exclusive(self.wait_list.get(), wait.get(), wait_state as _) + bindings::prepare_to_wait_exclusive(self.wait_list.get(), wait.get(), wait_state) }; // SAFETY: Switches to another thread. The timeout can be any number. @@ -138,7 +144,7 @@ impl CondVar { /// [`CondVar::notify_one`] or [`CondVar::notify_all`]. Note that it may also wake up /// spuriously. pub fn wait(&self, guard: &mut Guard<'_, T, B>) { - self.wait_internal(bindings::TASK_UNINTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT); + self.wait_internal(TASK_UNINTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT); } /// Releases the lock and waits for a notification in interruptible mode. @@ -149,7 +155,7 @@ impl CondVar { /// Returns whether there is a signal pending. #[must_use = "wait_interruptible returns if a signal is pending, so the caller must check the return value"] pub fn wait_interruptible(&self, guard: &mut Guard<'_, T, B>) -> bool { - self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT); + self.wait_internal(TASK_INTERRUPTIBLE, guard, MAX_SCHEDULE_TIMEOUT); crate::current!().signal_pending() } @@ -165,7 +171,7 @@ impl CondVar { jiffies: Jiffies, ) -> CondVarTimeoutResult { let jiffies = jiffies.try_into().unwrap_or(MAX_SCHEDULE_TIMEOUT); - let res = self.wait_internal(bindings::TASK_INTERRUPTIBLE, guard, jiffies); + let res = self.wait_internal(TASK_INTERRUPTIBLE, guard, jiffies); match (res as Jiffies, crate::current!().signal_pending()) { (jiffies, true) => CondVarTimeoutResult::Signal { jiffies }, @@ -174,17 +180,10 @@ impl CondVar { } } - /// Calls the kernel function to notify the appropriate number of threads with the given flags. - fn notify(&self, count: i32, flags: u32) { + /// Calls the kernel function to notify the appropriate number of threads. + fn notify(&self, count: c_int) { // SAFETY: `wait_list` points to valid memory. - unsafe { - bindings::__wake_up( - self.wait_list.get(), - bindings::TASK_NORMAL, - count, - flags as _, - ) - }; + unsafe { bindings::__wake_up(self.wait_list.get(), TASK_NORMAL, count, ptr::null_mut()) }; } /// Calls the kernel function to notify one thread synchronously. @@ -194,7 +193,7 @@ impl CondVar { /// CPU. pub fn notify_sync(&self) { // SAFETY: `wait_list` points to valid memory. - unsafe { bindings::__wake_up_sync(self.wait_list.get(), bindings::TASK_NORMAL) }; + unsafe { bindings::__wake_up_sync(self.wait_list.get(), TASK_NORMAL) }; } /// Wakes a single waiter up, if any. @@ -202,7 +201,7 @@ impl CondVar { /// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost /// completely (as opposed to automatically waking up the next waiter). pub fn notify_one(&self) { - self.notify(1, 0); + self.notify(1); } /// Wakes all waiters up, if any. @@ -210,7 +209,7 @@ impl CondVar { /// This is not 'sticky' in the sense that if no thread is waiting, the notification is lost /// completely (as opposed to automatically waking up the next waiter). pub fn notify_all(&self) { - self.notify(0, 0); + self.notify(0); } } diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 17f14ebb8f488c..6072b0de4a3e5a 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -5,11 +5,24 @@ //! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h). use crate::{bindings, types::Opaque}; -use core::{ffi::c_long, marker::PhantomData, ops::Deref, ptr}; +use core::{ + ffi::{c_int, c_long, c_uint}, + marker::PhantomData, + ops::Deref, + ptr, +}; /// A sentinal value used for infinite timeouts. pub const MAX_SCHEDULE_TIMEOUT: c_long = c_long::MAX; +/// Bitmask for tasks that are sleeping in an interruptible state. +pub const TASK_INTERRUPTIBLE: c_int = bindings::TASK_INTERRUPTIBLE as c_int; +/// Bitmask for tasks that are sleeping in an uninterruptible state. +pub const TASK_UNINTERRUPTIBLE: c_int = bindings::TASK_UNINTERRUPTIBLE as c_int; +/// Convenience constant for waking up tasks regardless of whether they are in interruptible or +/// uninterruptible sleep. +pub const TASK_NORMAL: c_uint = bindings::TASK_NORMAL as c_uint; + /// Returns the currently running task. #[macro_export] macro_rules! current { From 5f46355104c4dee4f56017183c1914b25919b96c Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 6 Dec 2023 11:59:46 +0000 Subject: [PATCH 24/30] rust: file: add Rust abstraction for `struct file` This abstraction makes it possible to manipulate the open files for a process. The new `File` struct wraps the C `struct file`. When accessing it using the smart pointer `ARef`, the pointer will own a reference count to the file. When accessing it as `&File`, then the reference does not own a refcount, but the borrow checker will ensure that the reference count does not hit zero while the `&File` is live. Since this is intended to manipulate the open files of a process, we introduce a `from_fd` constructor that corresponds to the C `fget` method. In future patches, it will become possible to create a new fd in a process and bind it to a `File`. Rust Binder will use these to send fds from one process to another. We also provide a method for accessing the file's flags. Rust Binder will use this to access the flags of the Binder fd to check whether the non-blocking flag is set, which affects what the Binder ioctl does. This introduces a struct for the EBADF error type, rather than just using the Error type directly. This has two advantages: * `File::from_fd` returns a `Result, BadFdError>`, which the compiler will represent as a single pointer, with null being an error. This is possible because the compiler understands that `BadFdError` has only one possible value, and it also understands that the `ARef` smart pointer is guaranteed non-null. * Additionally, we promise to users of the method that the method can only fail with EBADF, which means that they can rely on this promise without having to inspect its implementation. That said, there are also two disadvantages: * Defining additional error types involves boilerplate. * The question mark operator will only utilize the `From` trait once, which prevents you from using the question mark operator on `BadFdError` in methods that return some third error type that the kernel `Error` is convertible into. (However, it works fine in methods that return `Error`.) Signed-off-by: Wedson Almeida Filho Co-developed-by: Daniel Xu Signed-off-by: Daniel Xu Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-1-af617c0d9d94@google.com [boqun: Resolve conflicts against net-next] --- rust/bindings/bindings_helper.h | 2 + rust/helpers.c | 7 ++ rust/kernel/file.rs | 196 ++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 4 files changed, 206 insertions(+) create mode 100644 rust/kernel/file.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 936651110c39a5..41fcd2905ed470 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index 70e59efd92bc43..03141a3608a45a 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -157,6 +158,12 @@ void rust_helper_init_work_with_key(struct work_struct *work, work_func_t func, } EXPORT_SYMBOL_GPL(rust_helper_init_work_with_key); +struct file *rust_helper_get_file(struct file *f) +{ + return get_file(f); +} +EXPORT_SYMBOL_GPL(rust_helper_get_file); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs new file mode 100644 index 00000000000000..29e1aacacd0662 --- /dev/null +++ b/rust/kernel/file.rs @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Files and file descriptors. +//! +//! C headers: [`include/linux/fs.h`](../../../../include/linux/fs.h) and +//! [`include/linux/file.h`](../../../../include/linux/file.h) + +use crate::{ + bindings, + error::{code::*, Error, Result}, + types::{ARef, AlwaysRefCounted, Opaque}, +}; +use core::ptr; + +/// Flags associated with a [`File`]. +pub mod flags { + /// File is opened in append mode. + pub const O_APPEND: u32 = bindings::O_APPEND; + + /// Signal-driven I/O is enabled. + pub const O_ASYNC: u32 = bindings::FASYNC; + + /// Close-on-exec flag is set. + pub const O_CLOEXEC: u32 = bindings::O_CLOEXEC; + + /// File was created if it didn't already exist. + pub const O_CREAT: u32 = bindings::O_CREAT; + + /// Direct I/O is enabled for this file. + pub const O_DIRECT: u32 = bindings::O_DIRECT; + + /// File must be a directory. + pub const O_DIRECTORY: u32 = bindings::O_DIRECTORY; + + /// Like [`O_SYNC`] except metadata is not synced. + pub const O_DSYNC: u32 = bindings::O_DSYNC; + + /// Ensure that this file is created with the `open(2)` call. + pub const O_EXCL: u32 = bindings::O_EXCL; + + /// Large file size enabled (`off64_t` over `off_t`). + pub const O_LARGEFILE: u32 = bindings::O_LARGEFILE; + + /// Do not update the file last access time. + pub const O_NOATIME: u32 = bindings::O_NOATIME; + + /// File should not be used as process's controlling terminal. + pub const O_NOCTTY: u32 = bindings::O_NOCTTY; + + /// If basename of path is a symbolic link, fail open. + pub const O_NOFOLLOW: u32 = bindings::O_NOFOLLOW; + + /// File is using nonblocking I/O. + pub const O_NONBLOCK: u32 = bindings::O_NONBLOCK; + + /// Also known as `O_NDELAY`. + /// + /// This is effectively the same flag as [`O_NONBLOCK`] on all architectures + /// except SPARC64. + pub const O_NDELAY: u32 = bindings::O_NDELAY; + + /// Used to obtain a path file descriptor. + pub const O_PATH: u32 = bindings::O_PATH; + + /// Write operations on this file will flush data and metadata. + pub const O_SYNC: u32 = bindings::O_SYNC; + + /// This file is an unnamed temporary regular file. + pub const O_TMPFILE: u32 = bindings::O_TMPFILE; + + /// File should be truncated to length 0. + pub const O_TRUNC: u32 = bindings::O_TRUNC; + + /// Bitmask for access mode flags. + /// + /// # Examples + /// + /// ``` + /// use kernel::file; + /// # fn do_something() {} + /// # let flags = 0; + /// if (flags & file::flags::O_ACCMODE) == file::flags::O_RDONLY { + /// do_something(); + /// } + /// ``` + pub const O_ACCMODE: u32 = bindings::O_ACCMODE; + + /// File is read only. + pub const O_RDONLY: u32 = bindings::O_RDONLY; + + /// File is write only. + pub const O_WRONLY: u32 = bindings::O_WRONLY; + + /// File can be both read and written. + pub const O_RDWR: u32 = bindings::O_RDWR; +} + +/// Wraps the kernel's `struct file`. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_file` ensures that the +/// allocation remains valid at least until the matching call to `fput`. +#[repr(transparent)] +pub struct File(Opaque); + +// SAFETY: By design, the only way to access a `File` is via an immutable reference or an `ARef`. +// This means that the only situation in which a `File` can be accessed mutably is when the +// refcount drops to zero and the destructor runs. It is safe for that to happen on any thread, so +// it is ok for this type to be `Send`. +unsafe impl Send for File {} + +// SAFETY: All methods defined on `File` that take `&self` are safe to call even if other threads +// are concurrently accessing the same `struct file`, because those methods either access immutable +// properties or have proper synchronization to ensure that such accesses are safe. +unsafe impl Sync for File {} + +impl File { + /// Constructs a new `struct file` wrapper from a file descriptor. + /// + /// The file descriptor belongs to the current process. + pub fn fget(fd: u32) -> Result, BadFdError> { + // SAFETY: FFI call, there are no requirements on `fd`. + let ptr = ptr::NonNull::new(unsafe { bindings::fget(fd) }).ok_or(BadFdError)?; + + // SAFETY: `fget` either returns null or a valid pointer to a file, and we checked for null + // above. + // + // INVARIANT: `fget` increments the refcount before returning. + Ok(unsafe { ARef::from_raw(ptr.cast()) }) + } + + /// Creates a reference to a [`File`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` points at a valid file and that its refcount does not + /// reach zero during the lifetime 'a. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::file) -> &'a File { + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the + // duration of 'a. The cast is okay because `File` is `repr(transparent)`. + // + // INVARIANT: The safety requirements guarantee that the refcount does not hit zero during + // 'a. + unsafe { &*ptr.cast() } + } + + /// Returns a raw pointer to the inner C struct. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::file { + self.0.get() + } + + /// Returns the flags associated with the file. + /// + /// The flags are a combination of the constants in [`flags`]. + pub fn flags(&self) -> u32 { + // This `read_volatile` is intended to correspond to a READ_ONCE call. + // + // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. + // + // TODO: Replace with `read_once` when available on the Rust side. + unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } + } +} + +// SAFETY: The type invariants guarantee that `File` is always ref-counted. +unsafe impl AlwaysRefCounted for File { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_file(self.as_ptr()) }; + } + + unsafe fn dec_ref(obj: ptr::NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. + unsafe { bindings::fput(obj.cast().as_ptr()) } + } +} + +/// Represents the `EBADF` error code. +/// +/// Used for methods that can only fail with `EBADF`. +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct BadFdError; + +impl From for Error { + fn from(_: BadFdError) -> Error { + EBADF + } +} + +impl core::fmt::Debug for BadFdError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.pad("EBADF") + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index b89ecf4e97a072..9353dd713a2087 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -34,6 +34,7 @@ extern crate self as kernel; mod allocator; mod build_assert; pub mod error; +pub mod file; pub mod init; pub mod ioctl; #[cfg(CONFIG_KUNIT)] From 87c2ee29c006713aa50108b883e61c477061ad92 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 6 Dec 2023 11:59:47 +0000 Subject: [PATCH 25/30] rust: cred: add Rust abstraction for `struct cred` Add a wrapper around `struct cred` called `Credential`, and provide functionality to get the `Credential` associated with a `File`. Rust Binder must check the credentials of processes when they attempt to perform various operations, and these checks usually take a `&Credential` as parameter. The security_binder_set_context_mgr function would be one example. This patch is necessary to access these security_* methods from Rust. Signed-off-by: Wedson Almeida Filho Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-2-af617c0d9d94@google.com --- rust/bindings/bindings_helper.h | 1 + rust/helpers.c | 13 +++++++ rust/kernel/cred.rs | 64 +++++++++++++++++++++++++++++++++ rust/kernel/file.rs | 16 +++++++++ rust/kernel/lib.rs | 1 + 5 files changed, 95 insertions(+) create mode 100644 rust/kernel/cred.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 41fcd2905ed470..84a56e8b6b6741 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index 03141a3608a45a..10ed69f76424da 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -164,6 +165,18 @@ struct file *rust_helper_get_file(struct file *f) } EXPORT_SYMBOL_GPL(rust_helper_get_file); +const struct cred *rust_helper_get_cred(const struct cred *cred) +{ + return get_cred(cred); +} +EXPORT_SYMBOL_GPL(rust_helper_get_cred); + +void rust_helper_put_cred(const struct cred *cred) +{ + put_cred(cred); +} +EXPORT_SYMBOL_GPL(rust_helper_put_cred); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs new file mode 100644 index 00000000000000..497058ec89bbf8 --- /dev/null +++ b/rust/kernel/cred.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Credentials management. +//! +//! C header: [`include/linux/cred.h`](../../../../include/linux/cred.h) +//! +//! Reference: + +use crate::{ + bindings, + types::{AlwaysRefCounted, Opaque}, +}; + +/// Wraps the kernel's `struct cred`. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_cred` ensures that the +/// allocation remains valid at least until the matching call to `put_cred`. +#[repr(transparent)] +pub struct Credential(pub(crate) Opaque); + +// SAFETY: By design, the only way to access a `Credential` is via an immutable reference or an +// `ARef`. This means that the only situation in which a `Credential` can be accessed mutably is +// when the refcount drops to zero and the destructor runs. It is safe for that to happen on any +// thread, so it is ok for this type to be `Send`. +unsafe impl Send for Credential {} + +// SAFETY: It's OK to access `Credential` through shared references from other threads because +// we're either accessing properties that don't change or that are properly synchronised by C code. +unsafe impl Sync for Credential {} + +impl Credential { + /// Creates a reference to a [`Credential`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the + /// returned [`Credential`] reference. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::cred) -> &'a Credential { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `Credential` type being transparent makes the cast ok. + unsafe { &*ptr.cast() } + } + + /// Returns the effective UID of the given credential. + pub fn euid(&self) -> bindings::kuid_t { + // SAFETY: By the type invariant, we know that `self.0` is valid. + unsafe { (*self.0.get()).euid } + } +} + +// SAFETY: The type invariants guarantee that `Credential` is always ref-counted. +unsafe impl AlwaysRefCounted for Credential { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_cred(self.0.get()) }; + } + + unsafe fn dec_ref(obj: core::ptr::NonNull) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. + unsafe { bindings::put_cred(obj.cast().as_ptr()) }; + } +} diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs index 29e1aacacd0662..a88140794a8deb 100644 --- a/rust/kernel/file.rs +++ b/rust/kernel/file.rs @@ -7,6 +7,7 @@ use crate::{ bindings, + cred::Credential, error::{code::*, Error, Result}, types::{ARef, AlwaysRefCounted, Opaque}, }; @@ -151,6 +152,21 @@ impl File { self.0.get() } + /// Returns the credentials of the task that originally opened the file. + pub fn cred(&self) -> &Credential { + // SAFETY: Since the caller holds a reference to the file, it is guaranteed that its + // refcount does not hit zero during this function call. + // + // It's okay to read the `f_cred` field without synchronization as `f_cred` is never + // changed after initialization of the file. + let ptr = unsafe { (*self.as_ptr()).f_cred }; + + // SAFETY: The signature of this function ensures that the caller will only access the + // returned credential while the file is still valid, and the C side ensures that the + // credential stays valid at least as long as the file. + unsafe { Credential::from_ptr(ptr) } + } + /// Returns the flags associated with the file. /// /// The flags are a combination of the constants in [`flags`]. diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 9353dd713a2087..f65e5978f80749 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -33,6 +33,7 @@ extern crate self as kernel; #[cfg(not(testlib))] mod allocator; mod build_assert; +pub mod cred; pub mod error; pub mod file; pub mod init; From 31608a9dc4f017b0dd60bea104851b09ea44e2d3 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:48 +0000 Subject: [PATCH 26/30] rust: security: add abstraction for secctx Adds an abstraction for viewing the string representation of a security context. This is needed by Rust Binder because it has feature where a process can view the string representation of the security context for incoming transactions. The process can use that to authenticate incoming transactions, and since the feature is provided by the kernel, the process can trust that the security context is legitimate. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-3-af617c0d9d94@google.com --- rust/bindings/bindings_helper.h | 1 + rust/helpers.c | 21 +++++++++ rust/kernel/cred.rs | 8 ++++ rust/kernel/lib.rs | 1 + rust/kernel/security.rs | 79 +++++++++++++++++++++++++++++++++ 5 files changed, 110 insertions(+) create mode 100644 rust/kernel/security.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 84a56e8b6b6741..5ca497d786f0f0 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index 10ed69f76424da..fd633d9db79a38 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -177,6 +178,26 @@ void rust_helper_put_cred(const struct cred *cred) } EXPORT_SYMBOL_GPL(rust_helper_put_cred); +#ifndef CONFIG_SECURITY +void rust_helper_security_cred_getsecid(const struct cred *c, u32 *secid) +{ + security_cred_getsecid(c, secid); +} +EXPORT_SYMBOL_GPL(rust_helper_security_cred_getsecid); + +int rust_helper_security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return security_secid_to_secctx(secid, secdata, seclen); +} +EXPORT_SYMBOL_GPL(rust_helper_security_secid_to_secctx); + +void rust_helper_security_release_secctx(char *secdata, u32 seclen) +{ + security_release_secctx(secdata, seclen); +} +EXPORT_SYMBOL_GPL(rust_helper_security_release_secctx); +#endif + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs index 497058ec89bbf8..3794937b52947d 100644 --- a/rust/kernel/cred.rs +++ b/rust/kernel/cred.rs @@ -43,6 +43,14 @@ impl Credential { unsafe { &*ptr.cast() } } + /// Get the id for this security context. + pub fn get_secid(&self) -> u32 { + let mut secid = 0; + // SAFETY: The invariants of this type ensures that the pointer is valid. + unsafe { bindings::security_cred_getsecid(self.0.get(), &mut secid) }; + secid + } + /// Returns the effective UID of the given credential. pub fn euid(&self) -> bindings::kuid_t { // SAFETY: By the type invariant, we know that `self.0` is valid. diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index f65e5978f80749..d55d065642f059 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -44,6 +44,7 @@ pub mod kunit; pub mod net; pub mod prelude; pub mod print; +pub mod security; mod static_assert; #[doc(hidden)] pub mod std_vendor; diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs new file mode 100644 index 00000000000000..6545bfa2fd72e9 --- /dev/null +++ b/rust/kernel/security.rs @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Linux Security Modules (LSM). +//! +//! C header: [`include/linux/security.h`](../../../../include/linux/security.h). + +use crate::{ + bindings, + error::{to_result, Result}, +}; + +/// A security context string. +/// +/// # Invariants +/// +/// The `secdata` and `seclen` fields correspond to a valid security context as returned by a +/// successful call to `security_secid_to_secctx`, that has not yet been destroyed by calling +/// `security_release_secctx`. +pub struct SecurityCtx { + secdata: *mut core::ffi::c_char, + seclen: usize, +} + +impl SecurityCtx { + /// Get the security context given its id. + pub fn from_secid(secid: u32) -> Result { + let mut secdata = core::ptr::null_mut(); + let mut seclen = 0u32; + // SAFETY: Just a C FFI call. The pointers are valid for writes. + unsafe { + to_result(bindings::security_secid_to_secctx( + secid, + &mut secdata, + &mut seclen, + ))?; + } + + // INVARIANT: If the above call did not fail, then we have a valid security context. + Ok(Self { + secdata, + seclen: seclen as usize, + }) + } + + /// Returns whether the security context is empty. + pub fn is_empty(&self) -> bool { + self.seclen == 0 + } + + /// Returns the length of this security context. + pub fn len(&self) -> usize { + self.seclen + } + + /// Returns the bytes for this security context. + pub fn as_bytes(&self) -> &[u8] { + let ptr = self.secdata; + if ptr.is_null() { + // We can't pass a null pointer to `slice::from_raw_parts` even if the length is zero. + debug_assert_eq!(self.seclen, 0); + return &[]; + } + + // SAFETY: The call to `security_secid_to_secctx` guarantees that the pointer is valid for + // `seclen` bytes. Furthermore, if the length is zero, then we have ensured that the + // pointer is not null. + unsafe { core::slice::from_raw_parts(ptr.cast(), self.seclen) } + } +} + +impl Drop for SecurityCtx { + fn drop(&mut self) { + // SAFETY: This frees a pointer that came from a successful call to + // `security_secid_to_secctx` and has not yet been destroyed by `security_release_secctx`. + unsafe { + bindings::security_release_secctx(self.secdata, self.seclen as u32); + } + } +} From fd6acd67d441ca6acb8833d01de1dd6cc1bc548d Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Wed, 6 Dec 2023 11:59:49 +0000 Subject: [PATCH 27/30] rust: file: add `FileDescriptorReservation` Allow for the creation of a file descriptor in two steps: first, we reserve a slot for it, then we commit or drop the reservation. The first step may fail (e.g., the current process ran out of available slots), but commit and drop never fail (and are mutually exclusive). This is needed by Rust Binder when fds are sent from one process to another. It has to be a two-step process to properly handle the case where multiple fds are sent: The operation must fail or succeed atomically, which we achieve by first reserving the fds we need, and only installing the files once we have reserved enough fds to send the files. Fd reservations assume that the value of `current` does not change between the call to get_unused_fd_flags and the call to fd_install (or put_unused_fd). By not implementing the Send trait, this abstraction ensures that the `FileDescriptorReservation` cannot be moved into a different process. Signed-off-by: Wedson Almeida Filho Co-developed-by: Alice Ryhl Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-4-af617c0d9d94@google.com --- rust/kernel/file.rs | 68 ++++++++++++++++++++++++++++++++++++++++++-- rust/kernel/types.rs | 10 +++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs index a88140794a8deb..2d036d4636a0fc 100644 --- a/rust/kernel/file.rs +++ b/rust/kernel/file.rs @@ -9,9 +9,9 @@ use crate::{ bindings, cred::Credential, error::{code::*, Error, Result}, - types::{ARef, AlwaysRefCounted, Opaque}, + types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, }; -use core::ptr; +use core::{marker::PhantomData, ptr}; /// Flags associated with a [`File`]. pub mod flags { @@ -193,6 +193,70 @@ unsafe impl AlwaysRefCounted for File { } } +/// A file descriptor reservation. +/// +/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it, +/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran +/// out of available slots), but commit and drop never fail (and are mutually exclusive). +/// +/// Dropping the reservation happens in the destructor of this type. +/// +/// # Invariants +/// +/// The fd stored in this struct must correspond to a reserved file descriptor of the current task. +pub struct FileDescriptorReservation { + fd: u32, + /// Prevent values of this type from being moved to a different task. + /// + /// The `fd_install` and `put_unused_fd` functions assume that the value of `current` is + /// unchanged since the call to `get_unused_fd_flags`. By adding this marker to this type, we + /// prevent it from being moved across task boundaries, which ensures that `current` does not + /// change while this value exists. + _not_send: NotThreadSafe, +} + +impl FileDescriptorReservation { + /// Creates a new file descriptor reservation. + pub fn get_unused_fd_flags(flags: u32) -> Result { + // SAFETY: FFI call, there are no safety requirements on `flags`. + let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) }; + if fd < 0 { + return Err(Error::from_errno(fd)); + } + Ok(Self { + fd: fd as u32, + _not_send: PhantomData, + }) + } + + /// Returns the file descriptor number that was reserved. + pub fn reserved_fd(&self) -> u32 { + self.fd + } + + /// Commits the reservation. + /// + /// The previously reserved file descriptor is bound to `file`. This method consumes the + /// [`FileDescriptorReservation`], so it will not be usable after this call. + pub fn fd_install(self, file: ARef) { + // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`, and `file.ptr` is + // guaranteed to have an owned ref count by its type invariants. + unsafe { bindings::fd_install(self.fd, file.0.get()) }; + + // `fd_install` consumes both the file descriptor and the file reference, so we cannot run + // the destructors. + core::mem::forget(self); + core::mem::forget(file); + } +} + +impl Drop for FileDescriptorReservation { + fn drop(&mut self) { + // SAFETY: `self.fd` was returned by a previous call to `get_unused_fd_flags`. + unsafe { bindings::put_unused_fd(self.fd) }; + } +} + /// Represents the `EBADF` error code. /// /// Used for methods that can only fail with `EBADF`. diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index d849e1979ac707..c0940af1239b1b 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -432,3 +432,13 @@ pub enum Either { /// Constructs an instance of [`Either`] containing a value of type `R`. Right(R), } + +/// Zero-sized type to mark types not [`Send`]. +/// +/// Add this type as a field to your struct if your type should not be sent to a different task. +/// Since [`Send`] is an auto trait, adding a single field that is `!Send` will ensure that the +/// whole type is `!Send`. +/// +/// If a type is `!Send` it is impossible to give control over an instance of the type to another +/// task. This is useful when a type stores task-local information for example file descriptors. +pub type NotThreadSafe = PhantomData<*mut ()>; From 82571388096b8f1986053bf7dad9673d26a5a426 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:50 +0000 Subject: [PATCH 28/30] rust: file: add `Kuid` wrapper Adds a wrapper around `kuid_t` called `Kuid`. This allows us to define various operations on kuids such as equality and current_euid. It also lets us provide conversions from kuid into userspace values. Rust Binder needs these operations because it needs to compare kuids for equality, and it needs to tell userspace about the pid and uid of incoming transactions. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-5-af617c0d9d94@google.com [boqun: Resolve conflicts with wait_timeout patchset] --- rust/bindings/bindings_helper.h | 1 + rust/helpers.c | 45 ++++++++++++++++++++++++ rust/kernel/cred.rs | 5 +-- rust/kernel/task.rs | 62 +++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 5ca497d786f0f0..1951040041c36e 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers.c b/rust/helpers.c index fd633d9db79a38..58e3a9dff3499d 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -142,6 +142,51 @@ void rust_helper_put_task_struct(struct task_struct *t) } EXPORT_SYMBOL_GPL(rust_helper_put_task_struct); +kuid_t rust_helper_task_uid(struct task_struct *task) +{ + return task_uid(task); +} +EXPORT_SYMBOL_GPL(rust_helper_task_uid); + +kuid_t rust_helper_task_euid(struct task_struct *task) +{ + return task_euid(task); +} +EXPORT_SYMBOL_GPL(rust_helper_task_euid); + +#ifndef CONFIG_USER_NS +uid_t rust_helper_from_kuid(struct user_namespace *to, kuid_t uid) +{ + return from_kuid(to, uid); +} +EXPORT_SYMBOL_GPL(rust_helper_from_kuid); +#endif /* CONFIG_USER_NS */ + +bool rust_helper_uid_eq(kuid_t left, kuid_t right) +{ + return uid_eq(left, right); +} +EXPORT_SYMBOL_GPL(rust_helper_uid_eq); + +kuid_t rust_helper_current_euid(void) +{ + return current_euid(); +} +EXPORT_SYMBOL_GPL(rust_helper_current_euid); + +struct user_namespace *rust_helper_current_user_ns(void) +{ + return current_user_ns(); +} +EXPORT_SYMBOL_GPL(rust_helper_current_user_ns); + +pid_t rust_helper_task_tgid_nr_ns(struct task_struct *tsk, + struct pid_namespace *ns) +{ + return task_tgid_nr_ns(tsk, ns); +} +EXPORT_SYMBOL_GPL(rust_helper_task_tgid_nr_ns); + struct kunit *rust_helper_kunit_get_current_test(void) { return kunit_get_current_test(); diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs index 3794937b52947d..fbc749788bfa3c 100644 --- a/rust/kernel/cred.rs +++ b/rust/kernel/cred.rs @@ -8,6 +8,7 @@ use crate::{ bindings, + task::Kuid, types::{AlwaysRefCounted, Opaque}, }; @@ -52,9 +53,9 @@ impl Credential { } /// Returns the effective UID of the given credential. - pub fn euid(&self) -> bindings::kuid_t { + pub fn euid(&self) -> Kuid { // SAFETY: By the type invariant, we know that `self.0` is valid. - unsafe { (*self.0.get()).euid } + Kuid::from_raw(unsafe { (*self.0.get()).euid }) } } diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 6072b0de4a3e5a..06692004fa1e87 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -7,6 +7,7 @@ use crate::{bindings, types::Opaque}; use core::{ ffi::{c_int, c_long, c_uint}, + cmp::{Eq, PartialEq}, marker::PhantomData, ops::Deref, ptr, @@ -94,6 +95,12 @@ unsafe impl Sync for Task {} /// The type of process identifiers (PIDs). type Pid = bindings::pid_t; +/// The type of user identifiers (UIDs). +#[derive(Copy, Clone)] +pub struct Kuid { + kuid: bindings::kuid_t, +} + impl Task { /// Returns a task reference for the currently executing task/thread. /// @@ -148,12 +155,32 @@ impl Task { unsafe { *ptr::addr_of!((*self.0.get()).pid) } } + /// Returns the UID of the given task. + pub fn uid(&self) -> Kuid { + // SAFETY: By the type invariant, we know that `self.0` is valid. + Kuid::from_raw(unsafe { bindings::task_uid(self.0.get()) }) + } + + /// Returns the effective UID of the given task. + pub fn euid(&self) -> Kuid { + // SAFETY: By the type invariant, we know that `self.0` is valid. + Kuid::from_raw(unsafe { bindings::task_euid(self.0.get()) }) + } + /// Determines whether the given task has pending signals. pub fn signal_pending(&self) -> bool { // SAFETY: By the type invariant, we know that `self.0` is valid. unsafe { bindings::signal_pending(self.0.get()) != 0 } } + /// Returns the given task's pid in the current pid namespace. + pub fn pid_in_current_ns(&self) -> Pid { + // SAFETY: Calling `task_active_pid_ns` with the current task is always safe. + let namespace = unsafe { bindings::task_active_pid_ns(bindings::get_current()) }; + // SAFETY: We know that `self.0.get()` is valid by the type invariant. + unsafe { bindings::task_tgid_nr_ns(self.0.get(), namespace) } + } + /// Wakes up the task. pub fn wake_up(&self) { // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid. @@ -163,6 +190,41 @@ impl Task { } } +impl Kuid { + /// Get the current euid. + pub fn current_euid() -> Kuid { + // SAFETY: Just an FFI call. + Self::from_raw(unsafe { bindings::current_euid() }) + } + + /// Create a `Kuid` given the raw C type. + pub fn from_raw(kuid: bindings::kuid_t) -> Self { + Self { kuid } + } + + /// Turn this kuid into the raw C type. + pub fn into_raw(self) -> bindings::kuid_t { + self.kuid + } + + /// Converts this kernel UID into a userspace UID. + /// + /// Uses the namespace of the current task. + pub fn into_uid_in_current_ns(self) -> bindings::uid_t { + // SAFETY: Just an FFI call. + unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) } + } +} + +impl PartialEq for Kuid { + fn eq(&self, other: &Kuid) -> bool { + // SAFETY: Just an FFI call. + unsafe { bindings::uid_eq(self.kuid, other.kuid) } + } +} + +impl Eq for Kuid {} + // SAFETY: The type invariants guarantee that `Task` is always ref-counted. unsafe impl crate::types::AlwaysRefCounted for Task { fn inc_ref(&self) { From a49b634a6a018dfcd99519e2f19cf8a6df532f7d Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:51 +0000 Subject: [PATCH 29/30] rust: file: add `DeferredFdCloser` To close an fd from kernel space, we could call `ksys_close`. However, if we do this to an fd that is held using `fdget`, then we may trigger a use-after-free. Introduce a helper that can be used to close an fd even if the fd is currently held with `fdget`. This is done by grabbing an extra refcount to the file and dropping it in a task work once we return to userspace. This is necessary for Rust Binder because otherwise the user might try to have Binder close its fd for /dev/binder, which would cause problems as this happens inside an ioctl on /dev/binder, and ioctls hold the fd using `fdget`. Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to ksys_close() during fdget()") and in the comments on `binder_do_fd_close`. If there is some way to detect whether an fd is currently held with `fdget`, then this could be optimized to skip the allocation and task work when this is not the case. Another possible optimization would be to combine several fds into a single task work, since this is used with fd arrays that might hold several fds. That said, it might not be necessary to optimize it, because Rust Binder has two ways to send fds: BINDER_TYPE_FD and BINDER_TYPE_FDA. With BINDER_TYPE_FD, it is userspace's responsibility to close the fd, so this mechanism is used only by BINDER_TYPE_FDA, but fd arrays are used rarely these days. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-6-af617c0d9d94@google.com [boqun: Resolve conflicts against net-next] --- rust/bindings/bindings_helper.h | 2 + rust/helpers.c | 8 ++ rust/kernel/file.rs | 157 +++++++++++++++++++++++++++++++- 3 files changed, 166 insertions(+), 1 deletion(-) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 1951040041c36e..6ab2792cae7275 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include /* `bindgen` gets confused at certain things. */ diff --git a/rust/helpers.c b/rust/helpers.c index 58e3a9dff3499d..d146bbf25aec4a 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -243,6 +244,13 @@ void rust_helper_security_release_secctx(char *secdata, u32 seclen) EXPORT_SYMBOL_GPL(rust_helper_security_release_secctx); #endif +void rust_helper_init_task_work(struct callback_head *twork, + task_work_func_t func) +{ + init_task_work(twork, func); +} +EXPORT_SYMBOL_GPL(rust_helper_init_task_work); + /* * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can * use it in contexts where Rust expects a `usize` like slice (array) indices. diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs index 2d036d4636a0fc..eba96af4bdf76c 100644 --- a/rust/kernel/file.rs +++ b/rust/kernel/file.rs @@ -11,7 +11,8 @@ use crate::{ error::{code::*, Error, Result}, types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, }; -use core::{marker::PhantomData, ptr}; +use alloc::boxed::Box; +use core::{alloc::AllocError, marker::PhantomData, mem, ptr}; /// Flags associated with a [`File`]. pub mod flags { @@ -257,6 +258,160 @@ impl Drop for FileDescriptorReservation { } } +/// Helper used for closing file descriptors in a way that is safe even if the file is currently +/// held using `fdget`. +/// +/// Additional motivation can be found in commit 80cd795630d6 ("binder: fix use-after-free due to +/// ksys_close() during fdget()") and in the comments on `binder_do_fd_close`. +pub struct DeferredFdCloser { + inner: Box, +} + +/// SAFETY: This just holds an allocation with no real content, so there's no safety issue with +/// moving it across threads. +unsafe impl Send for DeferredFdCloser {} +unsafe impl Sync for DeferredFdCloser {} + +#[repr(C)] +struct DeferredFdCloserInner { + twork: mem::MaybeUninit, + file: *mut bindings::file, +} + +impl DeferredFdCloser { + /// Create a new [`DeferredFdCloser`]. + pub fn new() -> Result { + Ok(Self { + inner: Box::try_new(DeferredFdCloserInner { + twork: mem::MaybeUninit::uninit(), + file: core::ptr::null_mut(), + })?, + }) + } + + /// Schedule a task work that closes the file descriptor when this task returns to userspace. + /// + /// Fails if this is called from a context where we cannot run work when returning to + /// userspace. (E.g., from a kthread.) + pub fn close_fd(self, fd: u32) -> Result<(), DeferredFdCloseError> { + use bindings::task_work_notify_mode_TWA_RESUME as TWA_RESUME; + + // In this method, we schedule the task work before closing the file. This is because + // scheduling a task work is fallible, and we need to know whether it will fail before we + // attempt to close the file. + + // SAFETY: Getting a pointer to current is always safe. + let current = unsafe { bindings::get_current() }; + + // SAFETY: Accessing the `flags` field of `current` is always safe. + let is_kthread = (unsafe { (*current).flags } & bindings::PF_KTHREAD) != 0; + if is_kthread { + return Err(DeferredFdCloseError::TaskWorkUnavailable); + } + + // This disables the destructor of the box, so the allocation is not cleaned up + // automatically below. + let inner = Box::into_raw(self.inner); + + // The `callback_head` field is first in the struct, so this cast correctly gives us a + // pointer to the field. + let callback_head = inner.cast::(); + // SAFETY: This pointer offset operation does not go out-of-bounds. + let file_field = unsafe { core::ptr::addr_of_mut!((*inner).file) }; + + // SAFETY: The `callback_head` pointer is compatible with the `do_close_fd` method. + unsafe { bindings::init_task_work(callback_head, Some(Self::do_close_fd)) }; + // SAFETY: The `callback_head` pointer points at a valid and fully initialized task work + // that is ready to be scheduled. + // + // If the task work gets scheduled as-is, then it will be a no-op. However, we will update + // the file pointer below to tell it which file to fput. + let res = unsafe { bindings::task_work_add(current, callback_head, TWA_RESUME) }; + + if res != 0 { + // SAFETY: Scheduling the task work failed, so we still have ownership of the box, so + // we may destroy it. + unsafe { drop(Box::from_raw(inner)) }; + + return Err(DeferredFdCloseError::TaskWorkUnavailable); + } + + // SAFETY: Just an FFI call. This is safe no matter what `fd` is. + let file = unsafe { bindings::close_fd_get_file(fd) }; + if file.is_null() { + // We don't clean up the task work since that might be expensive if the task work queue + // is long. Just let it execute and let it clean up for itself. + return Err(DeferredFdCloseError::BadFd); + } + + // SAFETY: The `file` pointer points at a valid file. + unsafe { bindings::get_file(file) }; + + // SAFETY: Due to the above `get_file`, even if the current task holds an `fdget` to + // this file right now, the refcount will not drop to zero until after it is released + // with `fdput`. This is because when using `fdget`, you must always use `fdput` before + // returning to userspace, and our task work runs after any `fdget` users have returned + // to userspace. + // + // Note: fl_owner_t is currently a void pointer. + unsafe { bindings::filp_close(file, (*current).files as bindings::fl_owner_t) }; + + // We update the file pointer that the task work is supposed to fput. + // + // SAFETY: Task works are executed on the current thread once we return to userspace, so + // this write is guaranteed to happen before `do_close_fd` is called, which means that a + // race is not possible here. + // + // It's okay to pass this pointer to the task work, since we just acquired a refcount with + // the previous call to `get_file`. Furthermore, the refcount will not drop to zero during + // an `fdget` call, since we defer the `fput` until after returning to userspace. + unsafe { *file_field = file }; + + Ok(()) + } + + // SAFETY: This function is an implementation detail of `close_fd`, so its safety comments + // should be read in extension of that method. + unsafe extern "C" fn do_close_fd(inner: *mut bindings::callback_head) { + // SAFETY: In `close_fd` we use this method together with a pointer that originates from a + // `Box`, and we have just been given ownership of that allocation. + let inner = unsafe { Box::from_raw(inner as *mut DeferredFdCloserInner) }; + if !inner.file.is_null() { + // SAFETY: This drops a refcount we acquired in `close_fd`. Since this callback runs in + // a task work after we return to userspace, it is guaranteed that the current thread + // doesn't hold this file with `fdget`, as `fdget` must be released before returning to + // userspace. + unsafe { bindings::fput(inner.file) }; + } + // Free the allocation. + drop(inner); + } +} + +/// Represents a failure to close an fd in a deferred manner. +#[derive(Copy, Clone, Eq, PartialEq)] +pub enum DeferredFdCloseError { + /// Closing the fd failed because we were unable to schedule a task work. + TaskWorkUnavailable, + /// Closing the fd failed because the fd does not exist. + BadFd, +} + +impl From for Error { + fn from(err: DeferredFdCloseError) -> Error { + match err { + DeferredFdCloseError::TaskWorkUnavailable => ESRCH, + DeferredFdCloseError::BadFd => EBADF, + } + } +} + +impl core::fmt::Debug for DeferredFdCloseError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + Error::from(*self).fmt(f) + } +} + /// Represents the `EBADF` error code. /// /// Used for methods that can only fail with `EBADF`. From 779be36264e7a7d1720865ce66b482ff4b554a01 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 6 Dec 2023 11:59:52 +0000 Subject: [PATCH 30/30] rust: file: add abstraction for `poll_table` The existing `CondVar` abstraction is a wrapper around `wait_list`, but it does not support all use-cases of the C `wait_list` type. To be specific, a `CondVar` cannot be registered with a `struct poll_table`. This limitation has the advantage that you do not need to call `synchronize_rcu` when destroying a `CondVar`. However, we need the ability to register a `poll_table` with a `wait_list` in Rust Binder. To enable this, introduce a type called `PollCondVar`, which is like `CondVar` except that you can register a `poll_table`. We also introduce `PollTable`, which is a safe wrapper around `poll_table` that is intended to be used with `PollCondVar`. The destructor of `PollCondVar` unconditionally calls `synchronize_rcu` to ensure that the removal of epoll waiters has fully completed before the `wait_list` is destroyed. That said, `synchronize_rcu` is rather expensive and is not needed in all cases: If we have never registered a `poll_table` with the `wait_list`, then we don't need to call `synchronize_rcu`. (And this is a common case in Binder - not all processes use Binder with epoll.) The current implementation does not account for this, but if we find that it is necessary to improve this, a future patch could change store a boolean next to the `wait_list` to keep track of whether a `poll_table` has ever been registered. Signed-off-by: Alice Ryhl Link: https://lore.kernel.org/r/20231206-alice-file-v2-7-af617c0d9d94@google.com [ boqun: Removes unused POLLFREE definition ] --- rust/bindings/bindings_helper.h | 1 + rust/kernel/sync.rs | 1 + rust/kernel/sync/poll.rs | 103 ++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 rust/kernel/sync/poll.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 6ab2792cae7275..1bd8f39898adbf 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index d219ee518eff15..84726f80c4069c 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -11,6 +11,7 @@ mod arc; mod condvar; pub mod lock; mod locked_by; +pub mod poll; pub use arc::{Arc, ArcBorrow, UniqueArc}; pub use condvar::CondVar; diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs new file mode 100644 index 00000000000000..e1dded9b7b9d69 --- /dev/null +++ b/rust/kernel/sync/poll.rs @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Utilities for working with `struct poll_table`. + +use crate::{ + bindings, + file::File, + prelude::*, + sync::{CondVar, LockClassKey}, + types::Opaque, +}; +use core::ops::Deref; + +/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class. +#[macro_export] +macro_rules! new_poll_condvar { + ($($name:literal)?) => { + $crate::file::PollCondVar::new($crate::optional_name!($($name)?), $crate::static_lock_class!()) + }; +} + +/// Wraps the kernel's `struct poll_table`. +#[repr(transparent)] +pub struct PollTable(Opaque); + +impl PollTable { + /// Creates a reference to a [`PollTable`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll + /// table, and that it is only accessed via the returned reference. + pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `PollTable` type being transparent makes the cast ok. + unsafe { &mut *ptr.cast() } + } + + fn get_qproc(&self) -> bindings::poll_queue_proc { + let ptr = self.0.get(); + // SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc` + // field is not modified concurrently with this call since we have an immutable reference. + unsafe { (*ptr)._qproc } + } + + /// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified + /// using the condition variable. + pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { + if let Some(qproc) = self.get_qproc() { + // SAFETY: The pointers to `self` and `file` are valid because they are references. + // + // Before the wait list is destroyed, the destructor of `PollCondVar` will clear + // everything in the wait list, so the wait list is not used after it is freed. + unsafe { qproc(file.as_ptr() as _, cv.wait_list.get(), self.0.get()) }; + } + } +} + +/// A wrapper around [`CondVar`] that makes it usable with [`PollTable`]. +/// +/// # Invariant +/// +/// If `needs_synchronize_rcu` is false, then there is nothing registered with `register_wait`. +/// +/// [`CondVar`]: crate::sync::CondVar +#[pin_data(PinnedDrop)] +pub struct PollCondVar { + #[pin] + inner: CondVar, +} + +impl PollCondVar { + /// Constructs a new condvar initialiser. + pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit { + pin_init!(Self { + inner <- CondVar::new(name, key), + }) + } +} + +// Make the `CondVar` methods callable on `PollCondVar`. +impl Deref for PollCondVar { + type Target = CondVar; + + fn deref(&self) -> &CondVar { + &self.inner + } +} + +#[pinned_drop] +impl PinnedDrop for PollCondVar { + fn drop(self: Pin<&mut Self>) { + // Clear anything registered using `register_wait`. + // + // SAFETY: The pointer points at a valid wait list. + unsafe { bindings::__wake_up_pollfree(self.inner.wait_list.get()) }; + + // Wait for epoll items to be properly removed. + // + // SAFETY: Just an FFI call. + unsafe { bindings::synchronize_rcu() }; + } +}