From 624a9d8653b2bdc1b18d27d89941e5c528212d4f Mon Sep 17 00:00:00 2001 From: Youjie Zheng Date: Wed, 20 Nov 2024 21:21:52 +0800 Subject: [PATCH] [feat] Insert function into `.init_array` section to support Linux --- Cargo.toml | 6 +++- README.md | 39 +++++++++++++++++++++----- constructor_array/README.md | 39 +++++++++++++++++++++----- constructor_array/src/lib.rs | 14 +++++----- constructor_array/tests/test_ctor.rs | 40 +++++++++++++++++++++++++++ constructor_array/tests/test_empty.rs | 8 ++++++ constructor_array_macros/README.md | 39 +++++++++++++++++++++----- constructor_array_macros/src/lib.rs | 8 +++--- 8 files changed, 160 insertions(+), 33 deletions(-) create mode 100644 constructor_array/tests/test_ctor.rs create mode 100644 constructor_array/tests/test_empty.rs diff --git a/Cargo.toml b/Cargo.toml index 147aa87..e3984d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,4 +14,8 @@ homepage = "https://github.com/arceos-org/arceos" documentation = "https://arceos-org.github.io/constructor_array" repository = "https://github.com/arceos-org/constructor_array" keywords = ["arceos", "constructor"] -categories = ["development-tools::procedural-macro-helpers", "no-std"] \ No newline at end of file +categories = ["development-tools::procedural-macro-helpers", "no-std"] + +[packages] +constructor_array = { path = "constructor_array" } +constructor_array_macros = { path = "constructor_array_macros" } \ No newline at end of file diff --git a/README.md b/README.md index a264fcf..aea0366 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,23 @@ # constructor_array +[![Crates.io](https://img.shields.io/crates/v/constructor_array)](https://crates.io/crates/constructor_array) +[![Docs.rs](https://docs.rs/constructor_array/badge.svg)](https://docs.rs/constructor_array) +[![CI](https://github.com/arceos-org/constructor_array/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/arceos-org/constructor_array/actions/workflows/ci.yml) + + Module initialization 
functions for Rust (like __attribute__((constructor)) in C/C++) under no_std. -After registering a constructor function, a function pointer pointing to it will be stored in the `ctor` section. +After registering a constructor function, a function pointer pointing to it will be stored in the `.init_array` section. + + +It can support Linux, Windows, MacOS and other systems, and can be also used in `no_std` environments when developing your own kernel. -When the program starts, it can call all initialization functions in the `ctor` section in order. +In Linux, Windows, MacOS and other systems, the `.init_array` section is a default section to store initialization functions. When the program starts, the system will call all functions in the `.init_array` section in order. + + +When you are running your own operating system, you can call `constructor_array::invoke_ctors` to invoke all registered constructor functions. ## Usage @@ -25,11 +36,25 @@ fn set_max_num() { } fn main() { - constructor_array::invoke_ctors(); - println!( - "MAX_NUM: {}", - MAX_NUM.load(std::sync::atomic::Ordering::Relaxed) - ); + assert_eq!(MAX_NUM.load(std::sync::atomic::Ordering::Relaxed), 20); +} +``` + +Because the `.init_array` section is a default section to store initialization functions in Linux and some other systems, it will be included in the linker script of compilers like GCC and Clang. + + +**However**, if you are using a custom linker script, you need to **add the `.init_array` section to the `.text` section manually**, so that these functions can be mapped into the page table and executed correctly. You can add the following lines to your linker script as a reference: + +```text, ignore +.text : ALIGN(4K) { + /* other sections in the `.text` section */ + + __init_array_start = .; + __init_array_end = __init_array_start + SIZEOF(.init_array); + *(.init_array .init_array.*) + . 
= __init_array_end; + + /* other sections in the `.text` section */ } ``` diff --git a/constructor_array/README.md b/constructor_array/README.md index a264fcf..aea0366 100644 --- a/constructor_array/README.md +++ b/constructor_array/README.md @@ -1,12 +1,23 @@ # constructor_array +[![Crates.io](https://img.shields.io/crates/v/constructor_array)](https://crates.io/crates/constructor_array) +[![Docs.rs](https://docs.rs/constructor_array/badge.svg)](https://docs.rs/constructor_array) +[![CI](https://github.com/arceos-org/constructor_array/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/arceos-org/constructor_array/actions/workflows/ci.yml) + + Module initialization functions for Rust (like __attribute__((constructor)) in C/C++) under no_std. -After registering a constructor function, a function pointer pointing to it will be stored in the `ctor` section. +After registering a constructor function, a function pointer pointing to it will be stored in the `.init_array` section. + + +It can support Linux, Windows, MacOS and other systems, and can be also used in `no_std` environments when developing your own kernel. -When the program starts, it can call all initialization functions in the `ctor` section in order. +In Linux, Windows, MacOS and other systems, the `.init_array` section is a default section to store initialization functions. When the program starts, the system will call all functions in the `.init_array` section in order. + + +When you are running your own operating system, you can call `constructor_array::invoke_ctors` to invoke all registered constructor functions. 
## Usage @@ -25,11 +36,25 @@ fn set_max_num() { } fn main() { - constructor_array::invoke_ctors(); - println!( - "MAX_NUM: {}", - MAX_NUM.load(std::sync::atomic::Ordering::Relaxed) - ); + assert_eq!(MAX_NUM.load(std::sync::atomic::Ordering::Relaxed), 20); +} +``` + +Because the `.init_array` section is a default section to store initialization functions in Linux and some other systems, it will be included in the linker script of compilers like GCC and Clang. + + +**However**, if you are using a custom linker script, you need to **add the `.init_array` section to the `.text` section manually**, so that these functions can be mapped into the page table and executed correctly. You can add the following lines to your linker script as a reference: + +```text, ignore +.text : ALIGN(4K) { + /* other sections in the `.text` section */ + + __init_array_start = .; + __init_array_end = __init_array_start + SIZEOF(.init_array); + *(.init_array .init_array.*) + . = __init_array_end; + + /* other sections in the `.text` section */ } ``` diff --git a/constructor_array/src/lib.rs b/constructor_array/src/lib.rs index 5571891..c4d85ec 100644 --- a/constructor_array/src/lib.rs +++ b/constructor_array/src/lib.rs @@ -3,23 +3,23 @@ pub use constructor_array_macros::register_ctor; -/// Placeholder for the `ctors` section, so that -/// the `__start_ctors` and `__stop_ctors` symbols can be generated. -#[link_section = "ctors"] +/// Placeholder for the `.init_array` section, so that +/// the `__init_array_start` and `__init_array_end` symbols can be generated. +#[link_section = ".init_array"] #[used] static _SECTION_PLACE_HOLDER: [u8; 0] = []; extern "C" { - fn __start_ctors(); - fn __stop_ctors(); + fn __init_array_start(); + fn __init_array_end(); } /// Invoke all constructor functions registered by the `register_ctor` attribute. /// /// # Notes -/// Caller should ensure that the `ctor` section will not be disturbed by other sections. 
+/// Caller should ensure that the `.init_array` section will not be disturbed by other sections. pub fn invoke_ctors() { - for ctor_ptr in (__start_ctors as usize..__stop_ctors as usize) + for ctor_ptr in (__init_array_start as usize..__init_array_end as usize) .step_by(core::mem::size_of::<*const core::ffi::c_void>()) { unsafe { diff --git a/constructor_array/tests/test_ctor.rs b/constructor_array/tests/test_ctor.rs new file mode 100644 index 0000000..d6abdd0 --- /dev/null +++ b/constructor_array/tests/test_ctor.rs @@ -0,0 +1,40 @@ +use std::sync::{atomic::AtomicUsize, Mutex}; + +use constructor_array::*; + +static INIT_NUM: AtomicUsize = AtomicUsize::new(0); + +#[register_ctor] +fn set_init_num() { + INIT_NUM.fetch_add(20, std::sync::atomic::Ordering::Relaxed); +} + +static INIT_VEC: Mutex<Vec<usize>> = Mutex::new(Vec::new()); + +#[register_ctor] +fn init_vector() { + let mut vec = INIT_VEC.lock().unwrap(); + vec.push(1); + vec.push(2); + vec.push(3); +} + +#[test] +fn test_constructor_array() { + // The constructor functions will be called before the main function. + assert!(INIT_NUM.load(std::sync::atomic::Ordering::Relaxed) == 20); + let vec = INIT_VEC.lock().unwrap(); + assert!(vec.len() == 3); + assert!(vec[0] == 1); + assert!(vec[1] == 2); + assert!(vec[2] == 3); + drop(vec); + + // But we can invoke the constructor functions again manually. + init_vector(); + let vec = INIT_VEC.lock().unwrap(); + assert!(vec.len() == 6); + assert!(vec[3] == 1); + assert!(vec[4] == 2); + assert!(vec[5] == 3); +} diff --git a/constructor_array/tests/test_empty.rs b/constructor_array/tests/test_empty.rs new file mode 100644 index 0000000..812dab2 --- /dev/null +++ b/constructor_array/tests/test_empty.rs @@ -0,0 +1,8 @@ +#[test] +fn test_empty() { + // Sometimes under certain conditions, we may not have any constructor functions. + // But the `invoke_ctors` function should still work, and the `__init_array_start` and + // `__init_array_end` symbols should be valid. 
+ constructor_array::invoke_ctors(); + println!("It should exit successfully when we don't specify any constructor functions."); +} diff --git a/constructor_array_macros/README.md b/constructor_array_macros/README.md index a264fcf..aea0366 100644 --- a/constructor_array_macros/README.md +++ b/constructor_array_macros/README.md @@ -1,12 +1,23 @@ # constructor_array +[![Crates.io](https://img.shields.io/crates/v/constructor_array)](https://crates.io/crates/constructor_array) +[![Docs.rs](https://docs.rs/constructor_array/badge.svg)](https://docs.rs/constructor_array) +[![CI](https://github.com/arceos-org/constructor_array/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/arceos-org/constructor_array/actions/workflows/ci.yml) + + Module initialization functions for Rust (like __attribute__((constructor)) in C/C++) under no_std. -After registering a constructor function, a function pointer pointing to it will be stored in the `ctor` section. +After registering a constructor function, a function pointer pointing to it will be stored in the `.init_array` section. + + +It can support Linux, Windows, MacOS and other systems, and can be also used in `no_std` environments when developing your own kernel. -When the program starts, it can call all initialization functions in the `ctor` section in order. +In Linux, Windows, MacOS and other systems, the `.init_array` section is a default section to store initialization functions. When the program starts, the system will call all functions in the `.init_array` section in order. + + +When you are running your own operating system, you can call `constructor_array::invoke_ctors` to invoke all registered constructor functions. 
## Usage @@ -25,11 +36,25 @@ fn set_max_num() { } fn main() { - constructor_array::invoke_ctors(); - println!( - "MAX_NUM: {}", - MAX_NUM.load(std::sync::atomic::Ordering::Relaxed) - ); + assert_eq!(MAX_NUM.load(std::sync::atomic::Ordering::Relaxed), 20); +} +``` + +Because the `.init_array` section is a default section to store initialization functions in Linux and some other systems, it will be included in the linker script of compilers like GCC and Clang. + + +**However**, if you are using a custom linker script, you need to **add the `.init_array` section to the `.text` section manually**, so that these functions can be mapped into the page table and executed correctly. You can add the following lines to your linker script as a reference: + +```text, ignore +.text : ALIGN(4K) { + /* other sections in the `.text` section */ + + __init_array_start = .; + __init_array_end = __init_array_start + SIZEOF(.init_array); + *(.init_array .init_array.*) + . = __init_array_end; + + /* other sections in the `.text` section */ } ``` diff --git a/constructor_array_macros/src/lib.rs b/constructor_array_macros/src/lib.rs index 2bb7d35..3485709 100644 --- a/constructor_array_macros/src/lib.rs +++ b/constructor_array_macros/src/lib.rs @@ -2,9 +2,9 @@ //! //! **DO NOT** use this crate directly. Use the [constructor_array](https://docs.rs/constructor_array) crate instead. //! -//! After attching the `register_ctor` macro to the given function, a pointer pointing to it will be stored in the `ctors` section. +//! After attaching the `register_ctor` macro to the given function, a pointer pointing to it will be stored in the `.init_array` section. //! When the program is loaded, this section will be linked into the binary. The `invoke_ctors` function in the `constructor_array` -//! crate will call all the constructor functions in the `ctors` section. +//! crate will call all the constructor functions in the `.init_array` section. //! //! 
See the documentation of the [constructor_array](https://docs.rs/constructor_array) crate for more details. @@ -33,7 +33,7 @@ pub fn register_ctor(attr: TokenStream, function: TokenStream) -> TokenStream { if let Item::Fn(func) = item { let name = &func.sig.ident; let name_str = name.to_string(); - let name_ident = format_ident!("_CTOR_{}", name_str); + let name_ident = format_ident!("_INIT_{}", name_str); let output = &func.sig.output; // Constructor functions should not have any return value. if let syn::ReturnType::Type(_, _) = output { @@ -57,7 +57,7 @@ pub fn register_ctor(attr: TokenStream, function: TokenStream) -> TokenStream { let block = &func.block; quote! { - #[link_section = "ctors"] + #[link_section = ".init_array"] #[allow(non_upper_case_globals)] static #name_ident: extern "C" fn() = #name;