-
Notifications
You must be signed in to change notification settings - Fork 358
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for systemd managed cgroups
Showing
13 changed files
with
529 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,12 +40,29 @@ For other platforms, please use the devcontainer that we prepared. | |
- Rust(See [here](https://www.rust-lang.org/tools/install)) | ||
- Docker(See [here](https://docs.docker.com/engine/install)) | ||
|
||
## Building | ||
## Dependencies | ||
```sh | ||
$ cargo install cargo-when | ||
``` | ||
|
||
### Debian, Ubuntu and related distributions | ||
```sh | ||
$ sudo apt-get install \ | |
pkg-config \ | ||
libsystemd-dev \ | ||
libdbus-glib-1-dev | ||
``` | ||
|
||
|
||
### Fedora, CentOS, RHEL and related distributions | |
```sh | ||
$ cargo install cargo-when # installs prerequisite for building youki | ||
$ sudo dnf install \ | ||
pkg-config \ | ||
systemd-devel \ | |
dbus-devel | ||
``` | ||
|
||
## Build | ||
```sh | ||
$ git clone git@github.com:containers/youki.git | |
$ cd youki | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,308 @@ | ||
use std::{ | ||
fs::{self}, | ||
os::unix::fs::PermissionsExt, | ||
}; | ||
|
||
use anyhow::{anyhow, bail, Result}; | ||
use nix::unistd::Pid; | ||
use oci_spec::LinuxResources; | ||
use std::path::{Path, PathBuf}; | ||
|
||
use super::{cpu::Cpu, cpuset::CpuSet, hugetlb::HugeTlb, io::Io, memory::Memory, pids::Pids}; | ||
use crate::cgroups::common; | ||
use crate::cgroups::common::{write_cgroup_file, CgroupManager}; | ||
use crate::cgroups::v2::controller::Controller; | ||
use crate::cgroups::v2::controller_type::ControllerType; | ||
use crate::utils::PathBufExt; | ||
|
||
/// Name of the cgroup file the container pid is written to when it is attached to the cgroup.
const CGROUP_PROCS: &str = "cgroup.procs";
/// Name of the cgroup file that is read (as a whitespace-separated list) to discover
/// which controllers are available.
const CGROUP_CONTROLLERS: &str = "cgroup.controllers";
/// Name of the cgroup file that `+<controller>` entries are written to, one write per controller.
const CGROUP_SUBTREE_CONTROL: &str = "cgroup.subtree_control";

/// Controllers whose resource settings are applied by this manager.
// v2 systemd only supports cpu, io, memory and pids.
const CONTROLLER_TYPES: &[ControllerType] = &[
    ControllerType::Cpu,
    ControllerType::Io,
    ControllerType::Memory,
    ControllerType::Pids,
];
|
||
/// SystemDCGroupManager is a driver for managing cgroups via systemd.
#[derive(Debug)]
pub struct SystemDCGroupManager {
    /// Root of the cgroup hierarchy that every generated path is placed under.
    root_path: PathBuf,
    /// Parsed form of the user supplied cgroups path.
    cgroups_path: CgroupsPath,
}

/// Represents the systemd cgroups path:
/// It should be of the form [slice]:[scope_prefix]:[name].
/// The slice is the "parent" and should be expanded properly,
/// see expand_slice below.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CgroupsPath {
    /// The slice part; empty when no slice was given.
    parent: String,
    /// The scope prefix, e.g. `docker` in `system.slice:docker:1234`.
    scope: String,
    /// The unit name, e.g. `1234` in `system.slice:docker:1234`.
    name: String,
}
|
||
impl SystemDCGroupManager { | ||
pub fn new(root_path: PathBuf, cgroups_path: PathBuf) -> Result<Self> { | ||
// cgroups path may never be empty as it is defaulted to `/youki` | ||
// see 'get_cgroup_path' under utils.rs. | ||
// if cgroups_path was provided it should be of the form [slice]:[scope_prefix]:[name], | ||
// for example: "system.slice:docker:1234". | ||
let mut parent = ""; | ||
let scope; | ||
let name; | ||
if cgroups_path.starts_with("/youki") { | ||
scope = "youki"; | ||
name = cgroups_path | ||
.strip_prefix("/youki/")? | ||
.to_str() | ||
.ok_or_else(|| anyhow!("Failed to parse cgroupsPath field."))?; | ||
} else { | ||
let parts = cgroups_path | ||
.to_str() | ||
.ok_or_else(|| anyhow!("Failed to parse cgroupsPath field."))? | ||
.split(':') | ||
.collect::<Vec<&str>>(); | ||
parent = parts[0]; | ||
scope = parts[1]; | ||
name = parts[2]; | ||
} | ||
|
||
// TODO: create the systemd unit using a dbus client. | ||
|
||
Ok(SystemDCGroupManager { | ||
root_path, | ||
cgroups_path: CgroupsPath { | ||
parent: parent.to_string(), | ||
scope: scope.to_string(), | ||
name: name.to_string(), | ||
}, | ||
}) | ||
} | ||
|
||
/// get_unit_name returns the unit (scope) name from the path provided by the user | ||
/// for example: foo:docker:bar returns in '/docker-bar.scope' | ||
fn get_unit_name(&self) -> String { | ||
// By default we create a scope unless specified explicitly. | ||
if !self.cgroups_path.name.ends_with(".slice") { | ||
return format!( | ||
"{}-{}.scope", | ||
self.cgroups_path.scope, self.cgroups_path.name | ||
); | ||
} | ||
self.cgroups_path.name.clone() | ||
} | ||
|
||
// systemd represents slice hierarchy using `-`, so we need to follow suit when | ||
// generating the path of slice. For example, 'test-a-b.slice' becomes | ||
// '/test.slice/test-a.slice/test-a-b.slice'. | ||
fn expand_slice(&self, slice: String) -> Result<PathBuf> { | ||
let suffix = ".slice"; | ||
if slice.len() <= suffix.len() || !slice.ends_with(suffix) { | ||
anyhow!("invalid slice name: {}", slice); | ||
} | ||
if slice.contains('/') { | ||
anyhow!("invalid slice name: {}", slice); | ||
} | ||
let mut path = "".to_owned(); | ||
let mut prefix = "".to_owned(); | ||
let slice_name = slice.trim_end_matches(suffix); | ||
// if input was -.slice, we should just return root now | ||
if slice_name == "-" { | ||
return Ok(Path::new("/").to_path_buf()); | ||
} | ||
for component in slice_name.split('-') { | ||
if component.is_empty() { | ||
anyhow!("Invalid slice name: {}", slice); | ||
} | ||
// Append the component to the path and to the prefix. | ||
path = format!("{}/{}{}{}", path, prefix, component, suffix); | ||
prefix = format!("{}{}-", prefix, component); | ||
} | ||
Ok(Path::new(&path).to_path_buf()) | ||
} | ||
|
||
// get_cgroups_path generates a cgroups path from the one provided by the user via cgroupsPath. | ||
// an example of the final path in rootless: | ||
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope" | ||
fn get_cgroups_path(&self) -> Result<PathBuf> { | ||
// the root slice is under 'machine.slice'. | ||
let mut slice = Path::new("/machine.slice").to_path_buf(); | ||
// if the user provided a '.slice' (as in a branch of a tree) | ||
// we need to "unpack it". | ||
if !self.cgroups_path.parent.is_empty() { | ||
slice = self.expand_slice(self.cgroups_path.parent.clone())?; | ||
} | ||
let unit_name = self.get_unit_name(); | ||
let cgroups_path = slice.join(unit_name); | ||
// an example of the final path: | ||
// "/sys/fs/cgroup/system.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope" | ||
let full_path = self.root_path.join_absolute_path(&cgroups_path)?; | ||
Ok(full_path) | ||
} | ||
|
||
/// create_unified_cgroup verifies sure that *each level* in the downward path from the root cgroup | ||
/// down to the cgroup_path provided by the user is a valid cgroup hierarchy, | ||
/// containing the attached controllers and that it contains the container pid. | ||
fn create_unified_cgroup(&self, pid: Pid) -> Result<PathBuf> { | ||
let cgroups_path = self.get_cgroups_path()?; | ||
let controllers: Vec<String> = self | ||
.get_available_controllers(common::DEFAULT_CGROUP_ROOT)? | ||
.into_iter() | ||
.map(|c| format!("{}{}", "+", c.to_string())) | ||
.collect(); | ||
|
||
// Write the controllers to the root_path. | ||
Self::write_controllers(&self.root_path, &controllers)?; | ||
|
||
let mut current_path = self.root_path.clone(); | ||
let mut components = cgroups_path.components().skip(1).peekable(); | ||
// Verify that *each level* in the downward path from the root cgroup | ||
// down to the cgroup_path provided by the user is a valid cgroup hierarchy. | ||
// containing the attached controllers. | ||
while let Some(component) = components.next() { | ||
current_path = current_path.join(component); | ||
if !current_path.exists() { | ||
fs::create_dir(¤t_path)?; | ||
fs::metadata(¤t_path)?.permissions().set_mode(0o755); | ||
} | ||
|
||
// last component cannot have subtree_control enabled due to internal process constraint | ||
// if this were set, writing to the cgroups.procs file will fail with Erno 16 (device or resource busy) | ||
if components.peek().is_some() { | ||
Self::write_controllers(¤t_path, &controllers)?; | ||
} | ||
} | ||
|
||
write_cgroup_file(cgroups_path.join(CGROUP_PROCS), &pid.to_string())?; | ||
Ok(cgroups_path) | ||
} | ||
|
||
fn get_available_controllers<P: AsRef<Path>>( | ||
&self, | ||
cgroups_path: P, | ||
) -> Result<Vec<ControllerType>> { | ||
let controllers_path = self.root_path.join(cgroups_path).join(CGROUP_CONTROLLERS); | ||
if !controllers_path.exists() { | ||
bail!( | ||
"cannot get available controllers. {:?} does not exist", | ||
controllers_path | ||
) | ||
} | ||
|
||
let mut controllers = Vec::new(); | ||
for controller in fs::read_to_string(&controllers_path)?.split_whitespace() { | ||
match controller { | ||
"cpu" => controllers.push(ControllerType::Cpu), | ||
"io" => controllers.push(ControllerType::Io), | ||
"memory" => controllers.push(ControllerType::Memory), | ||
"pids" => controllers.push(ControllerType::Pids), | ||
_ => continue, | ||
} | ||
} | ||
|
||
Ok(controllers) | ||
} | ||
|
||
fn write_controllers(path: &Path, controllers: &Vec<String>) -> Result<()> { | ||
for controller in controllers { | ||
common::write_cgroup_file_str(path.join(CGROUP_SUBTREE_CONTROL), controller)?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
impl CgroupManager for SystemDCGroupManager { | ||
fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { | ||
// Dont attach any pid to the cgroup if -1 is specified as a pid | ||
if pid.as_raw() == -1 { | ||
return Ok(()); | ||
} | ||
let full_cgroup_path = self.create_unified_cgroup(pid)?; | ||
|
||
for controller in CONTROLLER_TYPES { | ||
match controller { | ||
ControllerType::Cpu => Cpu::apply(linux_resources, &full_cgroup_path)?, | ||
ControllerType::CpuSet => CpuSet::apply(linux_resources, &full_cgroup_path)?, | ||
ControllerType::HugeTlb => HugeTlb::apply(linux_resources, &&full_cgroup_path)?, | ||
ControllerType::Io => Io::apply(linux_resources, &&full_cgroup_path)?, | ||
ControllerType::Memory => Memory::apply(linux_resources, &full_cgroup_path)?, | ||
ControllerType::Pids => Pids::apply(linux_resources, &&full_cgroup_path)?, | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
fn remove(&self) -> Result<()> { | ||
Ok(()) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a manager rooted at `/sys/fs/cgroup` for the given cgroups path.
    fn manager_for(cgroups_path: &str) -> Result<SystemDCGroupManager> {
        SystemDCGroupManager::new(
            PathBuf::from("/sys/fs/cgroup"),
            PathBuf::from(cgroups_path),
        )
    }

    #[test]
    fn expand_slice_works() -> Result<()> {
        let manager = manager_for("test-a-b.slice:docker:foo")?;
        let expanded = manager.expand_slice("test-a-b.slice".to_string())?;

        assert_eq!(
            expanded,
            PathBuf::from("/test.slice/test-a.slice/test-a-b.slice"),
        );

        Ok(())
    }

    #[test]
    fn get_cgroups_path_works_with_a_complex_slice() -> Result<()> {
        let manager = manager_for("test-a-b.slice:docker:foo")?;
        let cgroups_path = manager.get_cgroups_path()?;

        assert_eq!(
            cgroups_path,
            PathBuf::from("/sys/fs/cgroup/test.slice/test-a.slice/test-a-b.slice/docker-foo.scope"),
        );

        Ok(())
    }

    #[test]
    fn get_cgroups_path_works_with_a_simple_slice() -> Result<()> {
        let manager = manager_for("machine.slice:libpod:foo")?;
        let cgroups_path = manager.get_cgroups_path()?;

        assert_eq!(
            cgroups_path,
            PathBuf::from("/sys/fs/cgroup/machine.slice/libpod-foo.scope"),
        );

        Ok(())
    }

    #[test]
    fn get_cgroups_path_works_with_scope() -> Result<()> {
        // An empty slice part falls back to machine.slice.
        let manager = manager_for(":docker:foo")?;
        let cgroups_path = manager.get_cgroups_path()?;

        assert_eq!(
            cgroups_path,
            PathBuf::from("/sys/fs/cgroup/machine.slice/docker-foo.scope"),
        );

        Ok(())
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
use anyhow::Result; | ||
use dbus::blocking::Connection; | ||
use std::time::Duration; | ||
use std::vec::Vec; | ||
|
||
/// Client is a wrapper providing a higher level API and abstraction around dbus.
/// For more information see https://www.freedesktop.org/wiki/Software/systemd/dbus/
pub struct Client {
    // Blocking dbus connection that the method calls of this client are issued on.
    conn: Connection,
}
|
||
impl Client { | ||
pub fn new() -> Result<Self> { | ||
let conn = Connection::new_session()?; | ||
Ok(Client { conn }) | ||
} | ||
|
||
/// start_unit starts a specific unit under systemd. See https://www.freedesktop.org/wiki/Software/systemd/dbus | ||
/// for more details. | ||
pub fn start_unit(&self, unit_name: &str, _properties: Vec<&str>) -> Result<()> { | ||
let proxy = self.conn.with_proxy( | ||
"org.freedesktop.systemd1.Manager", | ||
"/", | ||
Duration::from_millis(5000), | ||
); | ||
let (_job_id,): (i32,) = proxy.method_call( | ||
"org.freedesktop.systemd1.Manager", | ||
"StartTransientUnit", | ||
(unit_name, "replace"), | ||
)?; | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
mod client; | ||
pub use client::Client; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters