diff --git a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs index fddb31e73..f605cc25e 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/mod.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/mod.rs @@ -8,7 +8,7 @@ mod volume_policy; use crate::controller::scheduling::{ nexus::{GetPersistedNexusChildrenCtx, GetSuitableNodesContext}, resources::{ChildItem, NodeItem, PoolItem, ReplicaItem}, - volume::{GetSuitablePoolsContext, VolumeReplicasForNexusCtx}, + volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext, VolumeReplicasForNexusCtx}, }; use std::{cmp::Ordering, collections::HashMap, future::Future}; use weighted_scoring::{Criteria, Value, ValueGrading, WeightedScore}; @@ -319,6 +319,14 @@ impl ReplicaFilters { item.state().online() } + /// Should only try to resize online replicas + pub(crate) fn online_for_resize( + _request: &ReplicaResizePoolsContext, + item: &ChildItem, + ) -> bool { + item.state().online() + } + /// Should only allow children with corresponding replicas with enough size pub(crate) fn size(request: &GetPersistedNexusChildrenCtx, item: &ChildItem) -> bool { match request.vol_spec() { diff --git a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs index 7869f2a57..93a676655 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/resources/mod.rs @@ -9,6 +9,7 @@ use stor_port::types::v0::{ nexus_persistence::{ChildInfo, NexusInfo}, replica::ReplicaSpec, snapshots::replica::ReplicaSnapshot, + volume::VolumeSpec, }, transport::{Child, ChildUri, NodeId, PoolId, Replica}, }; @@ -101,6 +102,38 @@ impl PoolItemLister { None => vec![], } } + /// Get a list of replicas wrapped as ChildItem, for resize. + pub(crate) async fn list_for_resize(registry: &Registry, spec: &VolumeSpec) -> Vec { + let replicas = registry.specs().volume_replicas(&spec.uuid); + let mut state_replicas = Vec::with_capacity(replicas.len()); + for replica in &replicas { + if let Ok(replica) = registry.replica(replica.uuid()).await { + state_replicas.push(replica); + } + } + let pool_wrappers = registry.pool_wrappers().await; + + replicas + .iter() + .filter_map(|replica_spec| { + let replica_spec = replica_spec.lock().clone(); + let replica_state = state_replicas + .iter() + .find(|state| state.uuid == replica_spec.uuid); + + let pool_id = replica_spec.pool.pool_name(); + pool_wrappers + .iter() + .find(|p| &p.id == pool_id) + .and_then(|pool| { + replica_state.map(|replica_state| { + ChildItem::new(&replica_spec, replica_state, None, pool, None) + }) + }) + }) + .collect() + } + /// Get a list of pool items to create a snapshot clone on. /// todo: support multi-replica snapshot and clone. 
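// Illustrative sketch (simplified standalone types, not the crate's own structures):
// `list_for_resize` above pairs each replica spec with its runtime state and its
// hosting pool, silently dropping replicas whose state or pool lookup fails. The
// same join-by-id shape in miniature:
struct SpecEntry { uuid: String, pool: String }
struct StateEntry { uuid: String, online: bool }
struct PoolEntry { id: String, free_space: u64 }
struct Candidate<'a> { spec: &'a SpecEntry, state: &'a StateEntry, pool: &'a PoolEntry }

fn join_candidates<'a>(
    specs: &'a [SpecEntry],
    states: &'a [StateEntry],
    pools: &'a [PoolEntry],
) -> Vec<Candidate<'a>> {
    specs
        .iter()
        .filter_map(|spec| {
            // A replica only becomes a candidate when both its state and its pool are known.
            let state = states.iter().find(|s| s.uuid == spec.uuid)?;
            let pool = pools.iter().find(|p| p.id == spec.pool)?;
            Some(Candidate { spec, state, pool })
        })
        .collect()
}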
pub(crate) async fn list_for_clones( diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs index f837830f3..a671effa1 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/volume.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/volume.rs @@ -8,6 +8,7 @@ use crate::controller::{ volume_policy::{SimplePolicy, ThickPolicy}, AddReplicaFilters, AddReplicaSorters, ChildSorters, ResourceData, ResourceFilter, }, + wrapper::PoolWrapper, }; use agents::errors::SvcError; use std::{collections::HashMap, ops::Deref}; @@ -95,6 +96,11 @@ impl GetSuitablePoolsContext { pub fn as_thin(&self) -> bool { self.spec.as_thin() || self.snap_repl() } + /// Helper util for overcommit checks. + pub(crate) fn overcommit(&self, allowed_commit_percent: u64, pool: &PoolWrapper) -> bool { + let max_cap_allowed = allowed_commit_percent * pool.capacity; + (self.size + pool.commitment()) * 100 < max_cap_allowed + } } impl Deref for GetSuitablePoolsContext { @@ -738,3 +744,98 @@ impl ResourceFilter for CloneVolumeSnapshot { self.data.list } } + +/// The context to check pool capacity for volume replica resize feasibility. +#[derive(Clone)] +pub(crate) struct ReplicaResizePoolsContext { + registry: Registry, + spec: VolumeSpec, + allocated_bytes: Option, + required_capacity: u64, +} + +impl ReplicaResizePoolsContext { + /// The additional capacity that we need. + pub(crate) fn required_capacity(&self) -> u64 { + self.required_capacity + } + + /// Spec for the volume undergoing resize. + pub(crate) fn spec(&self) -> &VolumeSpec { + &self.spec + } + + /// Get the currently allocated bytes (per replica). + pub(crate) fn allocated_bytes(&self) -> &Option { + &self.allocated_bytes + } + + /// Helper util for overcommit checks. + pub(crate) fn overcommit(&self, allowed_commit_percent: u64, pool: &PoolWrapper) -> bool { + let max_cap_allowed = allowed_commit_percent * pool.capacity; + (self.required_capacity + pool.commitment()) * 100 < max_cap_allowed + } +} + +/// Resize the replicas of a volume. +pub(crate) struct ResizeVolumeReplicas { + data: ResourceData, +} + +impl ResizeVolumeReplicas { + async fn builder(registry: &Registry, spec: &VolumeSpec, required_capacity: u64) -> Self { + // Reuse the method from AddVolumeReplica even though name doesn't indicate the exact + // purpose. + let allocated_bytes = AddVolumeReplica::allocated_bytes(registry, spec).await; + Self { + data: ResourceData::new( + ReplicaResizePoolsContext { + registry: registry.clone(), + spec: spec.clone(), + allocated_bytes, + required_capacity, + }, + PoolItemLister::list_for_resize(registry, spec).await, + ), + } + } + + fn with_default_policy(self) -> Self { + match self.data.context.spec.as_thin() { + true => self.with_simple_policy(), + false => self.with_thick_policy(), + } + } + fn with_thick_policy(self) -> Self { + self.policy(ThickPolicy::new()) + } + fn with_simple_policy(self) -> Self { + let simple = SimplePolicy::new(&self.data.context().registry); + self.policy(simple) + } + + /// Default rules for replica filtering when resizing replicas for a volume. 
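// Illustrative sketch of the thin-provisioning overcommit gate used by the two
// `overcommit` helpers above: a pool is acceptable when its existing commitment
// plus the new requirement stays under `allowed_commit_percent` of its capacity.
// The numbers in the example are made up.
fn within_overcommit(required: u64, committed: u64, capacity: u64, allowed_percent: u64) -> bool {
    // Same integer inequality as the helpers: (required + committed) * 100 < allowed_percent * capacity.
    (required + committed) * 100 < allowed_percent * capacity
}

fn overcommit_example() {
    const GIB: u64 = 1024 * 1024 * 1024;
    // A 100 GiB pool with 180 GiB already committed and a 250% overcommit limit
    // can still take on 60 GiB (240% total) but not 80 GiB (260% total).
    assert!(within_overcommit(60 * GIB, 180 * GIB, 100 * GIB, 250));
    assert!(!within_overcommit(80 * GIB, 180 * GIB, 100 * GIB, 250));
}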
+ pub(crate) async fn builder_with_defaults( + registry: &Registry, + spec: &VolumeSpec, + req_capacity: u64, + ) -> Self { + Self::builder(registry, spec, req_capacity) + .await + .with_default_policy() + } +} + +#[async_trait::async_trait(?Send)] +impl ResourceFilter for ResizeVolumeReplicas { + type Request = ReplicaResizePoolsContext; + type Item = ChildItem; + + fn data(&mut self) -> &mut ResourceData { + &mut self.data + } + + fn collect(self) -> Vec { + self.data.list + } +} diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/mod.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/mod.rs index 65bbe1b53..0ecefa345 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/mod.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/mod.rs @@ -1,4 +1,4 @@ -use super::ResourceFilter; +use super::{volume::ResizeVolumeReplicas, ReplicaFilters, ResourceFilter}; use crate::controller::scheduling::{ volume::{AddVolumeReplica, CloneVolumeSnapshot, SnapshotVolumeReplica}, NodeFilters, @@ -59,4 +59,9 @@ impl DefaultBasePolicy { .filter(pool::PoolBaseFilters::capacity) .filter(pool::PoolBaseFilters::min_free_space) } + fn filter_resize(request: ResizeVolumeReplicas) -> ResizeVolumeReplicas { + request + .filter(ReplicaFilters::online_for_resize) + .filter(pool::PoolBaseFilters::min_free_space_repl_resize) + } } diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs index 1cb521a17..1b9c85471 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/pool.rs @@ -1,4 +1,7 @@ -use crate::controller::scheduling::{resources::PoolItem, volume::GetSuitablePoolsContext}; +use crate::controller::scheduling::{ + resources::{ChildItem, PoolItem}, + volume::{GetSuitablePoolsContext, ReplicaResizePoolsContext}, +}; use std::collections::HashMap; use stor_port::types::v0::transport::{PoolStatus, PoolTopology}; @@ -20,10 +23,19 @@ impl PoolBaseFilters { allowed_commit_percent: u64, ) -> bool { match request.as_thin() { - true => { - let max_cap_allowed = allowed_commit_percent * item.pool().capacity; - (request.size + item.pool().commitment()) * 100 < max_cap_allowed - } + true => request.overcommit(allowed_commit_percent, item.pool()), + false => true, + } + } + /// Should only attempt to use pools with capacity bigger than the requested size + /// for replica expand. + pub(crate) fn overcommit_repl_resize( + request: &ReplicaResizePoolsContext, + item: &ChildItem, + allowed_commit_percent: u64, + ) -> bool { + match request.spec().as_thin() { + true => request.overcommit(allowed_commit_percent, item.pool()), false => true, } } @@ -34,6 +46,17 @@ impl PoolBaseFilters { false => item.pool.free_space() > request.size, } } + /// Return true if the pool has enough capacity to resize the replica by the requested + /// value. + pub(crate) fn min_free_space_repl_resize( + request: &ReplicaResizePoolsContext, + item: &ChildItem, + ) -> bool { + match request.spec().as_thin() { + true => item.pool().free_space() > Self::free_space_watermark(), + false => item.pool().free_space() > request.required_capacity(), + } + } /// Should only attempt to use pools with sufficient free space for a full rebuild. 
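// Illustrative sketch of the base free-space gate added in
// `min_free_space_repl_resize` above (simplified, standalone): a thick volume
// needs the pool to hold the full additional capacity, while a thin volume only
// needs to clear a small fixed watermark (the watermark value itself is defined
// elsewhere in pool.rs and is just a parameter here).
fn enough_free_space_for_resize(thin: bool, free_space: u64, required: u64, watermark: u64) -> bool {
    if thin {
        free_space > watermark
    } else {
        free_space > required
    }
}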
/// Currently the data-plane fully rebuilds a volume, meaning a thin provisioned volume /// becomes fully allocated. diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/simple.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/simple.rs index 000b1b874..a3370026b 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/simple.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/simple.rs @@ -2,10 +2,10 @@ use crate::{ controller::{ registry::Registry, scheduling::{ - resources::PoolItem, + resources::{ChildItem, PoolItem}, volume::{ AddVolumeReplica, CloneVolumeSnapshot, GetSuitablePoolsContext, - SnapshotVolumeReplica, + ReplicaResizePoolsContext, ResizeVolumeReplicas, SnapshotVolumeReplica, }, volume_policy::{affinity_group, pool::PoolBaseFilters, DefaultBasePolicy}, ResourceFilter, ResourcePolicy, SortBuilder, SortCriteria, @@ -69,6 +69,15 @@ impl ResourcePolicy for SimplePolicy { } } +#[async_trait::async_trait(?Send)] +impl ResourcePolicy for SimplePolicy { + fn apply(self, to: ResizeVolumeReplicas) -> ResizeVolumeReplicas { + DefaultBasePolicy::filter_resize(to) + .filter_param(&self, SimplePolicy::min_free_space_repl_resize) + .filter_param(&self, SimplePolicy::pool_overcommit_repl_resize) + } +} + const TOTAL_REPLICA_COUNT_WEIGHT: Ranged = Ranged::new_const(25); const FREE_SPACE_WEIGHT: Ranged = Ranged::new_const(40); const OVER_COMMIT_WEIGHT: Ranged = Ranged::new_const(35); @@ -149,6 +158,26 @@ impl SimplePolicy { } } + /// Helper to figure out space availability based on pool free, pool available and + /// volume's allocated space. + fn min_free_space_util(&self, free: u64, allocated: &Option, required: u64) -> bool { + free > match allocated { + Some(bytes) => { + let size = if bytes == &0 { + self.cli_args.volume_commitment_initial * required + } else { + self.cli_args.volume_commitment * required + } / 100; + (bytes + size).min(required) + } + None => { + // We really have no clue for some reason.. should not happen but just in case + // let's be conservative? + (self.no_state_min_free_space_percent * required) / 100 + } + } + } + /// Minimum free space is the currently allocated usage plus some percentage of volume size /// slack. fn min_free_space(&self, request: &GetSuitablePoolsContext, item: &PoolItem) -> bool { @@ -160,26 +189,45 @@ impl SimplePolicy { (self.cli_args.snapshot_commitment * request.size) / 100 }; } - item.pool.free_space() - > match request.allocated_bytes() { - Some(bytes) => { - let size = if bytes == &0 { - self.cli_args.volume_commitment_initial * request.size - } else { - self.cli_args.volume_commitment * request.size - } / 100; - (bytes + size).min(request.size) - } - None => { - // We really have no clue for some reason.. should not happen but just in case - // let's be conservative? - (self.no_state_min_free_space_percent * request.size) / 100 - } - } + + self.min_free_space_util( + item.pool.free_space(), + request.allocated_bytes(), + request.size, + ) + } + + /// Checks during replica resize. Minimum free space is the currently allocated usage plus some + /// percentage of volume size slack. + /// TODO: Combine min_free_space_repl_resize and min_free_space for code reuse using generic + /// types. 
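// Illustrative numeric sketch of `min_free_space_util` above. The percentages are
// made-up stand-ins for the agent's CLI-configured values; the real ones come from
// `cli_args`. The pool must hold the replica's currently allocated bytes plus a
// percentage of the requested size as slack, capped at the requested size itself.
fn min_free_needed(allocated: Option<u64>, required: u64) -> u64 {
    let commitment_initial = 40; // hypothetical % slack while nothing is allocated yet
    let commitment = 40; // hypothetical % slack once data has been written
    let no_state_fallback = 100; // hypothetical % used when allocation is unknown
    match allocated {
        Some(bytes) => {
            let pct = if bytes == 0 { commitment_initial } else { commitment };
            let slack = pct * required / 100;
            // e.g. 30 GiB allocated, 100 GiB required, 40% slack -> need 70 GiB free.
            (bytes + slack).min(required)
        }
        // No allocation info: fall back to a conservative percentage of the request.
        None => no_state_fallback * required / 100,
    }
}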
+ fn min_free_space_repl_resize( + &self, + request: &ReplicaResizePoolsContext, + item: &ChildItem, + ) -> bool { + if !request.spec().as_thin() { + return item.pool().free_space() > request.required_capacity(); + } + + self.min_free_space_util( + item.pool().free_space(), + request.allocated_bytes(), + request.required_capacity(), + ) } + fn pool_overcommit(&self, request: &GetSuitablePoolsContext, item: &PoolItem) -> bool { PoolBaseFilters::overcommit(request, item, self.cli_args.pool_commitment) } + + fn pool_overcommit_repl_resize( + &self, + request: &ReplicaResizePoolsContext, + item: &ChildItem, + ) -> bool { + PoolBaseFilters::overcommit_repl_resize(request, item, self.cli_args.pool_commitment) + } } #[cfg(test)] diff --git a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/thick.rs b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/thick.rs index d897e5c27..d4bf6dea8 100644 --- a/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/thick.rs +++ b/control-plane/agents/src/bin/core/controller/scheduling/volume_policy/thick.rs @@ -1,6 +1,8 @@ use crate::controller::scheduling::{ resources::PoolItem, - volume::{AddVolumeReplica, GetSuitablePoolsContext, SnapshotVolumeReplica}, + volume::{ + AddVolumeReplica, GetSuitablePoolsContext, ResizeVolumeReplicas, SnapshotVolumeReplica, + }, volume_policy::{affinity_group, pool::PoolBaseFilters, DefaultBasePolicy}, ResourceFilter, ResourcePolicy, SortBuilder, SortCriteria, }; @@ -34,6 +36,13 @@ impl ResourcePolicy for ThickPolicy { } } +#[async_trait::async_trait(?Send)] +impl ResourcePolicy for ThickPolicy { + fn apply(self, to: ResizeVolumeReplicas) -> ResizeVolumeReplicas { + DefaultBasePolicy::filter_resize(to) + } +} + impl ThickPolicy { /// Create a new thick policy. pub(crate) fn new() -> Self { diff --git a/control-plane/agents/src/bin/core/pool/replica_operations.rs b/control-plane/agents/src/bin/core/pool/replica_operations.rs index 3dc045c3e..2df4a0084 100644 --- a/control-plane/agents/src/bin/core/pool/replica_operations.rs +++ b/control-plane/agents/src/bin/core/pool/replica_operations.rs @@ -107,10 +107,22 @@ impl ResourceResize for OperationGuardArc { async fn resize( &mut self, - _registry: &Registry, - _request: &Self::Resize, + registry: &Registry, + request: &Self::Resize, ) -> Result { - unimplemented!() + let node = registry.node_wrapper(&request.node).await?; + + let repl = registry.replica(&request.uuid).await?; + let spec_clone = self + .start_update( + registry, + &repl, + ReplicaOperation::Resize(request.requested_size), + ) + .await?; + + let result = node.resize_replica(request).await; + self.complete_update(registry, result, spec_clone).await } } diff --git a/control-plane/agents/src/bin/core/pool/specs.rs b/control-plane/agents/src/bin/core/pool/specs.rs index edcfc00e7..bf433da5c 100644 --- a/control-plane/agents/src/bin/core/pool/specs.rs +++ b/control-plane/agents/src/bin/core/pool/specs.rs @@ -149,6 +149,8 @@ impl SpecOperationsHelper for ReplicaSpec { } ReplicaOperation::Unshare => Ok(()), ReplicaOperation::OwnerUpdate(_) => Ok(()), + // TODO: Shall we do something here with input size? 
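// One possible answer to the TODO above (hypothetical, not part of the patch): the
// Resize arm that follows could refuse shrinking or no-op requests before the
// operation is even started. In this changeset the equivalent guard lives one level
// up: the volume-level `resizeable_replicas` in volume/specs.rs rejects the request
// when `spec.size >= requested_size`, so accepting the op here is sufficient.
fn resize_grows_replica(current_size: u64, requested_size: u64) -> bool {
    // A resize is only meaningful when it strictly grows the replica.
    requested_size > current_size
}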
+ ReplicaOperation::Resize(_) => Ok(()), _ => unreachable!(), }?; self.start_op(op); diff --git a/control-plane/agents/src/bin/core/tests/volume/mod.rs b/control-plane/agents/src/bin/core/tests/volume/mod.rs index 1f30a864a..4cef3983a 100644 --- a/control-plane/agents/src/bin/core/tests/volume/mod.rs +++ b/control-plane/agents/src/bin/core/tests/volume/mod.rs @@ -5,6 +5,7 @@ mod capacity; mod garbage_collection; mod helpers; mod hotspare; +mod resize; mod snapshot; mod snapshot_clone; mod switchover; diff --git a/control-plane/agents/src/bin/core/tests/volume/resize.rs b/control-plane/agents/src/bin/core/tests/volume/resize.rs new file mode 100644 index 000000000..519b3939b --- /dev/null +++ b/control-plane/agents/src/bin/core/tests/volume/resize.rs @@ -0,0 +1,181 @@ +use deployer_cluster::{Cluster, ClusterBuilder}; +use grpc::operations::{replica::traits::ReplicaOperations, volume::traits::VolumeOperations}; +use std::time::Duration; +use stor_port::types::v0::transport::{ + CreateVolume, DestroyVolume, Filter, PublishVolume, ResizeVolume, VolumeShareProtocol, +}; + +#[tokio::test] +async fn resize_unpublished() { + let cluster = ClusterBuilder::builder() + .with_rest(true) + .with_agents(vec!["core"]) + .with_io_engines(3) + .with_pool(0, "malloc:///p1?size_mb=200") + .with_pool(1, "malloc:///p1?size_mb=200") + .with_pool(2, "malloc:///p1?size_mb=200") + .with_cache_period("1s") + .with_reconcile_period(Duration::from_secs(1), Duration::from_secs(1)) + .build() + .await + .unwrap(); + + let vol_cli = cluster.grpc_client().volume(); + let repl_cli = cluster.grpc_client().replica(); + + let volume = vol_cli + .create( + &CreateVolume { + uuid: "de3cf927-80c2-47a8-adf0-95c486bdd7b7".try_into().unwrap(), + size: 50 * 1024 * 1024, + replicas: 3, + thin: false, + ..Default::default() + }, + None, + ) + .await + .unwrap(); + + // Unpublished volume + assert!(volume.spec().active_config().is_none() && volume.spec().num_replicas == 3); + + let resized_volume = vol_cli + .resize( + &ResizeVolume { + uuid: volume.uuid().clone(), + requested_size: 2 * volume.spec().size, + capacity_limit: None, + }, + None, + ) + .await + .unwrap(); + + tracing::info!("Resized {resized_volume:?}"); + assert!(resized_volume.spec().uuid == volume.spec().uuid); + assert!(resized_volume.spec().size == (2 * volume.spec().size)); + + let replicas = repl_cli + .get(Filter::Volume(volume.uuid().clone()), None) + .await + .unwrap(); + // Compare >= since replicas have some additional book-keeping space. + replicas + .into_inner() + .iter() + .for_each(|r| assert!(r.size >= resized_volume.spec().size)); + let _ = vol_cli + .destroy( + &DestroyVolume { + uuid: volume.uuid().clone(), + }, + None, + ) + .await; + + // Test that resizing a published volume throws error. + resize_published(&cluster).await; +} + +// Resizing a published volume should throw error that volume is in-use. +async fn resize_published(cluster: &Cluster) { + let vol_cli = cluster.grpc_client().volume(); + // Create and publish the volume. 
+ let volume = vol_cli + .create( + &CreateVolume { + uuid: "df3cf927-80c2-47a8-adf0-95c486bdd7b7".try_into().unwrap(), + size: 50 * 1024 * 1024, + replicas: 1, + thin: false, + ..Default::default() + }, + None, + ) + .await + .unwrap(); + + vol_cli + .publish( + &PublishVolume { + uuid: volume.spec().uuid, + target_node: Some(cluster.node(0)), + share: Some(VolumeShareProtocol::Nvmf), + ..Default::default() + }, + None, + ) + .await + .unwrap(); + + let _ = vol_cli + .resize( + &ResizeVolume { + uuid: volume.uuid().clone(), + requested_size: 2 * volume.spec().size, + capacity_limit: None, + }, + None, + ) + .await + .expect_err("Expected error upon resize"); +} + +// Try to resize a volume. When any one of the replica can't be resized due to +// insufficient capacity on pool, the volume resize should fail and volume size +// should remain unchanged. +#[tokio::test] +async fn resize_on_no_capacity_pool() { + let cluster = ClusterBuilder::builder() + .with_rest(true) + .with_agents(vec!["core"]) + .with_io_engines(3) + .with_pool(0, "malloc:///p1?size_mb=200") + .with_pool(1, "malloc:///p1?size_mb=200") + .with_pool(2, "malloc:///p1?size_mb=100") + .with_cache_period("1s") + .with_reconcile_period(Duration::from_secs(1), Duration::from_secs(1)) + .build() + .await + .unwrap(); + + let vol_cli = cluster.grpc_client().volume(); + + let volume = vol_cli + .create( + &CreateVolume { + uuid: "de3cf927-80c2-47a8-adf0-95c486bdd7b7".try_into().unwrap(), + size: 50 * 1024 * 1024, + replicas: 3, + thin: false, + ..Default::default() + }, + None, + ) + .await + .unwrap(); + + let resized_volume = vol_cli + .resize( + &ResizeVolume { + uuid: volume.uuid().clone(), + requested_size: 2 * volume.spec().size, + capacity_limit: None, + }, + None, + ) + .await + .expect_err("Expected error due to insufficient pool capacity"); + + tracing::info!("Volume resize error: {resized_volume:?}"); + let v_arr = vol_cli + .get(Filter::Volume(volume.spec().uuid), false, None, None) + .await + .unwrap(); + let vol_obj = &v_arr.entries[0]; + // Size shouldn't have changed. + assert!(vol_obj.spec().size == volume.spec().size); + // TODO: Add reclaim monitor validations for replicas that got resized as part + // of this failed volume resize. +} diff --git a/control-plane/agents/src/bin/core/volume/operations.rs b/control-plane/agents/src/bin/core/volume/operations.rs index 76d804045..9fc7ebd12 100644 --- a/control-plane/agents/src/bin/core/volume/operations.rs +++ b/control-plane/agents/src/bin/core/volume/operations.rs @@ -20,8 +20,8 @@ use crate::{ clone_operations::SnapshotCloneOp, snapshot_operations::DestroyVolumeSnapshotRequest, specs::{ - create_volume_replicas, healthy_volume_replicas, volume_move_replica_candidates, - CreateReplicaCandidate, + create_volume_replicas, healthy_volume_replicas, resizeable_replicas, + vol_status_ok_for_resize, volume_move_replica_candidates, CreateReplicaCandidate, }, }, }; @@ -185,10 +185,35 @@ impl ResourceResize for OperationGuardArc { async fn resize( &mut self, - _registry: &Registry, - _request: &Self::Resize, + registry: &Registry, + request: &Self::Resize, ) -> Result { - unimplemented!() + let spec = self.as_ref().clone(); + let state = registry.volume_state(&request.uuid).await?; + + // Pre-checks - volume state eligible for resize. + vol_status_ok_for_resize(&spec)?; + // Pre-check - Ensure pools that host replicas have enough space to resize the replicas, + // and also ensure that the replicas are Online. 
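// Illustrative note on the check below (simplified): candidate selection is driven
// by the additional capacity each hosting pool must absorb, not by the absolute
// volume size. `resizeable_replicas` in volume/specs.rs derives that delta before
// consulting the scheduler.
fn additional_capacity(current_size: u64, requested_size: u64) -> u64 {
    // Valid only after the pre-checks have established requested_size > current_size.
    requested_size - current_size
}
// For example, growing a 50 MiB volume to 100 MiB asks each hosting pool for
// roughly 50 MiB of additional free capacity.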
+ let resizeable_replicas = + resizeable_replicas(&spec, registry, request.requested_size).await?; + + let spec_clone = self + .start_update( + registry, + &state, + VolumeOperation::Resize(request.requested_size), + ) + .await?; + // Resize each replica of the volume. If any replica fails to be resized then the + // volume resize operation is deemed as a failure. + let result = self + .resize_volume_replicas(registry, &resizeable_replicas, request.requested_size) + .await; + + self.complete_update(registry, result, spec_clone).await?; + + registry.volume(&request.uuid).await } } diff --git a/control-plane/agents/src/bin/core/volume/operations_helper.rs b/control-plane/agents/src/bin/core/volume/operations_helper.rs index 25918f5f7..6b4e38969 100644 --- a/control-plane/agents/src/bin/core/volume/operations_helper.rs +++ b/control-plane/agents/src/bin/core/volume/operations_helper.rs @@ -2,7 +2,7 @@ use crate::{ controller::{ registry::Registry, resources::{ - operations::{ResourceLifecycle, ResourceOwnerUpdate}, + operations::{ResourceLifecycle, ResourceOwnerUpdate, ResourceResize}, operations_helper::{ GuardedOperationsHelper, OperationSequenceGuard, ResourceSpecsLocked, }, @@ -34,7 +34,7 @@ use stor_port::{ transport::{ CreateNexus, CreateReplica, Nexus, NexusId, NexusNvmePreemption, NexusNvmfConfig, NodeId, NvmeReservation, NvmfControllerIdRange, Protocol, Replica, ReplicaId, - ReplicaOwners, Volume, VolumeShareProtocol, VolumeState, + ReplicaOwners, ResizeReplica, Volume, VolumeShareProtocol, VolumeState, }, }, HostAccessControl, @@ -527,6 +527,33 @@ impl OperationGuardArc { Ok(created_replicas) } + /// Resize the replicas of the volume. + pub(super) async fn resize_volume_replicas( + &self, + registry: &Registry, + replicas: &Vec, + requested_size: u64, + ) -> Result<(), SvcError> { + for replica in replicas { + let mut replica_grd = registry.specs().replica(&replica.uuid).await?; + + replica_grd + .resize( + registry, + &ResizeReplica::new( + &replica.node, + replica_grd.as_ref().pool_name(), + None, + replica_grd.uuid(), + requested_size, + ), + ) + .await?; + } + + Ok(()) + } + /// Add the given replica to the target nexus of the volume. async fn attach_to_target( &self, diff --git a/control-plane/agents/src/bin/core/volume/scheduling.rs b/control-plane/agents/src/bin/core/volume/scheduling.rs index 701245ad5..ea904ffb0 100644 --- a/control-plane/agents/src/bin/core/volume/scheduling.rs +++ b/control-plane/agents/src/bin/core/volume/scheduling.rs @@ -17,7 +17,7 @@ use crate::{ use agents::errors::{NotEnough, SvcError}; use stor_port::types::v0::{ store::{nexus::NexusSpec, volume::VolumeSpec}, - transport::{NodeId, VolumeState}, + transport::{NodeId, Replica, VolumeState}, }; /// Return a list of pre sorted pools to be used by a volume. @@ -126,3 +126,20 @@ pub(crate) async fn target_node_candidate( Some(node) => Ok(node.clone()), } } + +/// List, filter and collect replicas for which are Online and the pools have +/// the required capacity for resizing the replica. 
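// Illustrative sketch of the fail-fast loop in `resize_volume_replicas` above
// (simplified standalone closure instead of the replica guards): replicas are
// resized one at a time and the first failure aborts the whole operation, which
// `complete_update` then records as a failed volume resize. Replicas that were
// already grown before the failure are handled separately afterwards (the
// resize_on_no_capacity_pool test notes a follow-up for reclaim-monitor checks).
fn resize_all(
    replica_ids: &[&str],
    mut resize_one: impl FnMut(&str) -> Result<(), String>,
) -> Result<(), String> {
    for id in replica_ids.iter().copied() {
        // `?` propagates the first error and skips the remaining replicas.
        resize_one(id)?;
    }
    Ok(())
}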
+pub(crate) async fn resizeable_replicas( + volume: &VolumeSpec, + registry: &Registry, + req_capacity: u64, +) -> Vec<Replica> { + let builder = + volume::ResizeVolumeReplicas::builder_with_defaults(registry, volume, req_capacity).await; + + builder + .collect() + .into_iter() + .map(|item| item.state().clone()) + .collect() +} diff --git a/control-plane/agents/src/bin/core/volume/specs.rs b/control-plane/agents/src/bin/core/volume/specs.rs index 57c4447ff..75837501a 100644 --- a/control-plane/agents/src/bin/core/volume/specs.rs +++ b/control-plane/agents/src/bin/core/volume/specs.rs @@ -6,7 +6,7 @@ use crate::{ GuardedOperationsHelper, OnCreateFail, OperationSequenceGuard, ResourceSpecs, ResourceSpecsLocked, SpecOperationsHelper, }, - OperationGuardArc, ResourceMutex, TraceSpan, TraceStrLog, + OperationGuardArc, ResourceMutex, ResourceUid, TraceSpan, TraceStrLog, }, scheduling::{ nexus::GetPersistedNexusChildren, @@ -41,7 +41,7 @@ use stor_port::{ SpecStatus, SpecTransaction, }, transport::{ - CreateReplica, CreateVolume, NodeId, PoolId, Protocol, ReplicaId, ReplicaName, + CreateReplica, CreateVolume, NodeId, PoolId, Protocol, Replica, ReplicaId, ReplicaName, ReplicaOwners, SnapshotId, VolumeId, VolumeShareProtocol, VolumeState, VolumeStatus, }, }, @@ -320,6 +320,53 @@ pub(crate) async fn healthy_volume_replicas( } } +/// Check if the volume state is ok for resize operation. +pub fn vol_status_ok_for_resize(spec: &VolumeSpec) -> Result<(), SvcError> { + // The volume should be unpublished/offline. + if spec.target().is_some() { + return Err(SvcError::InUse { + kind: ResourceKind::Volume, + id: spec.uuid_str(), + }); + } + + Ok(()) +} +/// Check if any replica is on a pool that doesn't have sufficient space for +/// a resize operation. If no such replica is present, it means the volume is good +/// to be resized and the returned vector will be of zero length. +pub(crate) async fn resizeable_replicas( + spec: &VolumeSpec, + registry: &Registry, + requested_size: u64, +) -> Result<Vec<Replica>, SvcError> { + if spec.size >= requested_size { + return Err(SvcError::VolumeResizeArgsInvalid { + vol_id: spec.uuid_str(), + requested_size, + current_size: spec.size, + }); + } + let spec_replicas = registry.specs().volume_replicas(spec.uid()); + let resizable_replicas = + scheduling::resizeable_replicas(spec, registry, requested_size - spec.size).await; + + // All the replicas of the volume should be resizable, else we don't proceed with + // the volume resize. + if resizable_replicas.len() != spec.num_replicas as usize { + return Err(SvcError::ResizeReplError { + replica_ids: spec_replicas + .into_iter() + .filter(|sr| resizable_replicas.iter().all(|r| &r.uuid != sr.uuid())) + .map(|excl_repl| excl_repl.uuid().to_string()) + .collect(), + required: requested_size - spec.size, + }); + } + + Ok(resizable_replicas) +} + /// Implementation of the ResourceSpecs which is retrieved from the ResourceSpecsLocked. /// During these calls, no other thread can add/remove elements from the list.
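// Illustrative sketch of the all-or-nothing rule enforced just above (simplified
// standalone types): every replica of the volume must survive the resize filters,
// otherwise the request fails and the error lists the replicas that were excluded.
fn excluded_replicas(all: &[String], resizable: &[String]) -> Vec<String> {
    all.iter()
        .filter(|id| !resizable.contains(*id))
        .cloned()
        .collect()
}

fn all_replicas_resizable(all: &[String], resizable: &[String]) -> bool {
    excluded_replicas(all, resizable).is_empty()
}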
impl ResourceSpecs { @@ -1055,7 +1102,7 @@ impl SpecOperationsHelper for VolumeSpec { } VolumeOperation::CreateSnapshot(_) => Ok(()), VolumeOperation::DestroySnapshot(_) => Ok(()), - VolumeOperation::Resize(_) => todo!(), + VolumeOperation::Resize(_) => Ok(()), }?; self.start_op(operation); Ok(()) diff --git a/control-plane/agents/src/common/errors.rs b/control-plane/agents/src/common/errors.rs index f09f4eb41..8ea2f3505 100644 --- a/control-plane/agents/src/common/errors.rs +++ b/control-plane/agents/src/common/errors.rs @@ -139,6 +139,17 @@ pub enum SvcError { node: String, protocol: String, }, + #[snafu(display( + "Volume '{}' - resize args invalid. Current size: '{}', requested size: '{}'", + vol_id, + current_size, + requested_size + ))] + VolumeResizeArgsInvalid { + vol_id: String, + requested_size: u64, + current_size: u64, + }, #[snafu(display("Replica '{}' not found", replica_id))] ReplicaNotFound { replica_id: ReplicaId }, #[snafu(display("{} '{}' is already shared over {}", kind.to_string(), id, share))] @@ -285,6 +296,15 @@ pub enum SvcError { free_space: u64, required: u64, }, + #[snafu(display( + "Replicas {:?}, can't be resized due to required capacity ({}) or state(Online) not met.", + replica_ids, + required + ))] + ResizeReplError { + replica_ids: Vec, + required: u64, + }, #[snafu(display( "Service for request '{}' for '{}' is unimplemented with '{}'", request, @@ -814,6 +834,18 @@ impl From for ReplyError { source, extra, }, + SvcError::ResizeReplError { .. } => ReplyError { + kind: ReplyErrorKind::FailedPrecondition, + resource: ResourceKind::Replica, + source, + extra, + }, + SvcError::VolumeResizeArgsInvalid { .. } => ReplyError { + kind: ReplyErrorKind::InvalidArgument, + resource: ResourceKind::Volume, + source, + extra, + }, SvcError::Unimplemented { resource, .. 
} => ReplyError { kind: ReplyErrorKind::Unimplemented, resource, diff --git a/control-plane/grpc/src/operations/volume/client.rs b/control-plane/grpc/src/operations/volume/client.rs index 617aa903d..74ab10c15 100644 --- a/control-plane/grpc/src/operations/volume/client.rs +++ b/control-plane/grpc/src/operations/volume/client.rs @@ -17,9 +17,9 @@ use crate::{ volume::{ create_snapshot_reply, create_snapshot_volume_reply, create_volume_reply, get_snapshots_reply, get_snapshots_request, get_volumes_reply, get_volumes_request, - publish_volume_reply, republish_volume_reply, set_volume_replica_reply, share_volume_reply, - unpublish_volume_reply, volume_grpc_client::VolumeGrpcClient, GetSnapshotsRequest, - GetVolumesRequest, ProbeRequest, + publish_volume_reply, republish_volume_reply, resize_volume_reply, + set_volume_replica_reply, share_volume_reply, unpublish_volume_reply, + volume_grpc_client::VolumeGrpcClient, GetSnapshotsRequest, GetVolumesRequest, ProbeRequest, }, }; @@ -120,6 +120,22 @@ impl VolumeOperations for VolumeClient { } } + async fn resize( + &self, + request: &dyn ResizeVolumeInfo, + ctx: Option, + ) -> Result { + let req = self.request(request, ctx, MessageIdVs::ResizeVolume); + let response = self.client().resize_volume(req).await?.into_inner(); + match response.reply { + Some(resize_volume_reply) => match resize_volume_reply { + resize_volume_reply::Reply::Volume(vol) => Ok(Volume::try_from(vol)?), + resize_volume_reply::Reply::Error(err) => Err(err.into()), + }, + None => Err(ReplyError::invalid_response(ResourceKind::Volume)), + } + } + #[tracing::instrument(name = "VolumeClient::share", level = "debug", skip(self), err)] async fn share( &self, @@ -354,12 +370,4 @@ impl VolumeOperations for VolumeClient { None => Err(ReplyError::invalid_response(ResourceKind::Volume)), } } - - async fn resize( - &self, - _req: &dyn ResizeVolumeInfo, - _ctx: Option, - ) -> Result { - unimplemented!() - } } diff --git a/control-plane/grpc/src/operations/volume/traits.rs b/control-plane/grpc/src/operations/volume/traits.rs index 5f23b8986..9b431e390 100644 --- a/control-plane/grpc/src/operations/volume/traits.rs +++ b/control-plane/grpc/src/operations/volume/traits.rs @@ -1148,6 +1148,16 @@ impl From<&dyn ResizeVolumeInfo> for ResizeVolume { } } +impl From<&dyn ResizeVolumeInfo> for ResizeVolumeRequest { + fn from(data: &dyn ResizeVolumeInfo) -> Self { + Self { + uuid: data.uuid().to_string(), + requested_size: data.req_size(), + capacity_limit: data.capacity_limit(), + } + } +} + impl ResizeVolumeInfo for ValidatedResizeVolumeRequest { fn uuid(&self) -> VolumeId { self.uuid.clone() diff --git a/control-plane/rest/openapi-specs/v0_api_spec.yaml b/control-plane/rest/openapi-specs/v0_api_spec.yaml index 36b0abb3d..fada405f7 100644 --- a/control-plane/rest/openapi-specs/v0_api_spec.yaml +++ b/control-plane/rest/openapi-specs/v0_api_spec.yaml @@ -3199,6 +3199,7 @@ components: - Unpublish - CreateSnapshot - DestroySnapshot + - Resize result: description: Result of the operation type: boolean diff --git a/control-plane/stor-port/src/types/v0/store/replica.rs b/control-plane/stor-port/src/types/v0/store/replica.rs index f90056875..bcb9fca96 100644 --- a/control-plane/stor-port/src/types/v0/store/replica.rs +++ b/control-plane/stor-port/src/types/v0/store/replica.rs @@ -272,7 +272,9 @@ impl SpecTransaction for ReplicaSpec { ReplicaOperation::OwnerUpdate(owners) => { self.owners = owners; } - ReplicaOperation::Resize(_) => todo!(), + ReplicaOperation::Resize(size) => { + self.size = size; + } } } 
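// Illustrative sketch of the transaction pattern this arm participates in
// (simplified standalone type, not the crate's SpecTransaction machinery): the
// operation is staged first, and only a successful data-plane resize commits the
// new size into the persisted spec before the pending operation is cleared.
struct MiniReplicaSpec {
    size: u64,
    pending_resize: Option<u64>,
}

impl MiniReplicaSpec {
    fn start_resize(&mut self, requested: u64) {
        self.pending_resize = Some(requested);
    }
    fn commit_resize(&mut self) {
        // Mirrors the `ReplicaOperation::Resize(size) => self.size = size` arm above.
        if let Some(size) = self.pending_resize.take() {
            self.size = size;
        }
    }
    fn abort_resize(&mut self) {
        // On failure the staged operation is dropped and the spec keeps its old size.
        self.pending_resize = None;
    }
}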
self.clear_op(); @@ -306,7 +308,7 @@ impl SpecTransaction for ReplicaSpec { ReplicaOperation::Share(_, _) => (true, true), ReplicaOperation::Unshare => (true, true), ReplicaOperation::OwnerUpdate(_) => (false, true), - ReplicaOperation::Resize(_) => todo!(), + ReplicaOperation::Resize(_) => (true, true), } } diff --git a/control-plane/stor-port/src/types/v0/store/volume.rs b/control-plane/stor-port/src/types/v0/store/volume.rs index 00e6f1df6..85866d8c1 100644 --- a/control-plane/stor-port/src/types/v0/store/volume.rs +++ b/control-plane/stor-port/src/types/v0/store/volume.rs @@ -502,7 +502,9 @@ impl SpecTransaction for VolumeSpec { VolumeOperation::DestroySnapshot(snapshot) => { self.metadata.runtime.snapshots.remove(&snapshot); } - VolumeOperation::Resize(_) => todo!(), + VolumeOperation::Resize(size) => { + self.size = size; + } } } self.clear_op(); diff --git a/control-plane/stor-port/src/types/v0/transport/mod.rs b/control-plane/stor-port/src/types/v0/transport/mod.rs index 7dff2d009..f414e91c1 100644 --- a/control-plane/stor-port/src/types/v0/transport/mod.rs +++ b/control-plane/stor-port/src/types/v0/transport/mod.rs @@ -123,6 +123,8 @@ pub enum MessageIdVs { CreateVolume, /// Delete Volume. DestroyVolume, + /// Resize Volume. + ResizeVolume, /// Publish Volume. PublishVolume, /// Republish Volume. diff --git a/control-plane/stor-port/src/types/v0/transport/replica.rs b/control-plane/stor-port/src/types/v0/transport/replica.rs index 0006790fe..1968904f6 100644 --- a/control-plane/stor-port/src/types/v0/transport/replica.rs +++ b/control-plane/stor-port/src/types/v0/transport/replica.rs @@ -619,7 +619,7 @@ impl ResizeReplica { pub fn new( node: &NodeId, pool_id: &PoolId, - name: &ReplicaName, + name: Option<&ReplicaName>, uuid: &ReplicaId, requested_size: u64, ) -> Self { @@ -627,7 +627,7 @@ impl ResizeReplica { node: node.clone(), pool_id: pool_id.clone(), uuid: uuid.clone(), - name: name.clone().into(), + name: name.cloned(), requested_size, } } diff --git a/control-plane/stor-port/src/types/v0/transport/volume.rs b/control-plane/stor-port/src/types/v0/transport/volume.rs index 090235e85..a4b589ace 100644 --- a/control-plane/stor-port/src/types/v0/transport/volume.rs +++ b/control-plane/stor-port/src/types/v0/transport/volume.rs @@ -457,7 +457,7 @@ pub struct CreateVolume { pub affinity_group: Option, } -/// Create volume request. +/// Resize volume request. #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq)] #[serde(rename_all = "camelCase")] pub struct ResizeVolume {