diff --git a/Cargo.lock b/Cargo.lock index aa30723c7..cad138616 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,17 +28,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "ahash" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", -] - [[package]] name = "aho-corasick" version = "0.7.20" @@ -1495,15 +1484,6 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" -dependencies = [ - "ahash 0.8.3", -] - [[package]] name = "hashbrown" version = "0.14.0" @@ -1765,16 +1745,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "lasso" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4644821e1c3d7a560fe13d842d13f587c07348a1a05d3a797152d41c90c56df2" -dependencies = [ - "dashmap", - "hashbrown 0.13.2", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -1925,7 +1895,7 @@ version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b9b8653cec6897f73b519a43fba5ee3d50f62fe9af80b428accdcc093b4a849" dependencies = [ - "ahash 0.7.6", + "ahash", "metrics-macros", "portable-atomic 0.3.20", ] @@ -1992,7 +1962,6 @@ dependencies = [ "fuser", "futures", "hdrhistogram", - "lasso", "lazy_static", "libc", "metrics", diff --git a/doc/CONFIGURATION.md b/doc/CONFIGURATION.md index f2a708095..882b411a9 100644 --- a/doc/CONFIGURATION.md +++ b/doc/CONFIGURATION.md @@ -18,7 +18,7 @@ Mountpoint uses the same [credentials configuration options](https://docs.aws.am We recommend you use short-term AWS credentials whenever possible. Mountpoint supports several options for short-term AWS credentials: * When running Mountpoint on an Amazon EC2 instance, you can [associate an IAM role with your instance](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html) using an instance profile, and Mountpoint will automatically assume that IAM role. -* When running Mountpoint in an Amazon ECS task, you can similarly [associate an IAM role with the task](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html) for Mountpoint to automatically assume. +* When running Mountpoint in an Amazon ECS task, you can similarly [associate an IAM role with the task](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html) for Mountpoint to automatically assume. * Otherwise, you can [acquire temporary AWS credentials for an IAM role](https://docs.aws.amazon.com/cli/latest/userguide/cli-authentication-short-term.html) from the AWS Console or with the `aws sts assume-role` AWS CLI command, and store them in the `~/.aws/credentials` file. If you need to use long-term AWS credentials, you can [store them in the configuration and credentials files](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) in `~/.aws`, or [specify them with environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html) (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`). 
@@ -164,7 +164,7 @@ Amazon S3 offers a [range of storage classes](https://aws.amazon.com/s3/storage- For the full list of possible storage classes, see the [PutObject documentation](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-StorageClass) in the Amazon S3 User Guide. -Mountpoint supports reading existing objects from your S3 bucket when they are stored in any instant-retrieval storage class. You cannot use Mountpoint to read objects stored in the S3 Glacier Flexible Retrieval or S3 Glacier Deep Archive storage classes, or the Archive Access or Deep Archive Access tiers of S3 Intelligent-Tiering. This limitation exists even if you have restored the object. However, you can still use Mountpoint to write new objects into these storage classes or S3 Intelligent-Tiering. +Mountpoint supports reading existing objects from your S3 bucket when they are stored in any instant-retrieval storage class. You cannot use Mountpoint to read objects stored in the S3 Glacier Flexible Retrieval or S3 Glacier Deep Archive storage classes, or the Archive Access or Deep Archive Access tiers of S3 Intelligent-Tiering, unless they've been [restored](https://docs.aws.amazon.com/AmazonS3/latest/userguide/restoring-objects.html). You can use Mountpoint to write new objects into these storage classes or S3 Intelligent-Tiering. ### File and directory permissions diff --git a/doc/SEMANTICS.md b/doc/SEMANTICS.md index a7a57b291..9b1232830 100644 --- a/doc/SEMANTICS.md +++ b/doc/SEMANTICS.md @@ -19,7 +19,7 @@ By default, Mountpoint does not allow deleting existing objects with commands li You cannot rename an existing file using Mountpoint. -Objects in the S3 Glacier Flexible Retrieval and S3 Glacier Deep Archive storage classes, and the Archive Access and Deep Archive Access tiers of S3 Intelligent-Tiering, are not accessible with Mountpoint even if they have been restored. To access these objects with Mountpoint, copy them to another storage class first. +Objects in the S3 Glacier Flexible Retrieval and S3 Glacier Deep Archive storage classes, and the Archive Access and Deep Archive Access tiers of S3 Intelligent-Tiering, are only accessible with Mountpoint if they have been restored. To access these objects with Mountpoint, [restore](https://docs.aws.amazon.com/AmazonS3/latest/userguide/restoring-objects.html) them first. ## Directories @@ -108,13 +108,13 @@ S3 places fewer restrictions on [valid object keys](https://docs.aws.amazon.com/ * `blue/` * `blue/image.jpg` * `red/` - + then mounting your bucket would give a file system with a `blue` directory containing an `image.jpg` file, and an empty `red` directory. The `blue/` and `red/` objects will not be accessible. Note that the S3 Console creates zero-byte objects like `blue/` and `red/` when creating directories in a bucket, and so these directories will work as expected. * Files will be shadowed by directories with the same name. For example, if your bucket has the following object keys: * `blue` * `blue/image.jpg` - + then mounting your bucket would give a file system with a `blue` directory, containing the file `image.jpg`. The `blue` object will not be accessible. Deleting the key `blue/image.jpg` will remove the `blue` directory, and cause the `blue` file to become visible. 
We test Mountpoint against these restrictions using a [reference model](https://github.com/awslabs/mountpoint-s3/blob/main/mountpoint-s3/tests/reftests/reference.rs) that programmatically encodes the expected mapping between S3 objects and file system structure. diff --git a/mountpoint-s3-client/src/mock_client.rs b/mountpoint-s3-client/src/mock_client.rs index 94074a982..214bbfc45 100644 --- a/mountpoint-s3-client/src/mock_client.rs +++ b/mountpoint-s3-client/src/mock_client.rs @@ -4,6 +4,7 @@ use std::ops::Range; use std::pin::Pin; use std::sync::{Arc, RwLock}; use std::task::{Context, Poll}; +use std::time::{Duration, SystemTime}; use async_trait::async_trait; use futures::{Stream, StreamExt}; @@ -20,7 +21,7 @@ use crate::object_client::{ ObjectClient, ObjectClientError, ObjectClientResult, ObjectInfo, PutObjectError, PutObjectParams, PutObjectResult, UploadReview, UploadReviewPart, }; -use crate::{Checksum, ETag, ObjectAttribute, PutObjectRequest}; +use crate::{Checksum, ETag, ObjectAttribute, PutObjectRequest, RestoreStatus}; pub const RAMP_MODULUS: usize = 251; // Largest prime under 256 static_assertions::const_assert!((RAMP_MODULUS > 0) && (RAMP_MODULUS <= 256)); @@ -56,12 +57,12 @@ pub struct MockClientConfig { #[derive(Debug)] pub struct MockClient { config: MockClientConfig, - objects: Arc>>>, + objects: Arc>>, in_progress_uploads: Arc>>, } -fn add_object(objects: &Arc>>>, key: &str, value: MockObject) { - objects.write().unwrap().insert(key.to_owned(), Arc::new(value)); +fn add_object(objects: &Arc>>, key: &str, value: MockObject) { + objects.write().unwrap().insert(key.to_owned(), value); } impl MockClient { @@ -108,6 +109,30 @@ impl MockClient { Err(MockClientError("object not found".into())) } } + + /// Returns error if object does not exist + pub fn restore_object(&self, key: &str) -> Result<(), MockClientError> { + match self.objects.write().unwrap().get_mut(key) { + Some(mock_object) => { + mock_object.restore_status = Some(RestoreStatus::Restored { + expiry: SystemTime::now() + Duration::from_secs(3600), + }); + Ok(()) + } + None => Err(MockClientError("object not found".into())), + } + } + + pub fn is_object_restored(&self, key: &str) -> Result { + if let Some(mock_object) = self.objects.read().unwrap().get(key) { + Ok(matches!( + mock_object.restore_status, + Some(RestoreStatus::Restored { expiry: _ }) + )) + } else { + Err(MockClientError("object not found".into())) + } + } } #[derive(Clone)] @@ -115,6 +140,7 @@ pub struct MockObject { generator: Arc Box<[u8]> + Send + Sync>, size: usize, storage_class: Option, + restore_status: Option, last_modified: OffsetDateTime, etag: ETag, } @@ -131,6 +157,7 @@ impl MockObject { size: bytes.len(), generator: Arc::new(move |offset, size| bytes[offset as usize..offset as usize + size].into()), storage_class: None, + restore_status: None, last_modified: OffsetDateTime::now_utc(), etag, } @@ -141,6 +168,7 @@ impl MockObject { generator: Arc::new(move |_offset, size| vec![v; size].into_boxed_slice()), size, storage_class: None, + restore_status: None, last_modified: OffsetDateTime::now_utc(), etag, } @@ -161,6 +189,7 @@ impl MockObject { }), size, storage_class: None, + restore_status: None, last_modified: OffsetDateTime::now_utc(), etag, } @@ -174,6 +203,10 @@ impl MockObject { self.storage_class = storage_class; } + pub fn set_restored(&mut self, restore_status: Option) { + self.restore_status = restore_status; + } + pub fn len(&self) -> usize { self.size } @@ -200,13 +233,14 @@ impl std::fmt::Debug for MockObject { 
.field("storage_class", &self.storage_class) .field("last_modified", &self.last_modified) .field("etag", &self.etag) + .field("restored", &self.restore_status) .finish() } } #[derive(Debug)] pub struct GetObjectResult { - object: Arc, + object: MockObject, next_offset: u64, length: usize, part_size: usize, @@ -316,7 +350,7 @@ impl ObjectClient for MockClient { }; Ok(GetObjectResult { - object: Arc::clone(object), + object: object.clone(), next_offset, length, part_size: self.config.part_size, @@ -347,6 +381,7 @@ impl ObjectClient for MockClient { last_modified: object.last_modified, etag: object.etag.as_str().to_string(), storage_class: object.storage_class.clone(), + restore_status: object.restore_status, }, }) } else { @@ -442,6 +477,7 @@ impl ObjectClient for MockClient { last_modified: object.last_modified, etag: object.etag.as_str().to_string(), storage_class: object.storage_class.clone(), + restore_status: object.restore_status, }); } } @@ -523,7 +559,7 @@ pub struct MockPutObjectRequest { buffer: Vec, part_size: usize, params: PutObjectParams, - objects: Arc>>>, + objects: Arc>>, in_progress_uploads: Arc>>, } @@ -532,7 +568,7 @@ impl MockPutObjectRequest { key: &str, part_size: usize, params: &PutObjectParams, - objects: &Arc>>>, + objects: &Arc>>, in_progress_uploads: &Arc>>, ) -> Self { in_progress_uploads.write().unwrap().insert(key.to_owned()); diff --git a/mountpoint-s3-client/src/object_client.rs b/mountpoint-s3-client/src/object_client.rs index d64e55d58..d5457d754 100644 --- a/mountpoint-s3-client/src/object_client.rs +++ b/mountpoint-s3-client/src/object_client.rs @@ -2,6 +2,7 @@ use async_trait::async_trait; use auto_impl::auto_impl; use futures::Stream; use std::str::FromStr; +use std::time::SystemTime; use std::{ fmt::{self, Debug}, ops::Range, @@ -315,6 +316,19 @@ pub enum PutObjectError { NoSuchBucket, } +/// Restoration status for S3 objects in GLACIER/DEEP_ARCHIVE storage class +/// See https://docs.aws.amazon.com/AmazonS3/latest/userguide/restoring-objects.html#restore-archived-objects-status for more details. +#[derive(Debug, Clone, Copy)] +pub enum RestoreStatus { + /// S3 returns this status after it accepted a restoration request, but not have completed it yet. + /// Objects with this status are not readable. + InProgress, + + /// This status means that restoration is fully completed. Note that restored objects are stored only + /// for the number of days that was specified in the request. + Restored { expiry: SystemTime }, +} + /// Metadata about a single S3 object. /// See https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html for more details. #[derive(Debug)] @@ -333,6 +347,9 @@ pub struct ObjectInfo { /// https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadObject.html#API_HeadObject_Examples pub storage_class: Option, + /// Objects with GLACIER or DEEP_ARCHIVE storage classes are only acessable after restoration + pub restore_status: Option, + /// Entity tag of this object. 
pub etag: String, } diff --git a/mountpoint-s3-client/src/s3_crt_client/head_object.rs b/mountpoint-s3-client/src/s3_crt_client/head_object.rs index 22d80c2b3..3386da97d 100644 --- a/mountpoint-s3-client/src/s3_crt_client/head_object.rs +++ b/mountpoint-s3-client/src/s3_crt_client/head_object.rs @@ -2,8 +2,10 @@ use std::ffi::OsString; use std::str::FromStr; use std::sync::{Arc, Mutex}; +use lazy_static::lazy_static; use mountpoint_s3_crt::http::request_response::{Headers, HeadersError}; use mountpoint_s3_crt::s3::client::{MetaRequestResult, MetaRequestType}; +use regex::Regex; use thiserror::Error; use time::format_description::well_known::Rfc2822; use time::OffsetDateTime; @@ -11,7 +13,7 @@ use tracing::error; use crate::object_client::{HeadObjectError, HeadObjectResult, ObjectClientError, ObjectClientResult, ObjectInfo}; use crate::s3_crt_client::S3RequestError; -use crate::S3CrtClient; +use crate::{RestoreStatus, S3CrtClient}; #[derive(Error, Debug)] #[non_exhaustive] @@ -27,6 +29,9 @@ pub enum ParseError { #[error("Failed to parse field {1} as an int: {0:?}")] Int(#[source] std::num::ParseIntError, String), + + #[error("Header x-amz-restore is invalid: {0:?}")] + InvalidRestore(String), } fn get_field(headers: &Headers, name: &str) -> Result { @@ -47,7 +52,38 @@ fn get_optional_field(headers: &Headers, name: &str) -> Result, P }) } +lazy_static! { + // Example: ongoing-request="true" + static ref RESTORE_IN_PROGRESS_RE: Regex = Regex::new(r#"^ongoing-request="(?[^"]*)"$"#).unwrap(); + + // Example: ongoing-request="false", expiry-date="Fri, 21 Dec 2012 00:00:00 GMT" + static ref RESTORE_DONE_RE: Regex = + Regex::new(r#"^ongoing-request="[^"]*",\s*expiry-date="(?[^"]*)"$"#).unwrap(); +} + impl HeadObjectResult { + fn parse_restore_status(headers: &Headers) -> Result, ParseError> { + let Some(header) = get_optional_field(headers, "x-amz-restore")? 
else { + return Ok(None); + }; + + if let Some(caps) = RESTORE_IN_PROGRESS_RE.captures(&header) { + let ongoing = bool::from_str(&caps["ongoing"]).map_err(|_| ParseError::InvalidRestore(header.clone()))?; + return if ongoing { + Ok(Some(RestoreStatus::InProgress)) + } else { + Err(ParseError::InvalidRestore(header.clone())) + }; + }; + + let Some(caps) = RESTORE_DONE_RE.captures(&header) else { + return Err(ParseError::InvalidRestore(header)); + }; + let expiry = OffsetDateTime::parse(&caps["expiry"], &Rfc2822) + .map_err(|e| ParseError::OffsetDateTime(e, "x-amz-restore::expiry".into()))?; + Ok(Some(RestoreStatus::Restored { expiry: expiry.into() })) + } + fn parse_from_hdr(bucket: String, key: String, headers: &Headers) -> Result { let last_modified = OffsetDateTime::parse(&get_field(headers, "Last-Modified")?, &Rfc2822) .map_err(|e| ParseError::OffsetDateTime(e, "LastModified".into()))?; @@ -55,11 +91,13 @@ impl HeadObjectResult { .map_err(|e| ParseError::Int(e, "ContentLength".into()))?; let etag = get_field(headers, "Etag")?; let storage_class = get_optional_field(headers, "x-amz-storage-class")?; + let restore_status = Self::parse_restore_status(headers)?; let object = ObjectInfo { key, size, last_modified, storage_class, + restore_status, etag, }; Ok(HeadObjectResult { bucket, object }) @@ -131,10 +169,13 @@ fn parse_head_object_error(result: &MetaRequestResult) -> Option) -> MetaRequestResult { MetaRequestResult { response_status, @@ -150,4 +191,48 @@ mod tests { let result = parse_head_object_error(&result); assert_eq!(result, Some(HeadObjectError::NotFound)); } + + #[test_case(r#"ongoing-request="false", expiry-date="Fri, 21 Dec 2012 00:00:00 GMT""#; "from documentation")] + #[test_case(r#"ongoing-request="false",expiry-date="Fri, 21 Dec 2012 00:00:00 GMT""#; "no whitespace")] + #[test_case("ongoing-request=\"false\", \t \t expiry-date=\"Fri, 21 Dec 2012 00:00:00 GMT\""; "lots of whitespaces")] + fn test_parse_restore_status_done(value: &str) { + let mut headers = Headers::new(&Allocator::default()).unwrap(); + let header = Header::new("x-amz-restore", value.to_owned()); + headers.add_header(&header).unwrap(); + let restore_status = HeadObjectResult::parse_restore_status(&headers).expect("failed to parse headers"); + match restore_status { + Some(RestoreStatus::Restored { expiry }) => assert_eq!( + OffsetDateTime::format(expiry.into(), &Rfc2822).unwrap(), + "Fri, 21 Dec 2012 00:00:00 +0000" + ), + _ => panic!("unexpected restore_status"), + }; + } + + #[test_case(r#"ongoing-request="false", expiry-date="not a date""#; "not a date")] + #[test_case(r#"ongoing-request="false""#; "done without expiry")] + fn test_parse_restore_status_invalid(value: &str) { + let mut headers = Headers::new(&Allocator::default()).unwrap(); + let header = Header::new("x-amz-restore", value.to_owned()); + headers.add_header(&header).unwrap(); + assert!(HeadObjectResult::parse_restore_status(&headers).is_err()); + } + + #[test_case(r#"ongoing-request="true""#; "from documentation")] + fn test_parse_restore_in_progress(value: &str) { + let mut headers = Headers::new(&Allocator::default()).unwrap(); + let header = Header::new("x-amz-restore", value.to_owned()); + headers.add_header(&header).unwrap(); + let restore_status = HeadObjectResult::parse_restore_status(&headers).expect("failed to parse headers"); + let Some(RestoreStatus::InProgress) = restore_status else { + panic!("unexpected restore_status"); + }; + } + + #[test] + fn test_parse_restore_empty() { + let headers = 
Headers::new(&Allocator::default()).unwrap(); + let restore_status = HeadObjectResult::parse_restore_status(&headers).expect("failed to parse headers"); + assert!(restore_status.is_none()); + } } diff --git a/mountpoint-s3-client/src/s3_crt_client/list_objects.rs b/mountpoint-s3-client/src/s3_crt_client/list_objects.rs index b634325be..dbe3b0d9a 100644 --- a/mountpoint-s3-client/src/s3_crt_client/list_objects.rs +++ b/mountpoint-s3-client/src/s3_crt_client/list_objects.rs @@ -2,6 +2,7 @@ use std::ops::Deref; use std::os::unix::prelude::OsStrExt; use std::str::FromStr; +use mountpoint_s3_crt::http::request_response::Header; use mountpoint_s3_crt::s3::client::{MetaRequestResult, MetaRequestType}; use thiserror::Error; use time::format_description::well_known::Rfc3339; @@ -10,7 +11,7 @@ use tracing::error; use crate::object_client::{ListObjectsError, ListObjectsResult, ObjectClientError, ObjectClientResult, ObjectInfo}; use crate::s3_crt_client::S3RequestError; -use crate::S3CrtClient; +use crate::{RestoreStatus, S3CrtClient}; #[derive(Error, Debug)] #[non_exhaustive] @@ -94,6 +95,24 @@ fn parse_result_from_xml(element: &mut xmltree::Element) -> Result Result, ParseError> { + let Some(restore_status) = element.get_child("RestoreStatus") else { + return Ok(None); + }; + + let restore_in_progress = bool::from_str(&get_field(restore_status, "IsRestoreInProgress")?) + .map_err(|e| ParseError::Bool(e, "IsRestoreInProgress".to_string()))?; + if restore_in_progress { + return Ok(Some(RestoreStatus::InProgress)); + } + + Ok(Some(RestoreStatus::Restored { + expiry: OffsetDateTime::parse(&get_field(restore_status, "RestoreExpiryDate")?, &Rfc3339) + .map_err(|e| ParseError::OffsetDateTime(e, "RestoreExpiryDate".to_string()))? + .into(), + })) +} + fn parse_object_info_from_xml(element: &xmltree::Element) -> Result { let key = get_field(element, "Key")?; @@ -110,6 +129,8 @@ fn parse_object_info_from_xml(element: &xmltree::Element) -> Result Result FileHandleType { } async fn new_read_handle(lookup: &LookedUp) -> Result, Error> { - if is_flexible_retrieval_storage_class(lookup) { + if !lookup.stat.is_readable { return Err(err!( libc::EACCES, "objects in flexible retrieval storage classes are not accessible", @@ -353,25 +353,6 @@ pub trait ReadReplier { fn error(self, error: Error) -> Self::Replied; } -/// Objects in flexible retrieval storage classes can't be accessed via GetObject, and so we -/// override their permissions to 000 and reject reads to them. We also warn the first time we see -/// an object like this, because FUSE enforces the 000 permissions on our behalf so we might not -/// see an attempted `open` call. -fn is_flexible_retrieval_storage_class(lookup: &LookedUp) -> bool { - static HAS_SENT_WARNING: AtomicBool = AtomicBool::new(false); - match lookup.stat.storage_class() { - Some("GLACIER") | Some("DEEP_ARCHIVE") => { - if !HAS_SENT_WARNING.swap(true, Ordering::SeqCst) { - tracing::warn!( - "objects in the GLACIER and DEEP_ARCHIVE storage classes are not readable with Mountpoint" - ); - } - true - } - _ => false, - } -} - impl S3Filesystem where Client: ObjectClient + Send + Sync + 'static, @@ -395,10 +376,10 @@ where // (itself + the "." link). 
let (perm, nlink) = match lookup.inode.kind() { InodeKind::File => { - if is_flexible_retrieval_storage_class(lookup) { - (0o000, 1) - } else { + if lookup.stat.is_readable { (self.config.file_mode, 1) + } else { + (0o000, 1) } } InodeKind::Directory => (self.config.dir_mode, 2), diff --git a/mountpoint-s3/src/inode.rs b/mountpoint-s3/src/inode.rs index d2d2ab173..d8b93da82 100644 --- a/mountpoint-s3/src/inode.rs +++ b/mountpoint-s3/src/inode.rs @@ -25,14 +25,13 @@ use std::collections::{HashMap, HashSet}; use std::ffi::{OsStr, OsString}; use std::fmt::{Debug, Display}; use std::os::unix::prelude::OsStrExt; -use std::time::{Duration, Instant}; +use std::sync::atomic::AtomicBool; +use std::time::{Duration, Instant, SystemTime}; use anyhow::anyhow; use fuser::FileType; use futures::{select_biased, FutureExt}; -use lasso::{Key, MiniSpur, ThreadedRodeo}; -use lazy_static::lazy_static; -use mountpoint_s3_client::{HeadObjectError, HeadObjectResult, ObjectClient, ObjectClientError}; +use mountpoint_s3_client::{HeadObjectError, HeadObjectResult, ObjectClient, ObjectClientError, RestoreStatus}; use mountpoint_s3_crt::checksums::crc32c::{self, Crc32c}; use thiserror::Error; use time::OffsetDateTime; @@ -55,28 +54,6 @@ pub const ROOT_INODE_NO: InodeNo = 1; // 200 years seems long enough const NEVER_EXPIRE_TTL: Duration = Duration::from_secs(200 * 365 * 24 * 60 * 60); -lazy_static! { - /// There's a small number of possible storage classes, so avoid allocating a string for every - /// inode by interning their string representations. - static ref STORAGE_CLASS_INTERN: ThreadedRodeo = ThreadedRodeo::new(); -} - -/// Key for an interned storage class -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[repr(transparent)] -struct StorageClass(MiniSpur); - -// SAFETY: just delegating to the underlying impl -unsafe impl Key for StorageClass { - fn into_usize(self) -> usize { - self.0.into_usize() - } - - fn try_from_usize(int: usize) -> Option { - MiniSpur::try_from_usize(int).map(Self) - } -} - pub fn valid_inode_name>(name: T) -> bool { let name = name.as_ref(); // Names cannot be empty @@ -336,7 +313,7 @@ impl Superblock { // Local inode stats never expire, because they can't be looked up remotely let stat = match kind { // Objects don't have an ETag until they are uploaded to S3 - InodeKind::File => InodeStat::for_file(0, OffsetDateTime::now_utc(), None, None, NEVER_EXPIRE_TTL), + InodeKind::File => InodeStat::for_file(0, OffsetDateTime::now_utc(), None, None, None, NEVER_EXPIRE_TTL), InodeKind::Directory => InodeStat::for_directory(self.inner.mount_time, NEVER_EXPIRE_TTL), }; @@ -355,7 +332,7 @@ impl Superblock { } /// Remove local-only empty directory, i.e., the ones created by mkdir. - /// It does not affect empty directories represented remotely with directory markers. + /// It does not affect empty directories represented remotely with directory markers. pub async fn rmdir( &self, client: &OC, @@ -602,7 +579,7 @@ impl SuperblockInner { result = file_lookup => { match result { Ok(HeadObjectResult { object, .. 
}) => { - let stat = InodeStat::for_file(object.size as usize, object.last_modified, Some(object.etag.clone()), object.storage_class, self.cache_config.file_ttl); + let stat = InodeStat::for_file(object.size as usize, object.last_modified, Some(object.etag.clone()), object.storage_class, object.restore_status, self.cache_config.file_ttl); file_state = Some(stat); } // If the object is not found, might be a directory, so keep going @@ -1263,8 +1240,10 @@ pub struct InodeStat { pub atime: OffsetDateTime, /// Etag for the file (object) pub etag: Option, - /// Storage class for the file (object), if known - storage_class: Option, + /// Inodes corresponding to S3 objects with GLACIER or DEEP_ARCHIVE storage classes + /// are only readable after restoration. For objects with other storage classes + /// this field should be always `true`. + pub is_readable: bool, } /// Inode write status (local vs remote) @@ -1283,18 +1262,40 @@ impl InodeStat { self.expiry >= Instant::now() } + /// Objects in flexible retrieval storage classes can't be accessed via GetObject unless they are + /// restored, and so we override their permissions to 000 and reject reads to them. We also warn + /// the first time we see an object like this, because FUSE enforces the 000 permissions on our + /// behalf so we might not see an attempted `open` call. + fn is_readable(storage_class: Option, restore_status: Option) -> bool { + static HAS_SENT_WARNING: AtomicBool = AtomicBool::new(false); + match storage_class.as_deref() { + Some("GLACIER") | Some("DEEP_ARCHIVE") => { + let restored = + matches!(restore_status, Some(RestoreStatus::Restored { expiry }) if expiry > SystemTime::now()); + if !restored && !HAS_SENT_WARNING.swap(true, Ordering::SeqCst) { + tracing::warn!( + "objects in the GLACIER and DEEP_ARCHIVE storage classes are only accessible if restored" + ); + } + restored + } + _ => true, + } + } + /// Initialize an [InodeStat] for a file, given some metadata. 
fn for_file( size: usize, datetime: OffsetDateTime, etag: Option, storage_class: Option, + restore_status: Option, validity: Duration, ) -> InodeStat { let expiry = Instant::now() .checked_add(validity) .expect("64-bit time shouldn't overflow"); - let storage_class = storage_class.map(|sc| STORAGE_CLASS_INTERN.get_or_intern(sc)); + let is_readable = Self::is_readable(storage_class, restore_status); InodeStat { expiry, size, @@ -1302,7 +1303,7 @@ impl InodeStat { ctime: datetime, mtime: datetime, etag, - storage_class, + is_readable, } } @@ -1318,7 +1319,7 @@ impl InodeStat { ctime: datetime, mtime: datetime, etag: None, - storage_class: None, + is_readable: true, } } @@ -1327,10 +1328,6 @@ impl InodeStat { .checked_add(validity) .expect("64-bit time shouldn't overflow"); } - - pub fn storage_class(&self) -> Option<&str> { - self.storage_class.map(|sc| STORAGE_CLASS_INTERN.resolve(&sc)) - } } #[derive(Debug, Error)] @@ -1524,7 +1521,7 @@ mod tests { InodeKind::File, InodeState { write_status: WriteStatus::Remote, - stat: InodeStat::for_file(0, OffsetDateTime::now_utc(), None, None, Default::default()), + stat: InodeStat::for_file(0, OffsetDateTime::now_utc(), None, None, None, Default::default()), kind_data: InodeKindData::File {}, lookup_count: 5, }, @@ -2034,6 +2031,7 @@ mod tests { OffsetDateTime::now_utc(), Some(ETag::for_tests().as_str().to_owned()), None, + None, NEVER_EXPIRE_TTL, ), write_status: WriteStatus::Remote, @@ -2335,7 +2333,7 @@ mod tests { checksum, sync: RwLock::new(InodeState { write_status: WriteStatus::LocalOpen, - stat: InodeStat::for_file(0, OffsetDateTime::UNIX_EPOCH, None, None, Default::default()), + stat: InodeStat::for_file(0, OffsetDateTime::UNIX_EPOCH, None, None, None, Default::default()), kind_data: InodeKindData::File {}, lookup_count: 5, }), @@ -2358,7 +2356,7 @@ mod tests { #[test] fn test_inodestat_constructors() { let ts = OffsetDateTime::UNIX_EPOCH + Duration::days(90); - let file_inodestat = InodeStat::for_file(128, ts, None, None, Default::default()); + let file_inodestat = InodeStat::for_file(128, ts, None, None, None, Default::default()); assert_eq!(file_inodestat.size, 128); assert_eq!(file_inodestat.atime, ts); assert_eq!(file_inodestat.ctime, ts); diff --git a/mountpoint-s3/src/inode/readdir.rs b/mountpoint-s3/src/inode/readdir.rs index 7b6c7ec8a..b3c29a278 100644 --- a/mountpoint-s3/src/inode/readdir.rs +++ b/mountpoint-s3/src/inode/readdir.rs @@ -169,6 +169,7 @@ impl ReaddirHandle { object_info.last_modified, Some(object_info.etag.clone()), object_info.storage_class.clone(), + object_info.restore_status, self.inner.cache_config.file_ttl, ); Some(RemoteLookup { diff --git a/mountpoint-s3/tests/fs.rs b/mountpoint-s3/tests/fs.rs index b4b3cd23f..1252b2aef 100644 --- a/mountpoint-s3/tests/fs.rs +++ b/mountpoint-s3/tests/fs.rs @@ -5,15 +5,17 @@ use mountpoint_s3::fs::{ToErrno, FUSE_ROOT_INODE}; use mountpoint_s3::prefix::Prefix; use mountpoint_s3_client::failure_client::countdown_failure_client; use mountpoint_s3_client::mock_client::{MockClient, MockClientConfig, MockClientError}; -use mountpoint_s3_client::ObjectClient; use mountpoint_s3_client::{mock_client::MockObject, ETag}; +use mountpoint_s3_client::{ObjectClient, RestoreStatus}; use nix::unistd::{getgid, getuid}; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha20Rng; use std::collections::HashMap; use std::ffi::OsString; +use std::ops::Add; use std::str::FromStr; use std::sync::Arc; +use std::time::{Duration, SystemTime}; use test_case::test_case; mod common; @@ -889,7 +891,13 @@ async 
fn test_readdir_vs_readdirplus() {
 
 #[tokio::test]
 async fn test_flexible_retrieval_objects() {
-    const NAMES: &[&str] = &["GLACIER", "GLACIER_IR", "DEEP_ARCHIVE"];
+    const NAMES: &[&str] = &[
+        "GLACIER",
+        "GLACIER_IR",
+        "DEEP_ARCHIVE",
+        "GLACIER_RESTORED",
+        "DEEP_ARCHIVE_RESTORED",
+    ];
 
     let (client, fs) = make_test_filesystem(
         "test_flexible_retrieval_objects",
@@ -899,7 +907,14 @@ async fn test_flexible_retrieval_objects() {
 
     for name in NAMES {
         let mut object = MockObject::from(b"hello world");
-        object.set_storage_class(Some(name.to_string()));
+        object.set_storage_class(Some(name.to_string().replace("_RESTORED", "")));
+        object.set_restored(if name.contains("_RESTORED") {
+            Some(RestoreStatus::Restored {
+                expiry: SystemTime::now().add(Duration::from_secs(3600)),
+            })
+        } else {
+            None
+        });
         client.add_object(name, object);
     }
 
@@ -936,7 +951,14 @@ async fn test_flexible_retrieval_objects() {
         let file_name = format!("{name}2");
 
         let mut object = MockObject::from(b"hello world");
-        object.set_storage_class(Some(name.to_string()));
+        object.set_storage_class(Some(name.to_string().replace("_RESTORED", "")));
+        object.set_restored(if name.contains("_RESTORED") {
+            Some(RestoreStatus::Restored {
+                expiry: SystemTime::now().add(Duration::from_secs(3600)),
+            })
+        } else {
+            None
+        });
         client.add_object(&file_name, object);
 
         let lookup = fs.lookup(FUSE_ROOT_INODE, file_name.as_ref()).await.unwrap();
diff --git a/mountpoint-s3/tests/fuse_tests/mod.rs b/mountpoint-s3/tests/fuse_tests/mod.rs
index cfa69dc36..a6fc21651 100644
--- a/mountpoint-s3/tests/fuse_tests/mod.rs
+++ b/mountpoint-s3/tests/fuse_tests/mod.rs
@@ -43,6 +43,10 @@ pub trait TestClient {
     fn is_upload_in_progress(&self, key: &str) -> Result<bool, Box<dyn std::error::Error>>;
 
     fn get_object_storage_class(&self, key: &str) -> Result<Option<String>, Box<dyn std::error::Error>>;
+
+    fn restore_object(&mut self, key: &str, expedited: bool) -> Result<(), Box<dyn std::error::Error>>;
+
+    fn is_object_restored(&mut self, key: &str) -> Result<bool, Box<dyn std::error::Error>>;
 }
 
 pub type TestClientBox = Box<dyn TestClient>;
@@ -166,6 +170,20 @@ mod mock_session {
                 .get_object_storage_class(&full_key)
                 .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
         }
+
+        fn restore_object(&mut self, key: &str, _expedited: bool) -> Result<(), Box<dyn std::error::Error>> {
+            let full_key = format!("{}{}", self.prefix, key);
+            self.client
+                .restore_object(&full_key)
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
+        }
+
+        fn is_object_restored(&mut self, key: &str) -> Result<bool, Box<dyn std::error::Error>> {
+            let full_key = format!("{}{}", self.prefix, key);
+            self.client
+                .is_object_restored(&full_key)
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
+        }
     }
 }
 
@@ -177,11 +195,11 @@ mod s3_session {
 
     use std::future::Future;
 
-    use aws_sdk_s3::config::Region;
     use aws_sdk_s3::operation::head_object::HeadObjectError;
     use aws_sdk_s3::primitives::ByteStream;
-    use aws_sdk_s3::types::ChecksumAlgorithm;
+    use aws_sdk_s3::types::{ChecksumAlgorithm, RestoreRequest, Tier};
     use aws_sdk_s3::Client;
+    use aws_sdk_s3::{config::Region, types::GlacierJobParameters};
     use mountpoint_s3_client::{EndpointConfig, S3ClientConfig, S3CrtClient};
 
     /// Create a FUSE mount backed by a real S3 client
@@ -342,6 +360,37 @@ mod s3_session {
                 .map(|output| output.storage_class().map(|s| s.as_str().to_string()))
                 .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
         }
+
+        // Schedule restoration of an object; do not wait until completion. Expedited restoration
+        // completes within 1-5 minutes for GLACIER and is not available for DEEP_ARCHIVE.
+        // https://docs.aws.amazon.com/AmazonS3/latest/userguide/restoring-objects-retrieval-options.html?icmpid=docs_amazons3_console#restoring-objects-upgrade-tier
+        fn restore_object(&mut self, key: &str, expedited: bool) -> Result<(), Box<dyn std::error::Error>> {
+            let full_key = format!("{}{}", self.prefix, key);
+            let tier = if expedited { Tier::Expedited } else { Tier::Bulk };
+            tokio_block_on(
+                self.sdk_client
+                    .restore_object()
+                    .bucket(&self.bucket)
+                    .key(full_key)
+                    .set_restore_request(Some(
+                        RestoreRequest::builder()
+                            .set_days(Some(1))
+                            .set_glacier_job_parameters(Some(
+                                GlacierJobParameters::builder().set_tier(Some(tier)).build(),
+                            ))
+                            .build(),
+                    ))
+                    .send(),
+            )
+            .map(|_| ())
+            .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
+        }
+
+        fn is_object_restored(&mut self, key: &str) -> Result<bool, Box<dyn std::error::Error>> {
+            let full_key = format!("{}{}", self.prefix, key);
+            tokio_block_on(self.sdk_client.head_object().bucket(&self.bucket).key(full_key).send())
+                .map(|output| output.restore().unwrap().contains("ongoing-request=\"false\""))
+                .map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
+        }
     }
 }
 
 /// Take a `read_dir` iterator and return the entry names
diff --git a/mountpoint-s3/tests/fuse_tests/read_test.rs b/mountpoint-s3/tests/fuse_tests/read_test.rs
index 01e957d33..fdd69181c 100644
--- a/mountpoint-s3/tests/fuse_tests/read_test.rs
+++ b/mountpoint-s3/tests/fuse_tests/read_test.rs
@@ -1,6 +1,7 @@
 use std::fs::{read_dir, File};
 use std::io::{Read as _, Seek, SeekFrom};
 use std::os::unix::prelude::PermissionsExt;
+use std::time::{Duration, Instant};
 
 use fuser::BackgroundSession;
 use mountpoint_s3_client::PutObjectParams;
@@ -75,22 +76,47 @@ fn basic_read_test_mock_prefix() {
     basic_read_test(crate::fuse_tests::mock_session::new, "basic_read_test");
 }
 
-fn read_flexible_retrieval_test<F>(creator_fn: F, prefix: &str)
+#[derive(PartialEq)]
+enum RestorationOptions {
+    None,
+    RestoreAndWait,
+    RestoreInProgress,
+}
+
+fn read_flexible_retrieval_test<F>(creator_fn: F, prefix: &str, files: &[&str], restore: RestorationOptions)
 where
     F: FnOnce(&str, TestSessionConfig) -> (TempDir, BackgroundSession, TestClientBox),
 {
-    const FILES: &[&str] = &["STANDARD", "GLACIER_IR", "GLACIER", "DEEP_ARCHIVE"];
-
     let (mount_point, _session, mut test_client) = creator_fn(prefix, Default::default());
 
-    for file in FILES {
+    for file in files {
         let mut put_params = PutObjectParams::default();
         if *file != "STANDARD" {
             put_params.storage_class = Some(file.to_string());
         }
-        test_client
-            .put_object_params(&format!("{file}.txt"), b"hello world", put_params)
-            .unwrap();
+        let key = format!("{file}.txt");
+        test_client.put_object_params(&key, b"hello world", put_params).unwrap();
+        match restore {
+            RestorationOptions::None => (),
+            RestorationOptions::RestoreAndWait => {
+                test_client.restore_object(&key, true).unwrap();
+                let timeout = Duration::from_secs(300);
+                let start = Instant::now();
+                let mut timed_out = true;
+                while start.elapsed() < timeout {
+                    if test_client
+                        .is_object_restored(&key)
+                        .expect("failed to check restoration status")
+                    {
+                        timed_out = false;
+                        break;
+                    }
+                    std::thread::sleep(Duration::from_secs(1));
+                }
+                assert!(!timed_out, "timed out while waiting for object to become restored");
+            }
+            RestorationOptions::RestoreInProgress => test_client.restore_object(&key, false).unwrap(),
+        }
     }
 
     let read_dir_iter = read_dir(mount_point.path()).unwrap();
@@ -99,7 +125,9 @@ where
         let file_name = file.file_name().to_string_lossy().into_owned();
         let metadata = file.metadata().unwrap();
 
-        if file_name == "GLACIER.txt" || file_name == "DEEP_ARCHIVE.txt" {
+        if (file_name == "GLACIER.txt" || file_name == "DEEP_ARCHIVE.txt")
"GLACIER.txt" || file_name == "DEEP_ARCHIVE.txt") + && restore != RestorationOptions::RestoreAndWait + { assert_eq!(metadata.permissions().mode() as libc::mode_t & !libc::S_IFMT, 0o000); let err = File::open(file.path()).expect_err("read of flexible retrieval object should fail"); assert_eq!(err.raw_os_error(), Some(libc::EACCES)); @@ -115,11 +143,62 @@ where #[cfg(feature = "s3_tests")] #[test] fn read_flexible_retrieval_test_s3() { - read_flexible_retrieval_test(crate::fuse_tests::s3_session::new, "read_flexible_retrieval_test"); + const FILES: &[&str] = &["STANDARD", "GLACIER_IR", "GLACIER", "DEEP_ARCHIVE"]; + read_flexible_retrieval_test( + crate::fuse_tests::s3_session::new, + "read_flexible_retrieval_test", + FILES, + RestorationOptions::None, + ); } #[test_case(""; "no prefix")] #[test_case("read_flexible_retrieval_test"; "prefix")] fn read_flexible_retrieval_test_mock(prefix: &str) { - read_flexible_retrieval_test(crate::fuse_tests::mock_session::new, prefix); + const FILES: &[&str] = &["STANDARD", "GLACIER_IR", "GLACIER", "DEEP_ARCHIVE"]; + read_flexible_retrieval_test( + crate::fuse_tests::mock_session::new, + prefix, + FILES, + RestorationOptions::None, + ); +} + +#[test_case(""; "no prefix")] +#[test_case("read_flexible_retrieval_test"; "prefix")] +fn read_flexible_retrieval_restored_test_mock(prefix: &str) { + const FILES: &[&str] = &["GLACIER", "DEEP_ARCHIVE"]; + read_flexible_retrieval_test( + crate::fuse_tests::mock_session::new, + prefix, + FILES, + RestorationOptions::RestoreAndWait, + ); +} + +// We do not run this test for objects in DEEP_ARCHIVE storage class because +// it does not support expedited retrieval option. It would take 12 hours to +// restore object from DEEP_ARCHIVE. +#[cfg(feature = "s3_tests")] +#[test] +fn read_flexible_retrieval_restored_test_s3() { + const RESTORED_FILES: &[&str] = &["GLACIER"]; + read_flexible_retrieval_test( + crate::fuse_tests::s3_session::new, + "read_flexible_retrieval_restored_test_s3", + RESTORED_FILES, + RestorationOptions::RestoreAndWait, + ); +} + +#[cfg(feature = "s3_tests")] +#[test] +fn read_flexible_retrieval_restoring_test_s3() { + const RESTORING_FILES: &[&str] = &["GLACIER", "DEEP_ARCHIVE"]; + read_flexible_retrieval_test( + crate::fuse_tests::s3_session::new, + "read_flexible_retrieval_restoring_test_s3", + RESTORING_FILES, + RestorationOptions::RestoreInProgress, + ); }