From d54c56b5c805b3ab1e894a2923f293b267188119 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 18:42:51 -0400 Subject: [PATCH 01/91] Add new file section to deepwell config. --- deepwell/config.example.toml | 16 ++++++++++++++++ install/files/dev/deepwell.toml | 4 ++++ install/files/local/deepwell.toml | 4 ++++ install/files/prod/deepwell.toml | 4 ++++ 4 files changed, 28 insertions(+) diff --git a/deepwell/config.example.toml b/deepwell/config.example.toml index 69ee918a70..73b4911d1b 100644 --- a/deepwell/config.example.toml +++ b/deepwell/config.example.toml @@ -355,6 +355,22 @@ minimum-name-bytes = 3 # Set to 0 to disable. refill-name-change-days = 90 + +[file] + +# The length of paths used for S3 presigned URLs. +# +# The value doesn't particularly matter so long as it is sufficiently long +# to avoid collisions. +# +# Just to be safe, the generation mechanism is the same as for session tokens. +presigned-path-length = 32 + +# How long a presigned URL lasts before expiry. +# +# The value should only be a few minutes, and no longer than 12 hours. +presigned-expiration-minutes = 5 + [message] # The maximum size of a message's subject line, in bytes. 
diff --git a/install/files/dev/deepwell.toml b/install/files/dev/deepwell.toml index 2a0ccb8fb8..4024040eb8 100644 --- a/install/files/dev/deepwell.toml +++ b/install/files/dev/deepwell.toml @@ -67,6 +67,10 @@ maximum-name-changes = 3 minimum-name-bytes = 3 refill-name-change-days = 90 +[file] +presigned-path-length = 32 +presigned-expiration-minutes = 10 + [message] maximum-subject-bytes = 128 maximum-body-bytes = 200000 diff --git a/install/files/local/deepwell.toml b/install/files/local/deepwell.toml index cd49015359..b06ad2bb23 100644 --- a/install/files/local/deepwell.toml +++ b/install/files/local/deepwell.toml @@ -67,6 +67,10 @@ maximum-name-changes = 3 minimum-name-bytes = 3 refill-name-change-days = 90 +[file] +presigned-path-length = 32 +presigned-expiration-minutes = 10 + [message] maximum-subject-bytes = 128 maximum-body-bytes = 200000 diff --git a/install/files/prod/deepwell.toml b/install/files/prod/deepwell.toml index 386e863321..583182474f 100644 --- a/install/files/prod/deepwell.toml +++ b/install/files/prod/deepwell.toml @@ -67,6 +67,10 @@ maximum-name-changes = 3 minimum-name-bytes = 3 refill-name-change-days = 90 +[file] +presigned-path-length = 32 +presigned-expiration-minutes = 5 + [message] maximum-subject-bytes = 128 maximum-body-bytes = 200000 From 5cc26a27e4f4fdb60514e584c63066520939e99f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 21:19:57 -0400 Subject: [PATCH 02/91] Add file section to configuration. 
--- deepwell/src/config/file.rs | 16 ++++++++++++++++ deepwell/src/config/object.rs | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/deepwell/src/config/file.rs b/deepwell/src/config/file.rs index e73f40a2c6..b4424fcc81 100644 --- a/deepwell/src/config/file.rs +++ b/deepwell/src/config/file.rs @@ -53,6 +53,7 @@ pub struct ConfigFile { ftml: Ftml, special_pages: SpecialPages, user: User, + file: FileSection, message: Message, } @@ -181,6 +182,14 @@ struct User { minimum_name_bytes: usize, } +// NOTE: Name conflict with std::fs::File +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] +struct FileSection { + presigned_path_length: usize, + presigned_expiration_minutes: u64, +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "kebab-case")] struct Message { @@ -303,6 +312,11 @@ impl ConfigFile { refill_name_change_days, minimum_name_bytes, }, + file: + FileSection { + presigned_path_length, + presigned_expiration_minutes, + }, message: Message { maximum_subject_bytes: maximum_message_subject_bytes, @@ -424,6 +438,8 @@ impl ConfigFile { )) }, minimum_name_bytes, + presigned_path_length, + presigned_duration: StdDuration::from_secs(presigned_expiration_minutes * 60), maximum_message_subject_bytes, maximum_message_body_bytes, maximum_message_recipients, diff --git a/deepwell/src/config/object.rs b/deepwell/src/config/object.rs index e97e4e38eb..7091686515 100644 --- a/deepwell/src/config/object.rs +++ b/deepwell/src/config/object.rs @@ -200,6 +200,12 @@ pub struct Config { /// Minimum length of bytes in a username. pub minimum_name_bytes: usize, + /// Length of randomly-generated portion of S3 presigned URLs. + pub presigned_path_length: usize, + + /// How long S3 presigned URLs will last before expiry. + pub presigned_duration: StdDuration, + /// Maximum size of the subject line allowed in a direct message. 
pub maximum_message_subject_bytes: usize, From da8a53379051238472532e741cbb3197857475d1 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 21:52:14 -0400 Subject: [PATCH 03/91] Change field to seconds, not Duration. --- deepwell/src/config/file.rs | 4 ++-- deepwell/src/config/object.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deepwell/src/config/file.rs b/deepwell/src/config/file.rs index b4424fcc81..b04142336a 100644 --- a/deepwell/src/config/file.rs +++ b/deepwell/src/config/file.rs @@ -187,7 +187,7 @@ struct User { #[serde(rename_all = "kebab-case")] struct FileSection { presigned_path_length: usize, - presigned_expiration_minutes: u64, + presigned_expiration_minutes: u32, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -439,7 +439,7 @@ impl ConfigFile { }, minimum_name_bytes, presigned_path_length, - presigned_duration: StdDuration::from_secs(presigned_expiration_minutes * 60), + presigned_expiry_secs: presigned_expiration_minutes * 60, maximum_message_subject_bytes, maximum_message_body_bytes, maximum_message_recipients, diff --git a/deepwell/src/config/object.rs b/deepwell/src/config/object.rs index 7091686515..d042c623b2 100644 --- a/deepwell/src/config/object.rs +++ b/deepwell/src/config/object.rs @@ -204,7 +204,7 @@ pub struct Config { pub presigned_path_length: usize, /// How long S3 presigned URLs will last before expiry. - pub presigned_duration: StdDuration, + pub presigned_expiry_secs: u32, /// Maximum size of the subject line allowed in a direct message. pub maximum_message_subject_bytes: usize, From b8003b2f46a6c9144901e62e4c3ea8db56c9c978 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 21:59:29 -0400 Subject: [PATCH 04/91] Begin BlobService::upload_url(). 
--- deepwell/src/services/blob/service.rs | 36 +++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 2f478589c7..bd4a0354f5 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -22,6 +22,9 @@ #![allow(dead_code)] use super::prelude::*; +use crate::utils::assert_is_csprng; +use rand::distributions::{Alphanumeric, DistString}; +use rand::thread_rng; use s3::request_trait::ResponseData; use s3::serde_types::HeadObjectResult; use std::str; @@ -48,10 +51,43 @@ pub const EMPTY_BLOB_MIME: &str = "inode/x-empty; charset=binary"; /// Timestamp is 2019/01/18 at midnight, the date of the first Wikijump commit. pub const EMPTY_BLOB_TIMESTAMP: i64 = 1547769600; +/// The subdirectory in the S3 bucket where all pending uploads are kept. +pub const PRESIGN_DIRECTORY: &str = "uploads"; + #[derive(Debug)] pub struct BlobService; impl BlobService { + /// Creates an S3 presign URL to allow an end user to upload a blob. + /// + /// # Returns + /// The generated presign URL that can be uploaded to. + pub async fn upload_url(ctx: &ServiceContext<'_>) -> Result { + info!("Creating presign upload URL for blob"); + + let config = ctx.config(); + let path = { + let mut path = format!("{PRESIGN_DIRECTORY}/"); + + { + let mut rng = thread_rng(); + assert_is_csprng(&rng); + Alphanumeric.append_string( + &mut rng, + &mut path, + config.presigned_path_length, + ); + } + + path + }; + + let bucket = ctx.s3_bucket(); + let url = bucket.presign_put(&path, config.presigned_expiry_secs, None)?; + + todo!() + } + /// Creates a blob with this data, if it does not already exist. pub async fn create>( ctx: &ServiceContext<'_>, From e68cf45afc3d7befc9c5c7cf454b9134fac987b9 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 22:47:16 -0400 Subject: [PATCH 05/91] Add blob_upload table. 
--- deepwell/migrations/20220906103252_deepwell.sql | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index d05b602bd9..2026ff0e89 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -415,6 +415,15 @@ CREATE TABLE page_vote ( -- Files -- +-- Pending uploads to S3 +-- +-- Stores the presign URL along with the path in the bucket it corresponds to. +-- After processing, it is moved to be a real blob (if new) or deleted (if duplicate). +CREATE TABLE blob_upload ( + s3_path TEXT NOT NULL PRIMARY KEY, + presign_url TEXT NOT NULL UNIQUE, +); + -- Enum types for file_revision CREATE TYPE file_revision_type AS ENUM ( 'create', From 78397fe5cea51746b745a63f79ca16820848e0b3 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 22:51:05 -0400 Subject: [PATCH 06/91] Add timestamp for partial upload pruning. --- deepwell/migrations/20220906103252_deepwell.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 2026ff0e89..977a67b32f 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -422,6 +422,7 @@ CREATE TABLE page_vote ( CREATE TABLE blob_upload ( s3_path TEXT NOT NULL PRIMARY KEY, presign_url TEXT NOT NULL UNIQUE, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now() -- TODO add job to prune dead upload jobs ); -- Enum types for file_revision From cf50e9b566ef3dc03710f151ba38524144395f52 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 5 May 2024 23:30:18 -0400 Subject: [PATCH 07/91] Start BlobService changes for presign URL system. 
--- deepwell/src/services/blob/service.rs | 84 ++++++++++++++++++--------- deepwell/src/services/blob/structs.rs | 8 ++- deepwell/src/services/error.rs | 4 ++ 3 files changed, 67 insertions(+), 29 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index bd4a0354f5..cb8e05a088 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -62,11 +62,9 @@ impl BlobService { /// /// # Returns /// The generated presign URL that can be uploaded to. - pub async fn upload_url(ctx: &ServiceContext<'_>) -> Result { - info!("Creating presign upload URL for blob"); - + pub async fn create_upload(ctx: &ServiceContext<'_>) -> Result { let config = ctx.config(); - let path = { + let s3_path = { let mut path = format!("{PRESIGN_DIRECTORY}/"); { @@ -81,69 +79,100 @@ impl BlobService { path }; + info!("Creating presign upload URL for blob at path {s3_path}"); let bucket = ctx.s3_bucket(); - let url = bucket.presign_put(&path, config.presigned_expiry_secs, None)?; + let presign_url = bucket.presign_put(&s3_path, config.presigned_expiry_secs, None)?; - todo!() + Ok(CreateUploadOutput { + s3_path, + presign_url, + }) } - /// Creates a blob with this data, if it does not already exist. 
- pub async fn create>( - ctx: &ServiceContext<'_>, - data: B, - ) -> Result { - let data = data.as_ref(); - info!("Creating blob (length {})", data.len()); + pub async fn finish_upload(ctx: &ServiceContext<'_>, upload_path: &str) -> Result { + info!("Finishing upload for blob for path {upload_path}"); + let bucket = ctx.s3_bucket(); + + debug!("Download uploaded blob from S3 uploads to get metadata"); + let response = bucket.get_object(upload_path).await?; + let data: Vec = match response.status_code() { + 200 => response.into(), + _ => { + error!("Cannot find blob at presign path {upload_path}"); + return Err(Error::FileNotUploaded); + } + }; // Special handling for empty blobs if data.is_empty() { debug!("File being created is empty, special case"); - return Ok(CreateBlobOutput { + return Ok(FinalizeUploadOutput { hash: EMPTY_BLOB_HASH, mime: str!(EMPTY_BLOB_MIME), size: 0, }); } - // Upload blob - let bucket = ctx.s3_bucket(); - let hash = sha512_hash(data); - let hex_hash = blob_hash_to_hex(&hash); + debug!("Updating blob metadata in database and S3"); // Convert size to correct integer type let size: i64 = data.len().try_into().expect("Buffer size exceeds i64"); - match Self::head(ctx, &hex_hash).await? { + let hash = sha512_hash(&data); + let hex_hash = blob_hash_to_hex(&hash); + + // If the blob exists, then just delete the uploaded one. + // + // If it doesn't, then we need to move it. However, within + // S3 we cannot "move" objects, we have to upload and delete the original. + + let result = match Self::head(ctx, &hex_hash).await? 
{ // Blob exists, copy metadata and return that Some(result) => { debug!("Blob with hash {hex_hash} already exists"); - // Content-Type header should be passed in + // Content-Type header should be returned let mime = result.content_type.ok_or(Error::S3Response)?; - Ok(CreateBlobOutput { hash, mime, size }) + Ok(FinalizeUploadOutput { + hash, + mime, + size, + created: false, + }) } - // Blob doesn't exist, insert it + // Blob doesn't exist, move the uploaded file None => { debug!("Blob with hash {hex_hash} to be created"); // Determine MIME type for the new file let mime = ctx.mime().get_mime_type(data.to_vec()).await?; - // Put into S3 + // Upload S3 object to final destination let response = bucket - .put_object_with_content_type(&hex_hash, data, &mime) + .put_object_with_content_type(&hex_hash, &data, &mime) .await?; // We assume all unexpected statuses are errors, even if 1XX or 2XX match response.status_code() { - 200 => Ok(CreateBlobOutput { hash, mime, size }), - _ => s3_error(&response, "creating S3 blob"), + 200 => Ok(FinalizeUploadOutput { + hash, + mime, + size, + created: true, + }), + _ => s3_error(&response, "creating final S3 blob"), } } - } + }; + + // Delete uploaded version, in either case + bucket.delete_object(upload_path).await?; + + // Return result based on blob status + result } pub async fn get_optional( @@ -160,7 +189,6 @@ impl BlobService { let bucket = ctx.s3_bucket(); let hex_hash = blob_hash_to_hex(hash); let response = bucket.get_object(&hex_hash).await?; - match response.status_code() { 200 => Ok(Some(response.into())), 404 => Ok(None), diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index aa018d9405..c4e58841de 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -22,7 +22,13 @@ use super::prelude::*; use time::OffsetDateTime; #[derive(Debug)] -pub struct CreateBlobOutput { +pub struct CreateUploadOutput { + pub s3_path: String, + pub 
presign_url: String, +} + +#[derive(Debug)] +pub struct FinalizeUploadOutput { pub hash: BlobHash, pub mime: String, pub size: i64, diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index 01f6188bdc..33c74d2759 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -218,6 +218,9 @@ pub enum Error { #[error("File revision does not exist")] FileRevisionNotFound, + #[error("File not uploaded")] + FileNotUploaded, // occurs when presign URL is not uploaded to + #[error("Vote does not exist")] VoteNotFound, @@ -315,6 +318,7 @@ impl Error { Error::MessageDraftNotFound => 2015, Error::BlobNotFound => 2016, Error::TextNotFound => 2017, + Error::FileNotUploaded => 2018, // 2100 -- Existing data Error::UserExists => 2100, From ac80c13e917d9569a706fb8a87e9a21f670d1a8f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 11 May 2024 23:19:40 -0400 Subject: [PATCH 08/91] Add file_pending migration and pending model. --- .../migrations/20220906103252_deepwell.sql | 9 ++++- deepwell/src/models/file_pending.rs | 35 +++++++++++++++++++ deepwell/src/models/mod.rs | 1 + 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 deepwell/src/models/file_pending.rs diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 977a67b32f..71365be413 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -440,6 +440,12 @@ CREATE TYPE file_revision_change AS ENUM ( 'licensing' ); +CREATE TABLE file_pending ( + pending_file_id BIGSERIAL PRIMARY KEY, + s3_path TEXT NOT NULL CHECK length(s3_path) > 1, + presign_url TEXT NOT NULL CHECK length(presign_url) > 1 +); + CREATE TABLE file ( file_id BIGSERIAL PRIMARY KEY, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), @@ -449,6 +455,7 @@ CREATE TABLE file ( name TEXT NOT NULL, page_id BIGINT NOT NULL REFERENCES page(page_id), site_id BIGINT NOT NULL 
REFERENCES site(site_id), + pending_file_id BIGINT REFERENCES file_pending(pending_file_id), UNIQUE (page_id, name, deleted_at) ); @@ -524,7 +531,7 @@ CREATE TYPE message_recipient_type AS ENUM ( -- A "record" is the underlying message data, with its contents, attachments, -- and associated metadata such as sender and recipient(s). CREATE TABLE message_record ( - external_id TEXT PRIMARY KEY, + external_id TEXT PRIMARY KEY, -- ID comes from message_draft created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), drafted_at TIMESTAMP WITH TIME ZONE NOT NULL, retracted_at TIMESTAMP WITH TIME ZONE, diff --git a/deepwell/src/models/file_pending.rs b/deepwell/src/models/file_pending.rs new file mode 100644 index 0000000000..3cc805f3a4 --- /dev/null +++ b/deepwell/src/models/file_pending.rs @@ -0,0 +1,35 @@ +use sea_orm::entity::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] +#[sea_orm(table_name = "file_pending")] +pub struct Model { + #[sea_orm(primary_key)] + pub pending_file_id: i64, + + #[sea_orm(column_type = "Text")] + pub s3_path: String, + + #[sea_orm(column_type = "Text")] + pub presign_url: String, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::file::Entity", + from = "Column::FileId", + to = "super::file::Column::FileId", + on_update = "NoAction", + on_delete = "NoAction" + )] + File, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::File.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/deepwell/src/models/mod.rs b/deepwell/src/models/mod.rs index 238e1d33ff..af50caf834 100644 --- a/deepwell/src/models/mod.rs +++ b/deepwell/src/models/mod.rs @@ -4,6 +4,7 @@ pub mod prelude; pub mod alias; pub mod file; +pub mod file_pending; pub mod file_revision; pub mod filter; pub mod message; From ee6aedb63ef7a0be909d406a8b16cbc6d45be8b4 Mon Sep 17 00:00:00 2001 From: 
Emmie Maeda Date: Sun, 12 May 2024 02:17:54 -0400 Subject: [PATCH 09/91] Start upload code. --- deepwell/src/services/blob/mod.rs | 8 ++- deepwell/src/services/blob/service.rs | 50 +++++++++++--- deepwell/src/services/blob/structs.rs | 8 +-- deepwell/src/services/file/service.rs | 98 +++++++++++++++++++++------ deepwell/src/services/file/structs.rs | 15 +++- 5 files changed, 139 insertions(+), 40 deletions(-) diff --git a/deepwell/src/services/blob/mod.rs b/deepwell/src/services/blob/mod.rs index 1411669dc5..31986dee0c 100644 --- a/deepwell/src/services/blob/mod.rs +++ b/deepwell/src/services/blob/mod.rs @@ -27,6 +27,9 @@ #[allow(unused_imports)] mod prelude { pub use super::super::prelude::*; + pub use super::service::{ + EMPTY_BLOB_HASH, EMPTY_BLOB_MIME, EMPTY_BLOB_TIMESTAMP, PRESIGN_DIRECTORY, + }; pub use super::structs::*; pub use crate::hash::{blob_hash_to_hex, sha512_hash, BlobHash}; } @@ -36,5 +39,8 @@ mod service; mod structs; pub use self::mime::MimeAnalyzer; -pub use self::service::BlobService; +pub use self::service::{ + BlobService, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME, EMPTY_BLOB_TIMESTAMP, + PRESIGN_DIRECTORY, +}; pub use self::structs::*; diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index cb8e05a088..a776f63428 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -22,6 +22,9 @@ #![allow(dead_code)] use super::prelude::*; +use crate::models::file_pending::{ + self, Entity as FilePending, Model as FilePendingModel, +}; use crate::utils::assert_is_csprng; use rand::distributions::{Alphanumeric, DistString}; use rand::thread_rng; @@ -60,10 +63,16 @@ pub struct BlobService; impl BlobService { /// Creates an S3 presign URL to allow an end user to upload a blob. /// + /// Also adds an entry for the pending file upload (`file_pending`), + /// so it can be used by the main `file` table. + /// /// # Returns /// The generated presign URL that can be uploaded to. 
- pub async fn create_upload(ctx: &ServiceContext<'_>) -> Result { + pub async fn create_upload(ctx: &ServiceContext<'_>) -> Result { let config = ctx.config(); + let txn = ctx.transaction(); + + // Generate random S3 path let s3_path = { let mut path = format!("{PRESIGN_DIRECTORY}/"); @@ -81,21 +90,42 @@ impl BlobService { }; info!("Creating presign upload URL for blob at path {s3_path}"); + // Create presign URL let bucket = ctx.s3_bucket(); - let presign_url = bucket.presign_put(&s3_path, config.presigned_expiry_secs, None)?; - - Ok(CreateUploadOutput { - s3_path, - presign_url, - }) + let presign_url = + bucket.presign_put(&s3_path, config.presigned_expiry_secs, None)?; + + // Add pending file entry + let model = file_pending::ActiveModel { + s3_path: Set(s3_path), + presign_url: Set(presign_url), + ..Default::default() + }; + let output = model.insert(txn)?; + Ok(output) } - pub async fn finish_upload(ctx: &ServiceContext<'_>, upload_path: &str) -> Result { - info!("Finishing upload for blob for path {upload_path}"); + pub async fn finish_upload( + ctx: &ServiceContext<'_>, + pending_file_id: i64, + ) -> Result { + info!("Finishing upload for blob for pending file ID {pending_file_id}"); let bucket = ctx.s3_bucket(); + let txn = ctx.transaction(); + + debug!("Getting pending file info"); + let row = FilePending::find() + .filter(file_pending::Column::PendingFileId.eq(pending_file_id)) + .one(txn) + .await?; + + let pending = match row { + Some(pending) => pending, + None => return Err(Error::GeneralNotFound), + }; debug!("Download uploaded blob from S3 uploads to get metadata"); - let response = bucket.get_object(upload_path).await?; + let response = bucket.get_object(&pending.s3_path).await?; let data: Vec = match response.status_code() { 200 => response.into(), _ => { diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index c4e58841de..653f5eef51 100644 --- a/deepwell/src/services/blob/structs.rs +++ 
b/deepwell/src/services/blob/structs.rs @@ -22,13 +22,7 @@ use super::prelude::*; use time::OffsetDateTime; #[derive(Debug)] -pub struct CreateUploadOutput { - pub s3_path: String, - pub presign_url: String, -} - -#[derive(Debug)] -pub struct FinalizeUploadOutput { +pub struct FinalizeBlobUploadOutput { pub hash: BlobHash, pub mime: String, pub size: i64, diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 12eefe1838..290854c1c2 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -20,7 +20,7 @@ use super::prelude::*; use crate::models::file::{self, Entity as File, Model as FileModel}; -use crate::services::blob::CreateBlobOutput; +use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, CreateResurrectionFileRevision, CreateTombstoneFileRevision, FileBlob, @@ -32,11 +32,11 @@ use crate::services::{BlobService, FileRevisionService, FilterService}; pub struct FileService; impl FileService { - /// Uploads a file and tracks it as a separate file entity. + /// Starts a file upload and tracks it as a distinct file entity. /// /// In the background, this stores the blob via content addressing, /// meaning that duplicates are not uploaded twice. 
- pub async fn upload( + pub async fn start_upload( ctx: &ServiceContext<'_>, UploadFile { site_id, @@ -44,18 +44,11 @@ impl FileService { name, revision_comments, user_id, - data, licensing, bypass_filter, }: UploadFile, ) -> Result { - let txn = ctx.transaction(); - - info!( - "Creating file with name '{}', content length {}", - name, - data.len(), - ); + info!("Creating file with name '{}'", name); // Ensure row consistency Self::check_conflicts(ctx, page_id, &name, "create").await?; @@ -65,20 +58,20 @@ impl FileService { Self::run_filter(ctx, site_id, Some(&name)).await?; } - // Upload to S3, get derived metadata - let CreateBlobOutput { hash, mime, size } = - BlobService::create(ctx, &data).await?; + // Add pending file + let pending = BlobService::create_upload(ctx).await?; // Add new file let model = file::ActiveModel { name: Set(name.clone()), site_id: Set(site_id), page_id: Set(page_id), + pending_file_id: Set(Some(pending.pending_file_id)), ..Default::default() }; let file = model.insert(txn).await?; - // Add new file revision + // Add new file revision (with dummy data) let revision_output = FileRevisionService::create_first( ctx, CreateFirstFileRevision { @@ -87,9 +80,9 @@ impl FileService { file_id: file.file_id, user_id, name, - s3_hash: hash, - size_hint: size, - mime_hint: mime, + s3_hash: EMPTY_BLOB_HASH, + mime_hint: EMPTY_BLOB_MIME, + size_hint: 0, licensing, comments: revision_comments, }, @@ -99,6 +92,69 @@ impl FileService { Ok(revision_output) } + pub async fn finish_upload( + ctx: &ServiceContext<'_>, + FinishUploadFile { + site_id, + page_id, + file_id, + pending_file_id, + }: FinishUploadFile, + ) -> Result { + info!( + "Finishing file upload with site ID {} page ID {} file ID {} pending ID {}", + site_id, page_id, file_id, pending_file_id, + ); + + // Ensure file exists + let txn = ctx.transaction(); + let row = File::find() + .filter( + Condition::all() + .add(file::Column::SiteId.eq(site_id)) + .add(file::Column::PageId.eq(page_id)) + 
.add(file::Column::FileId.eq(file_id)) + .add(file::Column::DeletedAt.is_null()) + .add(file::Column::PendingFileId.eq(Some(pending_file_id))), + ) + .one(txn) + .await?; + + if row.is_none() { + error!("No pending file found"); + return Err(Error::FileNotFound); + } + + // Get first file revision + let file_revision = FileRevision::find() + .filter( + Condition::all() + .add(file_revision::Column::FileId.eq(file_id)) + .add(file_revision::Column::RevisionNumber.eq(0)) + .add( + file_revision::Column::RevisionType.eq(FileRevisionType::Create), + ), + ) + .one(txn) + .await?; + + // Update file revision to add the uploaded data + let FinalizeUploadOutput { + hash, + mime, + size, + created, + } = BlobService::finish_upload(ctx, pending_file_id).await?; + + let mut model = file_revision.into_active_model(); + model.s3_hash = Set(hash); + model.mime_hint = Set(mime); + model.size_hint = Set(size); + model.update(txn).await?; + + Ok(FinishUploadFileOutput { created }) + } + /// Edits a file, including the ability to upload a new version. pub async fn edit( ctx: &ServiceContext<'_>, @@ -400,7 +456,8 @@ impl FileService { .add(condition) .add(file::Column::SiteId.eq(site_id)) .add(file::Column::PageId.eq(page_id)) - .add(file::Column::DeletedAt.is_null()), + .add(file::Column::DeletedAt.is_null()) + .add(file::Column::PendingFileId.is_null()), ) .one(txn) .await? 
@@ -435,7 +492,8 @@ impl FileService { Condition::all() .add(file::Column::PageId.eq(page_id)) .add(file::Column::Name.eq(name)) - .add(file::Column::DeletedAt.is_null()), + .add(file::Column::DeletedAt.is_null()) + .add(file::Column::PendingFileId.is_null()), ) .into_tuple() .one(txn) diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index cdb44ea368..b15cf608b1 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -33,7 +33,6 @@ pub struct UploadFile { pub name: String, pub revision_comments: String, pub user_id: i64, - pub data: Bytes<'static>, pub licensing: JsonValue, // TODO #[serde(default)] @@ -42,6 +41,19 @@ pub struct UploadFile { pub type UploadFileOutput = CreateFirstFileRevisionOutput; +#[derive(Deserialize, Debug, Clone)] +pub struct FinishUploadFile { + pub site_id: i64, + pub page_id: i64, + pub file_id: i64, + pub pending_file_id: i64, +} + +#[derive(Serialize, Debug, Copy, Clone)] +pub struct FinishUploadFileOutput { + pub created: bool, +} + #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { pub site_id: i64, @@ -105,7 +117,6 @@ pub struct EditFile { #[serde(default)] pub struct EditFileBody { pub name: ProvidedValue, - pub data: ProvidedValue>, pub licensing: ProvidedValue, } From e9211e6b2ff6ffd4e0ba9c8d2880dd88295dfad4 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 12 May 2024 02:28:19 -0400 Subject: [PATCH 10/91] Rename file_pending -> blob_pending. 
--- .../migrations/20220906103252_deepwell.sql | 27 ++++++-------- deepwell/src/models/blob_pending.rs | 17 +++++++++ deepwell/src/models/file.rs | 1 + deepwell/src/models/file_pending.rs | 35 ------------------- deepwell/src/models/mod.rs | 2 +- deepwell/src/services/blob/service.rs | 14 ++++---- 6 files changed, 37 insertions(+), 59 deletions(-) create mode 100644 deepwell/src/models/blob_pending.rs delete mode 100644 deepwell/src/models/file_pending.rs diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 71365be413..2525c8bda3 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -412,19 +412,20 @@ CREATE TABLE page_vote ( ); -- --- Files +-- Blobs -- --- Pending uploads to S3 --- --- Stores the presign URL along with the path in the bucket it corresponds to. --- After processing, it is moved to be a real blob (if new) or deleted (if duplicate). -CREATE TABLE blob_upload ( - s3_path TEXT NOT NULL PRIMARY KEY, - presign_url TEXT NOT NULL UNIQUE, - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now() -- TODO add job to prune dead upload jobs +-- Manages blobs that are being uploaded by the user +CREATE TABLE blob_pending ( + pending_file_id BIGSERIAL PRIMARY KEY, + s3_path TEXT NOT NULL CHECK length(s3_path) > 1, + presign_url TEXT NOT NULL CHECK length(presign_url) > 1 ); +-- +-- Files +-- + -- Enum types for file_revision CREATE TYPE file_revision_type AS ENUM ( 'create', @@ -440,12 +441,6 @@ CREATE TYPE file_revision_change AS ENUM ( 'licensing' ); -CREATE TABLE file_pending ( - pending_file_id BIGSERIAL PRIMARY KEY, - s3_path TEXT NOT NULL CHECK length(s3_path) > 1, - presign_url TEXT NOT NULL CHECK length(presign_url) > 1 -); - CREATE TABLE file ( file_id BIGSERIAL PRIMARY KEY, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), @@ -455,7 +450,7 @@ CREATE TABLE file ( name TEXT NOT NULL, page_id BIGINT NOT NULL 
REFERENCES page(page_id),
     site_id BIGINT NOT NULL
         REFERENCES site(site_id),
-    pending_file_id BIGINT REFERENCES file_pending(pending_file_id),
+    pending_blob_id BIGINT REFERENCES blob_pending(pending_file_id),
 
     UNIQUE (page_id, name, deleted_at)
 );
diff --git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs
new file mode 100644
index 0000000000..70ea08d4cd
--- /dev/null
+++ b/deepwell/src/models/blob_pending.rs
@@ -0,0 +1,17 @@
+use sea_orm::entity::prelude::*;
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)]
+#[sea_orm(table_name = "blob_pending")]
+pub struct Model {
+    #[sea_orm(primary_key)]
+    pub pending_blob_id: i64,
+
+    #[sea_orm(column_type = "Text")]
+    pub s3_path: String,
+
+    #[sea_orm(column_type = "Text")]
+    pub presign_url: String,
+}
+
+impl ActiveModelBehavior for ActiveModel {}
diff --git a/deepwell/src/models/file.rs b/deepwell/src/models/file.rs
index ce05ac6f77..76e961ce75 100644
--- a/deepwell/src/models/file.rs
+++ b/deepwell/src/models/file.rs
@@ -16,6 +16,7 @@ pub struct Model {
     pub name: String,
     pub page_id: i64,
     pub site_id: i64,
+    pub pending_blob_id: i64,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/deepwell/src/models/file_pending.rs b/deepwell/src/models/file_pending.rs
deleted file mode 100644
index 3cc805f3a4..0000000000
--- a/deepwell/src/models/file_pending.rs
+++ /dev/null
@@ -1,35 +0,0 @@
-use sea_orm::entity::prelude::*;
-use serde::{Deserialize, Serialize};
-
-#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)]
-#[sea_orm(table_name = "file_pending")]
-pub struct Model {
-    #[sea_orm(primary_key)]
-    pub pending_file_id: i64,
-
-    #[sea_orm(column_type = "Text")]
-    pub s3_path: String,
-
-    #[sea_orm(column_type = "Text")]
-    pub presign_url: String,
-}
-
-#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
-pub enum Relation {
-    #[sea_orm(
-        belongs_to = "super::file::Entity",
- from = "Column::FileId", - to = "super::file::Column::FileId", - on_update = "NoAction", - on_delete = "NoAction" - )] - File, -} - -impl Related for Entity { - fn to() -> RelationDef { - Relation::File.def() - } -} - -impl ActiveModelBehavior for ActiveModel {} diff --git a/deepwell/src/models/mod.rs b/deepwell/src/models/mod.rs index af50caf834..3ba7524a08 100644 --- a/deepwell/src/models/mod.rs +++ b/deepwell/src/models/mod.rs @@ -3,8 +3,8 @@ pub mod prelude; pub mod alias; +pub mod blob_pending; pub mod file; -pub mod file_pending; pub mod file_revision; pub mod filter; pub mod message; diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index a776f63428..736971298e 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -22,8 +22,8 @@ #![allow(dead_code)] use super::prelude::*; -use crate::models::file_pending::{ - self, Entity as FilePending, Model as FilePendingModel, +use crate::models::blob_pending::{ + self, Entity as BlobPending, Model as BlobPendingModel, }; use crate::utils::assert_is_csprng; use rand::distributions::{Alphanumeric, DistString}; @@ -68,7 +68,7 @@ impl BlobService { /// /// # Returns /// The generated presign URL that can be uploaded to. 
- pub async fn create_upload(ctx: &ServiceContext<'_>) -> Result { + pub async fn create_upload(ctx: &ServiceContext<'_>) -> Result { let config = ctx.config(); let txn = ctx.transaction(); @@ -109,13 +109,13 @@ impl BlobService { ctx: &ServiceContext<'_>, pending_file_id: i64, ) -> Result { - info!("Finishing upload for blob for pending file ID {pending_file_id}"); + info!("Finishing upload for blob for pending blob ID {pending_blob_id}"); let bucket = ctx.s3_bucket(); let txn = ctx.transaction(); - debug!("Getting pending file info"); - let row = FilePending::find() - .filter(file_pending::Column::PendingFileId.eq(pending_file_id)) + debug!("Getting pending blob info"); + let row = BlobPending::find() + .filter(file_pending::Column::PendingBlobId.eq(pending_file_id)) .one(txn) .await?; From 8f852daccaceff1c3c991a5c5e375f8588ba98d7 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 12 May 2024 03:58:28 -0400 Subject: [PATCH 11/91] Fix compilation. --- deepwell/src/endpoints/file.rs | 4 ++ deepwell/src/models/blob_pending.rs | 3 ++ deepwell/src/models/file.rs | 2 +- deepwell/src/services/blob/service.rs | 34 +++++++------- deepwell/src/services/file/service.rs | 61 +++++++++++++++++-------- deepwell/src/services/file/structs.rs | 2 +- deepwell/src/services/import/service.rs | 4 ++ deepwell/src/services/user/service.rs | 6 ++- 8 files changed, 77 insertions(+), 39 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index 773f19865e..9fcb7a1014 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -83,6 +83,8 @@ pub async fn file_upload( ctx: &ServiceContext<'_>, params: Params<'static>, ) -> Result { + // FIXME file upload endpoint + /* let input: UploadFile = params.parse()?; info!( @@ -94,6 +96,8 @@ pub async fn file_upload( ); FileService::upload(ctx, input).await + */ + todo!() } pub async fn file_edit( diff --git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs 
index 70ea08d4cd..8d06fd4e65 100644 --- a/deepwell/src/models/blob_pending.rs +++ b/deepwell/src/models/blob_pending.rs @@ -14,4 +14,7 @@ pub struct Model { pub presign_url: String, } +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation {} + impl ActiveModelBehavior for ActiveModel {} diff --git a/deepwell/src/models/file.rs b/deepwell/src/models/file.rs index 76e961ce75..b4d97b2aa0 100644 --- a/deepwell/src/models/file.rs +++ b/deepwell/src/models/file.rs @@ -16,7 +16,7 @@ pub struct Model { pub name: String, pub page_id: i64, pub site_id: i64, - pub pending_blob_id: i64, + pub pending_blob_id: Option, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 736971298e..c92244bf27 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -38,7 +38,7 @@ use time::OffsetDateTime; /// /// Even though it is not the SHA-512 hash, for simplicity we treat the hash /// value with all zeroes to be the blob address for the empty blob. -/// This empty file is not actually stored in S3 but instead is a "virtual file", +/// This empty blob is not actually stored in S3 but instead is a "virtual blob", /// considered to have always been present in `BlobService`. pub const EMPTY_BLOB_HASH: BlobHash = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -63,8 +63,8 @@ pub struct BlobService; impl BlobService { /// Creates an S3 presign URL to allow an end user to upload a blob. /// - /// Also adds an entry for the pending file upload (`file_pending`), - /// so it can be used by the main `file` table. + /// Also adds an entry for the pending blob upload (`blob_pending`), + /// so it can be used by the main `blob` table. /// /// # Returns /// The generated presign URL that can be uploaded to. 
@@ -95,19 +95,19 @@ impl BlobService { let presign_url = bucket.presign_put(&s3_path, config.presigned_expiry_secs, None)?; - // Add pending file entry - let model = file_pending::ActiveModel { + // Add pending blob entry + let model = blob_pending::ActiveModel { s3_path: Set(s3_path), presign_url: Set(presign_url), ..Default::default() }; - let output = model.insert(txn)?; + let output = model.insert(txn).await?; Ok(output) } pub async fn finish_upload( ctx: &ServiceContext<'_>, - pending_file_id: i64, + pending_blob_id: i64, ) -> Result { info!("Finishing upload for blob for pending blob ID {pending_blob_id}"); let bucket = ctx.s3_bucket(); @@ -115,21 +115,21 @@ impl BlobService { debug!("Getting pending blob info"); let row = BlobPending::find() - .filter(file_pending::Column::PendingBlobId.eq(pending_file_id)) + .filter(blob_pending::Column::PendingBlobId.eq(pending_blob_id)) .one(txn) .await?; - let pending = match row { + let BlobPendingModel { s3_path, .. } = match row { Some(pending) => pending, None => return Err(Error::GeneralNotFound), }; debug!("Download uploaded blob from S3 uploads to get metadata"); - let response = bucket.get_object(&pending.s3_path).await?; + let response = bucket.get_object(&s3_path).await?; let data: Vec = match response.status_code() { 200 => response.into(), _ => { - error!("Cannot find blob at presign path {upload_path}"); + error!("Cannot find blob at presign path {s3_path}"); return Err(Error::FileNotUploaded); } }; @@ -137,7 +137,7 @@ impl BlobService { // Special handling for empty blobs if data.is_empty() { debug!("File being created is empty, special case"); - return Ok(FinalizeUploadOutput { + return Ok(FinalizeBlobUploadOutput { hash: EMPTY_BLOB_HASH, mime: str!(EMPTY_BLOB_MIME), size: 0, @@ -165,7 +165,7 @@ impl BlobService { // Content-Type header should be returned let mime = result.content_type.ok_or(Error::S3Response)?; - Ok(FinalizeUploadOutput { + Ok(FinalizeBlobUploadOutput { hash, mime, size, @@ -173,11 
+173,11 @@ impl BlobService { }) } - // Blob doesn't exist, move the uploaded file + // Blob doesn't exist, move it from uploaded None => { debug!("Blob with hash {hex_hash} to be created"); - // Determine MIME type for the new file + // Determine MIME type for the new blob let mime = ctx.mime().get_mime_type(data.to_vec()).await?; // Upload S3 object to final destination @@ -187,7 +187,7 @@ impl BlobService { // We assume all unexpected statuses are errors, even if 1XX or 2XX match response.status_code() { - 200 => Ok(FinalizeUploadOutput { + 200 => Ok(FinalizeBlobUploadOutput { hash, mime, size, @@ -199,7 +199,7 @@ impl BlobService { }; // Delete uploaded version, in either case - bucket.delete_object(upload_path).await?; + bucket.delete_object(&s3_path).await?; // Return result based on blob status result diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 290854c1c2..da4c8be92b 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -20,6 +20,10 @@ use super::prelude::*; use crate::models::file::{self, Entity as File, Model as FileModel}; +use crate::models::file_revision::{ + self, Entity as FileRevision, Model as FileRevisionModel, +}; +use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, @@ -66,9 +70,11 @@ impl FileService { name: Set(name.clone()), site_id: Set(site_id), page_id: Set(page_id), - pending_file_id: Set(Some(pending.pending_file_id)), + pending_blob_id: Set(Some(pending.pending_blob_id)), ..Default::default() }; + + let txn = ctx.transaction(); let file = model.insert(txn).await?; // Add new file revision (with dummy data) @@ -81,7 +87,7 @@ impl FileService { user_id, name, s3_hash: EMPTY_BLOB_HASH, - mime_hint: EMPTY_BLOB_MIME, + mime_hint: 
str!(EMPTY_BLOB_MIME), size_hint: 0, licensing, comments: revision_comments, @@ -98,12 +104,12 @@ impl FileService { site_id, page_id, file_id, - pending_file_id, + pending_blob_id, }: FinishUploadFile, ) -> Result { info!( "Finishing file upload with site ID {} page ID {} file ID {} pending ID {}", - site_id, page_id, file_id, pending_file_id, + site_id, page_id, file_id, pending_blob_id, ); // Ensure file exists @@ -115,7 +121,7 @@ impl FileService { .add(file::Column::PageId.eq(page_id)) .add(file::Column::FileId.eq(file_id)) .add(file::Column::DeletedAt.is_null()) - .add(file::Column::PendingFileId.eq(Some(pending_file_id))), + .add(file::Column::PendingBlobId.eq(Some(pending_blob_id))), ) .one(txn) .await?; @@ -138,16 +144,31 @@ impl FileService { .one(txn) .await?; + let file_revision = match file_revision { + Some(file_revision) => file_revision, + None => return Err(Error::FileNotFound), + }; + + // Delete the pending blob row + let mut model = file::ActiveModel { + file_id: Set(file_id), + pending_blob_id: Set(None), + ..Default::default() + }; + model.update(txn).await?; + + File::delete_by_id(pending_blob_id).exec(txn).await?; + // Update file revision to add the uploaded data - let FinalizeUploadOutput { + let FinalizeBlobUploadOutput { hash, mime, size, created, - } = BlobService::finish_upload(ctx, pending_file_id).await?; + } = BlobService::finish_upload(ctx, pending_blob_id).await?; let mut model = file_revision.into_active_model(); - model.s3_hash = Set(hash); + model.s3_hash = Set(hash.to_vec()); model.mime_hint = Set(mime); model.size_hint = Set(size); model.update(txn).await?; @@ -174,11 +195,7 @@ impl FileService { let last_revision = FileRevisionService::get_latest(ctx, site_id, page_id, file_id).await?; - let EditFileBody { - name, - data, - licensing, - } = body; + let EditFileBody { name, licensing } = body; // Verify name change // @@ -193,11 +210,17 @@ impl FileService { } // Upload to S3, get derived metadata + // FIXME upload new file 
revision + /* let blob = match data { ProvidedValue::Unset => ProvidedValue::Unset, ProvidedValue::Set(bytes) => { - let CreateBlobOutput { hash, mime, size } = - BlobService::create(ctx, &bytes).await?; + let FinalizeBlobUploadOutput { + hash, + mime, + size, + created: _, + } = BlobService::finalize_upload(ctx, &bytes).await?; ProvidedValue::Set(FileBlob { s3_hash: hash, @@ -206,8 +229,8 @@ impl FileService { }) } }; - - // Make database changes + */ + let blob = ProvidedValue::Unset; // Update file metadata let model = file::ActiveModel { @@ -457,7 +480,7 @@ impl FileService { .add(file::Column::SiteId.eq(site_id)) .add(file::Column::PageId.eq(page_id)) .add(file::Column::DeletedAt.is_null()) - .add(file::Column::PendingFileId.is_null()), + .add(file::Column::PendingBlobId.is_null()), ) .one(txn) .await? @@ -493,7 +516,7 @@ impl FileService { .add(file::Column::PageId.eq(page_id)) .add(file::Column::Name.eq(name)) .add(file::Column::DeletedAt.is_null()) - .add(file::Column::PendingFileId.is_null()), + .add(file::Column::PendingBlobId.is_null()), ) .into_tuple() .one(txn) diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index b15cf608b1..bb15ff97f9 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -46,7 +46,7 @@ pub struct FinishUploadFile { pub site_id: i64, pub page_id: i64, pub file_id: i64, - pub pending_file_id: i64, + pub pending_blob_id: i64, } #[derive(Serialize, Debug, Copy, Clone)] diff --git a/deepwell/src/services/import/service.rs b/deepwell/src/services/import/service.rs index a0827f7ac4..f3ec50cb27 100644 --- a/deepwell/src/services/import/service.rs +++ b/deepwell/src/services/import/service.rs @@ -68,8 +68,12 @@ impl ImportService { let avatar_s3_hash = match avatar { None => None, Some(bytes) => { + // FIXME import - uploading avatars + /* let output = BlobService::create(ctx, &bytes).await?; Some(output.hash.to_vec()) + */ + todo!() } }; diff --git 
a/deepwell/src/services/user/service.rs b/deepwell/src/services/user/service.rs index fb5bff0b2a..a88069f938 100644 --- a/deepwell/src/services/user/service.rs +++ b/deepwell/src/services/user/service.rs @@ -22,7 +22,7 @@ use super::prelude::*; use crate::models::sea_orm_active_enums::{AliasType, UserType}; use crate::models::user::{self, Entity as User, Model as UserModel}; use crate::services::alias::CreateAlias; -use crate::services::blob::{BlobService, CreateBlobOutput}; +use crate::services::blob::{BlobService, FinalizeBlobUploadOutput}; use crate::services::email::{EmailClassification, EmailService}; use crate::services::filter::{FilterClass, FilterType}; use crate::services::{AliasService, FilterService, PasswordService}; @@ -425,10 +425,14 @@ impl UserService { let s3_hash = match avatar { None => None, Some(blob) => { + // FIXME blob upload + /* let CreateBlobOutput { hash, .. } = BlobService::create(ctx, &blob).await?; Some(hash.to_vec()) + */ + todo!() } }; From f89f871fbccebdbb5c5124df250bb214e009db12 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Wed, 12 Jun 2024 07:21:42 -0400 Subject: [PATCH 12/91] [WIP] Start division between new and edit file uploads. 
--- deepwell/src/services/blob/service.rs | 3 + deepwell/src/services/file/service.rs | 123 ++++++++++++++++++-------- deepwell/src/services/file/structs.rs | 29 +++++- 3 files changed, 115 insertions(+), 40 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index c92244bf27..91c71bd767 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -134,6 +134,9 @@ impl BlobService { } }; + debug!("Deleting pending blob"); + BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; + // Special handling for empty blobs if data.is_empty() { debug!("File being created is empty, special case"); diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index da4c8be92b..a025ec65a9 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -36,13 +36,15 @@ use crate::services::{BlobService, FileRevisionService, FilterService}; pub struct FileService; impl FileService { + /// Creates a new file. + /// /// Starts a file upload and tracks it as a distinct file entity. /// /// In the background, this stores the blob via content addressing, /// meaning that duplicates are not uploaded twice. 
- pub async fn start_upload( + pub async fn start_new_upload( ctx: &ServiceContext<'_>, - UploadFile { + UploadNewFile { site_id, page_id, name, @@ -50,9 +52,10 @@ impl FileService { user_id, licensing, bypass_filter, - }: UploadFile, + }: UploadNewFile, ) -> Result { info!("Creating file with name '{}'", name); + let txn = ctx.transaction(); // Ensure row consistency Self::check_conflicts(ctx, page_id, &name, "create").await?; @@ -74,10 +77,9 @@ impl FileService { ..Default::default() }; - let txn = ctx.transaction(); let file = model.insert(txn).await?; - // Add new file revision (with dummy data) + // Add file revision (with dummy file data) let revision_output = FileRevisionService::create_first( ctx, CreateFirstFileRevision { @@ -98,17 +100,17 @@ impl FileService { Ok(revision_output) } - pub async fn finish_upload( + pub async fn finish_new_upload( ctx: &ServiceContext<'_>, - FinishUploadFile { + FinishUploadNewFile { site_id, page_id, file_id, pending_blob_id, - }: FinishUploadFile, + }: FinishUploadNewFile, ) -> Result { info!( - "Finishing file upload with site ID {} page ID {} file ID {} pending ID {}", + "Finishing new file upload with site ID {} page ID {} file ID {} pending ID {}", site_id, page_id, file_id, pending_blob_id, ); @@ -157,8 +159,6 @@ impl FileService { }; model.update(txn).await?; - File::delete_by_id(pending_blob_id).exec(txn).await?; - // Update file revision to add the uploaded data let FinalizeBlobUploadOutput { hash, @@ -176,7 +176,82 @@ impl FileService { Ok(FinishUploadFileOutput { created }) } - /// Edits a file, including the ability to upload a new version. + /// Edits a file, uploading a new file version. 
+    pub async fn start_edit_upload(
+        ctx: &ServiceContext<'_>,
+        UploadFileEdit {
+            site_id,
+            page_id,
+            file_id,
+            user_id,
+            revision_comments,
+        }: UploadFileEdit,
+    ) -> Result<UploadFileEditOutput> {
+        info!("Uploading new version to file ID {file_id}");
+
+        let txn = ctx.transaction();
+        let last_revision =
+            FileRevisionService::get_latest(ctx, site_id, page_id, file_id).await?;
+
+        // Add pending file
+        let pending = BlobService::create_upload(ctx).await?;
+
+        // Add file revision (with dummy file data)
+        let revision_output = FileRevisionService::create(
+            ctx,
+            CreateFileRevision {
+                site_id,
+                page_id,
+                file_id,
+                user_id,
+                comments: revision_comments,
+                body: CreateFileRevisionBody {
+                    blob: FileBlob {
+                        s3_hash: EMPTY_BLOB_HASH,
+                        mime_hint: str!(EMPTY_BLOB_MIME),
+                        size_hint: 0,
+                    },
+                    ..Default::default()
+                },
+            },
+            last_revision,
+        )
+        .await?;
+
+        Ok(revision_output)
+    }
+
+    pub async fn finish_edit_upload(
+        ctx: &ServiceContext<'_>,
+        FinishUploadFileEdit {
+            site_id,
+            page_id,
+            file_id,
+            pending_blob_id,
+        }: FinishUploadFileEdit,
+    ) -> Result<FinishUploadFileEditOutput> {
+        info!(
+            "Finishing file edit upload with site ID {} page ID {} file ID {} pending ID {}",
+            site_id, page_id, file_id, pending_blob_id,
+        );
+
+        // Get latest file revision
+        // TODO
+
+        // Update file metadata
+        let model = file::ActiveModel {
+            file_id: Set(file_id),
+            updated_at: Set(Some(now())),
+            ..Default::default()
+        };
+        model.update(ctx.transaction()).await?;
+
+        todo!()
+    }
+
+    /// Edits a file, creating a new revision.
+    ///
+    /// Cannot be used to upload a new file version.
pub async fn edit( ctx: &ServiceContext<'_>, EditFile { @@ -209,29 +284,6 @@ impl FileService { } } - // Upload to S3, get derived metadata - // FIXME upload new file revision - /* - let blob = match data { - ProvidedValue::Unset => ProvidedValue::Unset, - ProvidedValue::Set(bytes) => { - let FinalizeBlobUploadOutput { - hash, - mime, - size, - created: _, - } = BlobService::finalize_upload(ctx, &bytes).await?; - - ProvidedValue::Set(FileBlob { - s3_hash: hash, - size_hint: size, - mime_hint: mime, - }) - } - }; - */ - let blob = ProvidedValue::Unset; - // Update file metadata let model = file::ActiveModel { file_id: Set(file_id), @@ -251,7 +303,6 @@ impl FileService { comments: revision_comments, body: CreateFileRevisionBody { name, - blob, licensing, ..Default::default() }, diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index bb15ff97f9..3e05110d0b 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -27,7 +27,7 @@ use serde_json::Value as JsonValue; use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] -pub struct UploadFile { +pub struct UploadNewFile { pub site_id: i64, pub page_id: i64, pub name: String, @@ -39,10 +39,11 @@ pub struct UploadFile { pub bypass_filter: bool, } -pub type UploadFileOutput = CreateFirstFileRevisionOutput; +// TODO +pub type UploadNewFileOutput = CreateFirstFileRevisionOutput; #[derive(Deserialize, Debug, Clone)] -pub struct FinishUploadFile { +pub struct FinishUploadNewFile { pub site_id: i64, pub page_id: i64, pub file_id: i64, @@ -50,10 +51,30 @@ pub struct FinishUploadFile { } #[derive(Serialize, Debug, Copy, Clone)] -pub struct FinishUploadFileOutput { +pub struct FinishUploadNewFileOutput { pub created: bool, } +#[derive(Deserialize, Debug, Clone)] +pub struct UploadFileEdit { + pub site_id: i64, + pub page_id: i64, + pub file_id: i64, + pub user_id: i64, + pub revision_comments: String, +} + +pub type UploadFileEditOutput = 
CreateFileRevisionOutput; + +#[derive(Deserialize, Debug, Clone)] +pub struct FinishUploadFileEdit { +} + +#[derive(Serialize, Debug, Clone)] +pub struct FinishUploadFileEditOutput { + // TODO +} + #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { pub site_id: i64, From 7e25aef9af56c6506cbd492310293c4db29fcffb Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 19:18:27 -0400 Subject: [PATCH 13/91] Add created_at column to blob_pending. --- deepwell/migrations/20220906103252_deepwell.sql | 1 + deepwell/src/models/blob_pending.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 2525c8bda3..423eef789e 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -418,6 +418,7 @@ CREATE TABLE page_vote ( -- Manages blobs that are being uploaded by the user CREATE TABLE blob_pending ( pending_file_id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), s3_path TEXT NOT NULL CHECK length(s3_path) > 1, presign_url TEXT NOT NULL CHECK length(presign_url) > 1 ); diff --git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs index 8d06fd4e65..a03282ac97 100644 --- a/deepwell/src/models/blob_pending.rs +++ b/deepwell/src/models/blob_pending.rs @@ -6,6 +6,7 @@ use serde::{Deserialize, Serialize}; pub struct Model { #[sea_orm(primary_key)] pub pending_blob_id: i64, + pub created_at: TimeDateTimeWithTimeZone, #[sea_orm(column_type = "Text")] pub s3_path: String, From b9813c93ad7d5aa2651c46c7a539812931ab83c6 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 19:21:51 -0400 Subject: [PATCH 14/91] Use find_by_id() instead of find(). 
--- deepwell/src/services/blob/service.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 91c71bd767..b647a71ef5 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -114,8 +114,7 @@ impl BlobService { let txn = ctx.transaction(); debug!("Getting pending blob info"); - let row = BlobPending::find() - .filter(blob_pending::Column::PendingBlobId.eq(pending_blob_id)) + let row = BlobPending::find_by_id(pending_blob_id) .one(txn) .await?; From b36b64c2a812f72d0fa2f2f90d96cb325123c78a Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 21:15:31 -0400 Subject: [PATCH 15/91] Run rustfmt. --- deepwell/src/services/blob/service.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index b647a71ef5..7b13752d8f 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -114,10 +114,7 @@ impl BlobService { let txn = ctx.transaction(); debug!("Getting pending blob info"); - let row = BlobPending::find_by_id(pending_blob_id) - .one(txn) - .await?; - + let row = BlobPending::find_by_id(pending_blob_id).one(txn).await?; let BlobPendingModel { s3_path, .. } = match row { Some(pending) => pending, None => return Err(Error::GeneralNotFound), From 8d967add0fdc590b070891829541a8917d53a6de Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 23:11:48 -0400 Subject: [PATCH 16/91] Add FileRevisionService::create_pending(). 
--- deepwell/src/services/file/service.rs | 12 ++----- deepwell/src/services/file/structs.rs | 3 +- .../src/services/file_revision/service.rs | 36 ++++++++++++++++--- .../src/services/file_revision/structs.rs | 11 ++++++ 4 files changed, 47 insertions(+), 15 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index a025ec65a9..c5f05e4b30 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -79,25 +79,19 @@ impl FileService { let file = model.insert(txn).await?; - // Add file revision (with dummy file data) - let revision_output = FileRevisionService::create_first( + FileRevisionService::create_pending( ctx, - CreateFirstFileRevision { + CreatePendingFileRevision { site_id, page_id, file_id: file.file_id, user_id, name, - s3_hash: EMPTY_BLOB_HASH, - mime_hint: str!(EMPTY_BLOB_MIME), - size_hint: 0, licensing, comments: revision_comments, }, ) - .await?; - - Ok(revision_output) + .await } pub async fn finish_new_upload( diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 3e05110d0b..5450c5bab2 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -67,8 +67,7 @@ pub struct UploadFileEdit { pub type UploadFileEditOutput = CreateFileRevisionOutput; #[derive(Deserialize, Debug, Clone)] -pub struct FinishUploadFileEdit { -} +pub struct FinishUploadFileEdit {} #[derive(Serialize, Debug, Clone)] pub struct FinishUploadFileEditOutput { diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 3af73a3d6d..d0431e388d 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -169,12 +169,40 @@ impl FileRevisionService { })) } - /// Creates the first revision for a newly-uploaded file. + /// Creates a dummy first revision for a file pending upload. 
+ pub async fn create_pending( + ctx: &ServiceContext<'_>, + CreatePendingFileRevision { + site_id, + page_id, + file_id, + user_id, + name, + licensing, + comments, + }: CreatePendingFileRevision, + ) -> Result { + FileRevisionService::create_first( + ctx, + CreateFirstFileRevision { + site_id, + page_id, + file_id, + user_id, + name, + s3_hash: EMPTY_BLOB_HASH, + mime_hint: str!(EMPTY_BLOB_MIME), + size_hint: 0, + licensing, + comments, + }, + ) + .await + } + + /// Creates the first revision for an already-uploaded file. /// /// See `RevisionService::create_first()`. - /// - /// # Panics - /// If the given previous revision is for a different file or page, this method will panic. pub async fn create_first( ctx: &ServiceContext<'_>, CreateFirstFileRevision { diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index 3044bb52a8..e8d791d45b 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -68,6 +68,17 @@ pub struct CreateFirstFileRevision { pub comments: String, } +#[derive(Debug, Clone)] +pub struct CreatePendingFileRevision { + pub site_id: i64, + pub page_id: i64, + pub file_id: i64, + pub user_id: i64, + pub name: String, + pub licensing: serde_json::Value, + pub comments: String, +} + #[derive(Serialize, Debug, Clone, Default)] pub struct CreateFirstFileRevisionOutput { pub file_id: i64, From 2d27a6869d04baabc5ab44b577c4d7fb24931370 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 23:17:30 -0400 Subject: [PATCH 17/91] Add FileRevisionService::get_first(). 
--- deepwell/src/services/file/service.rs        | 17 +------------
 .../src/services/file_revision/service.rs | 25 +++++++++++++++++++
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs
index c5f05e4b30..a57914daf1 100644
--- a/deepwell/src/services/file/service.rs
+++ b/deepwell/src/services/file/service.rs
@@ -128,22 +128,7 @@
         }
 
         // Get first file revision
-        let file_revision = FileRevision::find()
-            .filter(
-                Condition::all()
-                    .add(file_revision::Column::FileId.eq(file_id))
-                    .add(file_revision::Column::RevisionNumber.eq(0))
-                    .add(
-                        file_revision::Column::RevisionType.eq(FileRevisionType::Create),
-                    ),
-            )
-            .one(txn)
-            .await?;
-
-        let file_revision = match file_revision {
-            Some(file_revision) => file_revision,
-            None => return Err(Error::FileNotFound),
-        };
+        let file_revision = FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?;
diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs
index d0431e388d..cf5f1b34b0 100644
--- a/deepwell/src/services/file_revision/service.rs
+++ b/deepwell/src/services/file_revision/service.rs
@@ -443,6 +443,31 @@ impl FileRevisionService {
         Ok(revision)
     }
 
+    /// Get the first revision for this file.
+    pub async fn get_first(
+        ctx: &ServiceContext<'_>,
+        site_id: i64,
+        page_id: i64,
+        file_id: i64,
+    ) -> Result<FileRevisionModel> {
+        let model = FileRevision::find()
+            .filter(
+                Condition::all()
+                    .add(file_revision::Column::SiteId.eq(site_id))
+                    .add(file_revision::Column::PageId.eq(page_id))
+                    .add(file_revision::Column::FileId.eq(file_id))
+                    .add(file_revision::Column::RevisionNumber.eq(0))
+                    .add(
+                        file_revision::Column::RevisionType.eq(FileRevisionType::Create),
+                    ),
+            )
+            .one(ctx.transaction())
+            .await?
+            .ok_or(Error::FileRevisionNotFound)?;
+
+        Ok(model)
+    }
+
     /// Get the latest revision for this file.
/// /// See `RevisionService::get_latest()`. From e11c41637cd6b2b5156e00cfced9ffa63e051fa8 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 23:28:24 -0400 Subject: [PATCH 18/91] Update comment. --- deepwell/src/services/file/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index a57914daf1..dfec6ef99c 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -130,7 +130,7 @@ impl FileService { // Get first file revision let file_revision = FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; - // Delete the pending blob row + // Remove pending_blob connection let mut model = file::ActiveModel { file_id: Set(file_id), pending_blob_id: Set(None), From 51f3fa05f86920651e68037acc8b6d6702be6e43 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 23:44:47 -0400 Subject: [PATCH 19/91] Rename structs. --- deepwell/src/services/file/service.rs | 10 +++++----- deepwell/src/services/file/structs.rs | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index dfec6ef99c..9dccb662aa 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -44,7 +44,7 @@ impl FileService { /// meaning that duplicates are not uploaded twice. 
pub async fn start_new_upload( ctx: &ServiceContext<'_>, - UploadNewFile { + StartFileUpload { site_id, page_id, name, @@ -52,8 +52,8 @@ impl FileService { user_id, licensing, bypass_filter, - }: UploadNewFile, - ) -> Result { + }: StartFileUpload, + ) -> Result { info!("Creating file with name '{}'", name); let txn = ctx.transaction(); @@ -96,12 +96,12 @@ impl FileService { pub async fn finish_new_upload( ctx: &ServiceContext<'_>, - FinishUploadNewFile { + FinishUploadFile { site_id, page_id, file_id, pending_blob_id, - }: FinishUploadNewFile, + }: FinishUploadFile, ) -> Result { info!( "Finishing new file upload with site ID {} page ID {} file ID {} pending ID {}", diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 5450c5bab2..9b52d9ac86 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -27,7 +27,7 @@ use serde_json::Value as JsonValue; use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] -pub struct UploadNewFile { +pub struct StartFileUpload { pub site_id: i64, pub page_id: i64, pub name: String, @@ -40,10 +40,10 @@ pub struct UploadNewFile { } // TODO -pub type UploadNewFileOutput = CreateFirstFileRevisionOutput; +pub type StartFileUploadOutput = CreateFirstFileRevisionOutput; #[derive(Deserialize, Debug, Clone)] -pub struct FinishUploadNewFile { +pub struct FinishUploadFile { pub site_id: i64, pub page_id: i64, pub file_id: i64, @@ -51,7 +51,7 @@ pub struct FinishUploadNewFile { } #[derive(Serialize, Debug, Copy, Clone)] -pub struct FinishUploadNewFileOutput { +pub struct FinishUploadFileOutput { pub created: bool, } From 657818d241da363c51637024656359a48d49baa3 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 23:47:02 -0400 Subject: [PATCH 20/91] Run rustfmt. 
--- deepwell/src/services/file/service.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 9dccb662aa..b40511f1a2 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -128,7 +128,8 @@ impl FileService { } // Get first file revision - let file_revision = FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; + let file_revision = + FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; // Remove pending_blob connection let mut model = file::ActiveModel { From ba27c9f6e78aba7a7d4fdc25dcf42540d76cb2bf Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 8 Sep 2024 23:56:50 -0400 Subject: [PATCH 21/91] Add proper StartFileUploadOutput struct. --- deepwell/src/services/file/service.rs | 13 +++++++++---- deepwell/src/services/file/structs.rs | 8 ++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index b40511f1a2..6e8380075a 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -78,8 +78,7 @@ impl FileService { }; let file = model.insert(txn).await?; - - FileRevisionService::create_pending( + let file_revision = FileRevisionService::create_pending( ctx, CreatePendingFileRevision { site_id, @@ -91,7 +90,13 @@ impl FileService { comments: revision_comments, }, ) - .await + .await?; + + Ok(StartFileUploadOutput { + pending_blob_id: pending.pending_blob_id, + presign_url: pending.presign_url, + file_revision_id: file_revision.file_revision_id, + }) } pub async fn finish_new_upload( @@ -108,7 +113,7 @@ impl FileService { site_id, page_id, file_id, pending_blob_id, ); - // Ensure file exists + // Ensure a pending file exists let txn = ctx.transaction(); let row = File::find() .filter( diff --git a/deepwell/src/services/file/structs.rs 
b/deepwell/src/services/file/structs.rs index 9b52d9ac86..4cf87c520b 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -39,8 +39,12 @@ pub struct StartFileUpload { pub bypass_filter: bool, } -// TODO -pub type StartFileUploadOutput = CreateFirstFileRevisionOutput; +#[derive(Serialize, Debug, Clone)] +pub struct StartFileUploadOutput { + pub pending_blob_id: i64, + pub presign_url: String, + pub file_revision_id: i64, +} #[derive(Deserialize, Debug, Clone)] pub struct FinishUploadFile { From 0284984312294a2fb88d6d75277bc6b7a9d8ee38 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 9 Sep 2024 00:14:59 -0400 Subject: [PATCH 22/91] Reword column clear again. --- deepwell/src/services/file/service.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 6e8380075a..67a401a552 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -136,13 +136,15 @@ impl FileService { let file_revision = FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; - // Remove pending_blob connection - let mut model = file::ActiveModel { - file_id: Set(file_id), - pending_blob_id: Set(None), - ..Default::default() - }; - model.update(txn).await?; + // Clear pending_blob column + { + let mut model = file::ActiveModel { + file_id: Set(file_id), + pending_blob_id: Set(None), + ..Default::default() + }; + model.update(txn).await?; + } // Update file revision to add the uploaded data let FinalizeBlobUploadOutput { From bf66088d469652f81b93643d1467e46239958c43 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 9 Sep 2024 22:41:13 -0400 Subject: [PATCH 23/91] Update comments. 
--- deepwell/src/services/file/service.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 67a401a552..de4b740140 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -147,6 +147,7 @@ impl FileService { } // Update file revision to add the uploaded data + // This deletes the pending blob row let FinalizeBlobUploadOutput { hash, mime, @@ -154,11 +155,14 @@ impl FileService { created, } = BlobService::finish_upload(ctx, pending_blob_id).await?; - let mut model = file_revision.into_active_model(); - model.s3_hash = Set(hash.to_vec()); - model.mime_hint = Set(mime); - model.size_hint = Set(size); - model.update(txn).await?; + // Update first file revision with uploaded data + { + let mut model = file_revision.into_active_model(); + model.s3_hash = Set(hash.to_vec()); + model.mime_hint = Set(mime); + model.size_hint = Set(size); + model.update(txn).await?; + } Ok(FinishUploadFileOutput { created }) } From ddba81d0d37607a0114e2d8b2d4caf8bf4b5a99f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 9 Sep 2024 22:41:54 -0400 Subject: [PATCH 24/91] Remove dead_code suppression. --- deepwell/src/services/blob/structs.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index 653f5eef51..4a800818aa 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -29,7 +29,6 @@ pub struct FinalizeBlobUploadOutput { } #[derive(Debug)] -#[allow(dead_code)] // TEMP pub struct BlobMetadata { pub mime: String, pub size: i64, From b0bff98a5c7d3883f685d6b8976bb9d81f96143a Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 9 Sep 2024 23:36:03 -0400 Subject: [PATCH 25/91] Add TODOs for incomplete file pruning jobs. 
--- deepwell/src/services/job/structs.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deepwell/src/services/job/structs.rs b/deepwell/src/services/job/structs.rs index 8f483e98ac..5a863f8630 100644 --- a/deepwell/src/services/job/structs.rs +++ b/deepwell/src/services/job/structs.rs @@ -28,6 +28,8 @@ pub enum Job { }, PruneSessions, PruneText, + // TODO add job for pruning incomplete uploads (pending_blob table) + // TODO also add a job (file table) NameChangeRefill, LiftExpiredPunishments, } From 26f345d6d3a265b5c8e6ab2b5f59700a949fcc2f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 9 Sep 2024 23:44:03 -0400 Subject: [PATCH 26/91] Add FileRevisionService::finish_upload(). And complete FileService::finish_new_upload(). --- deepwell/src/services/blob/service.rs | 7 ++-- deepwell/src/services/file/service.rs | 35 +++++++------------ .../src/services/file_revision/service.rs | 34 ++++++++++++++++++ .../src/services/file_revision/structs.rs | 8 +++++ 4 files changed, 59 insertions(+), 25 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 7b13752d8f..c7d5799d23 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -109,7 +109,7 @@ impl BlobService { ctx: &ServiceContext<'_>, pending_blob_id: i64, ) -> Result { - info!("Finishing upload for blob for pending blob ID {pending_blob_id}"); + info!("Finishing upload for blob for pending ID {pending_blob_id}"); let bucket = ctx.s3_bucket(); let txn = ctx.transaction(); @@ -161,6 +161,9 @@ impl BlobService { Some(result) => { debug!("Blob with hash {hex_hash} already exists"); + // TODO: Should we ever update the mime type? + // In case of changing file formats, etc. 
+ // Content-Type header should be returned let mime = result.content_type.ok_or(Error::S3Response)?; @@ -172,7 +175,7 @@ impl BlobService { }) } - // Blob doesn't exist, move it from uploaded + // Blob doesn't exist, "move" it None => { debug!("Blob with hash {hex_hash} to be created"); diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index de4b740140..84b709a77b 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -132,10 +132,6 @@ impl FileService { return Err(Error::FileNotFound); } - // Get first file revision - let file_revision = - FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; - // Clear pending_blob column { let mut model = file::ActiveModel { @@ -146,25 +142,18 @@ impl FileService { model.update(txn).await?; } - // Update file revision to add the uploaded data - // This deletes the pending blob row - let FinalizeBlobUploadOutput { - hash, - mime, - size, - created, - } = BlobService::finish_upload(ctx, pending_blob_id).await?; - - // Update first file revision with uploaded data - { - let mut model = file_revision.into_active_model(); - model.s3_hash = Set(hash.to_vec()); - model.mime_hint = Set(mime); - model.size_hint = Set(size); - model.update(txn).await?; - } - - Ok(FinishUploadFileOutput { created }) + // Finally, update the first file revision with the uploaded data. + // This gets the data from BlobService and then deletes the row. + FileRevisionService::finish_upload( + ctx, + FinishUploadFile { + site_id, + page_id, + file_id, + pending_blob_id, + }, + ) + .await } /// Edits a file, uploading a new file version. 
diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index cf5f1b34b0..9fb70b182f 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -443,6 +443,40 @@ impl FileRevisionService { Ok(revision) } + /// For a pending file, fill in the uploaded data fields. + pub async fn finish_upload( + ctx: &ServiceContext<'_>, + FinishUpload { + site_id, + page_id, + file_id, + pending_blob_id, + }: FinishUpload, + ) -> Result { + let txn = ctx.transaction(); + + // Move upload to final location, get its metadata + let FinalizeBlobUploadOutput { + hash, + mime, + size, + created, + } = BlobService::finish_upload(ctx, pending_blob_id).await?; + + // Get first file revision + let file_revision = + FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; + + // Update it with uploaded data + let mut model = file_revision.into_active_model(); + model.s3_hash = Set(hash.to_vec()); + model.mime_hint = Set(mime); + model.size_hint = Set(size); + + let file_revision = model.update(txn).await?; + Ok(file_revision) + } + /// Get the first revision for this file. pub async fn get_first( ctx: &ServiceContext<'_>, diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index e8d791d45b..d5d2f1869c 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -123,6 +123,14 @@ pub struct UpdateFileRevision { pub hidden: Vec, } +#[derive(Serialize, Debug, Clone)] +pub struct FinishUpload { + pub site_id: i64, + pub page_id: i64, + pub file_id: i64, + pub pending_blob_id: i64, +} + #[derive(Deserialize, Debug, Clone)] pub struct GetFileRevisionRange { pub page_id: i64, From d682fae3ae21006737d725d4648e4e5a13b3c245 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 10 Sep 2024 00:17:24 -0400 Subject: [PATCH 27/91] Improve output of finish_new_upload(). 
--- deepwell/src/endpoints/file.rs | 2 +- deepwell/src/services/blob/structs.rs | 1 + deepwell/src/services/file/service.rs | 7 ++++--- deepwell/src/services/file/structs.rs | 7 ++----- deepwell/src/services/file_revision/service.rs | 17 +++++++++++++---- deepwell/src/services/file_revision/structs.rs | 12 +++++++++++- 6 files changed, 32 insertions(+), 14 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index 9fcb7a1014..18021119f7 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -25,7 +25,7 @@ use crate::services::blob::BlobService; use crate::services::file::{ DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetBlobOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, - RestoreFileOutput, UploadFile, UploadFileOutput, + RestoreFileOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index 4a800818aa..d931ac264a 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -26,6 +26,7 @@ pub struct FinalizeBlobUploadOutput { pub hash: BlobHash, pub mime: String, pub size: i64, + pub created: bool, } #[derive(Debug)] diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 84b709a77b..9799d6d89f 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -27,7 +27,8 @@ use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, - CreateResurrectionFileRevision, CreateTombstoneFileRevision, FileBlob, + CreatePendingFileRevision, CreateResurrectionFileRevision, + CreateTombstoneFileRevision, FileBlob, }; use 
crate::services::filter::{FilterClass, FilterType}; use crate::services::{BlobService, FileRevisionService, FilterService}; @@ -146,7 +147,7 @@ impl FileService { // This gets the data from BlobService and then deletes the row. FileRevisionService::finish_upload( ctx, - FinishUploadFile { + FinishUpload { site_id, page_id, file_id, @@ -166,7 +167,7 @@ impl FileService { user_id, revision_comments, }: UploadFileEdit, - ) -> Result<_UploadFileEditOutput> { + ) -> Result { info!("Uploading new version to file ID {file_id}"); let txn = ctx.transaction(); diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 4cf87c520b..185ed80f24 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -20,7 +20,7 @@ use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::file_revision::{ - CreateFileRevisionOutput, CreateFirstFileRevisionOutput, + CreateFileRevisionOutput, CreateFirstFileRevisionOutput, FinishUploadOutput, }; use crate::web::{Bytes, FileDetails, ProvidedValue, Reference}; use serde_json::Value as JsonValue; @@ -54,10 +54,7 @@ pub struct FinishUploadFile { pub pending_blob_id: i64, } -#[derive(Serialize, Debug, Copy, Clone)] -pub struct FinishUploadFileOutput { - pub created: bool, -} +pub type FinishUploadFileOutput = FinishUploadOutput; #[derive(Deserialize, Debug, Clone)] pub struct UploadFileEdit { diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 9fb70b182f..184b3ff728 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -22,7 +22,8 @@ use super::prelude::*; use crate::models::file_revision::{ self, Entity as FileRevision, Model as FileRevisionModel, }; -use crate::services::{OutdateService, PageService}; +use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; +use crate::services::{BlobService, 
OutdateService, PageService}; use crate::web::FetchDirection; use once_cell::sync::Lazy; use std::num::NonZeroI32; @@ -452,7 +453,7 @@ impl FileRevisionService { file_id, pending_blob_id, }: FinishUpload, - ) -> Result { + ) -> Result { let txn = ctx.transaction(); // Move upload to final location, get its metadata @@ -472,9 +473,16 @@ impl FileRevisionService { model.s3_hash = Set(hash.to_vec()); model.mime_hint = Set(mime); model.size_hint = Set(size); - let file_revision = model.update(txn).await?; - Ok(file_revision) + + Ok(FinishUploadOutput { + file_id, + file_revision_id: file_revision.revision_id, + s3_hash: Bytes::from(file_revision.s3_hash), + mime_hint: file_revision.mime_hint, + size_hint: file_revision.size_hint, + created, + }) } /// Get the first revision for this file. @@ -484,6 +492,7 @@ impl FileRevisionService { page_id: i64, file_id: i64, ) -> Result { + let txn = ctx.transaction(); let model = FileRevision::find() .filter( Condition::all() diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index d5d2f1869c..1be072ec51 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -123,7 +123,7 @@ pub struct UpdateFileRevision { pub hidden: Vec, } -#[derive(Serialize, Debug, Clone)] +#[derive(Deserialize, Debug, Clone)] pub struct FinishUpload { pub site_id: i64, pub page_id: i64, @@ -131,6 +131,16 @@ pub struct FinishUpload { pub pending_blob_id: i64, } +#[derive(Serialize, Debug, Copy, Clone)] +pub struct FinishUploadOutput { + pub file_id: i64, + pub file_revision_id: i64, + pub s3_hash: Bytes, + pub mime_hint: String, + pub size_hint: i64, + pub created: bool, +} + #[derive(Deserialize, Debug, Clone)] pub struct GetFileRevisionRange { pub page_id: i64, From f6cc6c4bc95fa77bca2a575e3c389e436efc9e2e Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 10 Sep 2024 00:18:14 -0400 Subject: [PATCH 28/91] Delete dummy structs. 
--- deepwell/src/services/file/structs.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 185ed80f24..25564859e5 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -67,14 +67,6 @@ pub struct UploadFileEdit { pub type UploadFileEditOutput = CreateFileRevisionOutput; -#[derive(Deserialize, Debug, Clone)] -pub struct FinishUploadFileEdit {} - -#[derive(Serialize, Debug, Clone)] -pub struct FinishUploadFileEditOutput { - // TODO -} - #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { pub site_id: i64, From 74b23ecdf71eba3717a7ec29941e3409f591c62d Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 10 Sep 2024 00:19:23 -0400 Subject: [PATCH 29/91] Rename types. --- deepwell/src/services/file/service.rs | 2 +- deepwell/src/services/file/structs.rs | 4 ++-- deepwell/src/services/file_revision/service.rs | 8 ++++---- deepwell/src/services/file_revision/structs.rs | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 9799d6d89f..c36a5d18cf 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -147,7 +147,7 @@ impl FileService { // This gets the data from BlobService and then deletes the row. 
FileRevisionService::finish_upload( ctx, - FinishUpload { + FinishUploadFile { site_id, page_id, file_id, diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 25564859e5..a808ccb3a9 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -20,7 +20,7 @@ use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::file_revision::{ - CreateFileRevisionOutput, CreateFirstFileRevisionOutput, FinishUploadOutput, + CreateFileRevisionOutput, CreateFirstFileRevisionOutput, FinishFileRevisionUploadOutput, }; use crate::web::{Bytes, FileDetails, ProvidedValue, Reference}; use serde_json::Value as JsonValue; @@ -54,7 +54,7 @@ pub struct FinishUploadFile { pub pending_blob_id: i64, } -pub type FinishUploadFileOutput = FinishUploadOutput; +pub type FinishUploadFileOutput = FinishFileRevisionUploadOutput; #[derive(Deserialize, Debug, Clone)] pub struct UploadFileEdit { diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 184b3ff728..7f2fc8211f 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -447,13 +447,13 @@ impl FileRevisionService { /// For a pending file, fill in the uploaded data fields. 
pub async fn finish_upload( ctx: &ServiceContext<'_>, - FinishUpload { + FinishFileRevisionUpload { site_id, page_id, file_id, pending_blob_id, - }: FinishUpload, - ) -> Result { + }: FinishFileRevisionUpload, + ) -> Result { let txn = ctx.transaction(); // Move upload to final location, get its metadata @@ -475,7 +475,7 @@ impl FileRevisionService { model.size_hint = Set(size); let file_revision = model.update(txn).await?; - Ok(FinishUploadOutput { + Ok(FinishFileRevisionUploadOutput { file_id, file_revision_id: file_revision.revision_id, s3_hash: Bytes::from(file_revision.s3_hash), diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index 1be072ec51..4cfd3662db 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -124,7 +124,7 @@ pub struct UpdateFileRevision { } #[derive(Deserialize, Debug, Clone)] -pub struct FinishUpload { +pub struct FinishFileRevisionUpload { pub site_id: i64, pub page_id: i64, pub file_id: i64, @@ -132,7 +132,7 @@ pub struct FinishUpload { } #[derive(Serialize, Debug, Copy, Clone)] -pub struct FinishUploadOutput { +pub struct FinishFileRevisionUploadOutput { pub file_id: i64, pub file_revision_id: i64, pub s3_hash: Bytes, From cd2b08ed1b3eacaea66a360582037a85f682547c Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 10 Sep 2024 23:35:37 -0400 Subject: [PATCH 30/91] Merge pending jobs. 
--- deepwell/src/services/job/structs.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepwell/src/services/job/structs.rs b/deepwell/src/services/job/structs.rs index 5a863f8630..63502ea7f4 100644 --- a/deepwell/src/services/job/structs.rs +++ b/deepwell/src/services/job/structs.rs @@ -28,8 +28,7 @@ pub enum Job { }, PruneSessions, PruneText, - // TODO add job for pruning incomplete uploads (pending_blob table) - // TODO also add a job (file table) + // TODO add job for pruning incomplete uploads (pending_blob table and corresponding column in file table) NameChangeRefill, LiftExpiredPunishments, } From 52a03798a97d05945d5b2769d3fc6411912ea53d Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 10 Sep 2024 23:39:30 -0400 Subject: [PATCH 31/91] Stub out edits for now. First need to figure out initial uploads, then will unify them in some nice, generic way. --- deepwell/src/services/file/service.rs | 55 +++------------------------ 1 file changed, 5 insertions(+), 50 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index c36a5d18cf..d126b0fc5e 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -157,7 +157,8 @@ impl FileService { .await } - /// Edits a file, uploading a new file version. + /// Edits a file by uploading a new file version. 
+ /// TODO needs to be implemented pub async fn start_edit_upload( ctx: &ServiceContext<'_>, UploadFileEdit { @@ -168,40 +169,10 @@ impl FileService { revision_comments, }: UploadFileEdit, ) -> Result { - info!("Uploading new version to file ID {file_id}"); - - let txn = ctx.transaction(); - let last_revision = - FileRevisionService::get_latest(ctx, site_id, page_id, file_id).await?; - - // Add pending file - let pending = BlobService::create_upload(ctx).await?; - - // Add file revision (with dummy file data) - let revision_output = FileRevisionService::create( - ctx, - CreateFileRevision { - site_id, - page_id, - file_id, - user_id, - comments: revision_comments, - body: CreateFileRevisionBody { - blob: FileBlob { - s3_hash: EMPTY_BLOB_HASH, - mime_hint: str!(EMPTY_BLOB_MIME), - size_hint: 0, - }, - ..Default::default() - }, - }, - last_revision, - ) - .await?; - - Ok(revision_output) + todo!() } + // TODO pub async fn finish_edit_upload( ctx: &ServiceContext<'_>, FinishUploadFileEdit { @@ -210,23 +181,7 @@ impl FileService { file_id, pending_blob_id, }: FinishUploadFileEdit, - ) -> Result<_> { - info!( - "Finishing file edit upload with site ID {} page ID {} file ID {} pending ID {}", - site_id, page_id, file_id, pending_blob_id, - ); - - // Get latest file revision - // TODO - - // Update file metadata - let model = file::ActiveModel { - file_id: Set(file_id), - updated_at: Set(Some(now())), - ..Default::default() - }; - model.update(txn).await?; - + ) -> Result<()> { todo!() } From 8020aa16ba3249b4b1d02017186ab57d688e702c Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Wed, 18 Sep 2024 03:14:42 -0400 Subject: [PATCH 32/91] Fix build errors. 
--- deepwell/src/endpoints/file.rs | 4 ++-- deepwell/src/services/blob/service.rs | 1 + deepwell/src/services/file/service.rs | 4 ++-- deepwell/src/services/file/structs.rs | 5 ++++- deepwell/src/services/file_revision/service.rs | 2 +- deepwell/src/services/file_revision/structs.rs | 6 +++--- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index 18021119f7..57102b43f0 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -25,7 +25,7 @@ use crate::services::blob::BlobService; use crate::services::file::{ DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetBlobOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, - RestoreFileOutput, + RestoreFileOutput, StartFileUploadOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; @@ -82,7 +82,7 @@ pub async fn file_get( pub async fn file_upload( ctx: &ServiceContext<'_>, params: Params<'static>, -) -> Result { +) -> Result { // FIXME file upload endpoint /* let input: UploadFile = params.parse()?; diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index c7d5799d23..42c670a5c1 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -140,6 +140,7 @@ impl BlobService { hash: EMPTY_BLOB_HASH, mime: str!(EMPTY_BLOB_MIME), size: 0, + created: false, }); } diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index d126b0fc5e..4555613470 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -28,7 +28,7 @@ use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLO use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, CreatePendingFileRevision, CreateResurrectionFileRevision, - CreateTombstoneFileRevision, FileBlob, + 
CreateTombstoneFileRevision, FileBlob, FinishFileRevisionUpload, }; use crate::services::filter::{FilterClass, FilterType}; use crate::services::{BlobService, FileRevisionService, FilterService}; @@ -147,7 +147,7 @@ impl FileService { // This gets the data from BlobService and then deletes the row. FileRevisionService::finish_upload( ctx, - FinishUploadFile { + FinishFileRevisionUpload { site_id, page_id, file_id, diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index a808ccb3a9..4ecf6d7253 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -20,7 +20,8 @@ use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::file_revision::{ - CreateFileRevisionOutput, CreateFirstFileRevisionOutput, FinishFileRevisionUploadOutput, + CreateFileRevisionOutput, CreateFirstFileRevisionOutput, + FinishFileRevisionUploadOutput, }; use crate::web::{Bytes, FileDetails, ProvidedValue, Reference}; use serde_json::Value as JsonValue; @@ -67,6 +68,8 @@ pub struct UploadFileEdit { pub type UploadFileEditOutput = CreateFileRevisionOutput; +pub type FinishUploadFileEdit = FinishUploadFile; + #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { pub site_id: i64, diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 7f2fc8211f..46c5a5fbda 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -24,7 +24,7 @@ use crate::models::file_revision::{ }; use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; use crate::services::{BlobService, OutdateService, PageService}; -use crate::web::FetchDirection; +use crate::web::{Bytes, FetchDirection}; use once_cell::sync::Lazy; use std::num::NonZeroI32; diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index 4cfd3662db..737f29a6ab 
100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -21,7 +21,7 @@ use super::prelude::*; use crate::hash::BlobHash; use crate::services::page_revision::PageRevisionCountOutput; -use crate::web::FetchDirection; +use crate::web::{Bytes, FetchDirection}; #[derive(Debug, Clone)] pub struct CreateFileRevision { @@ -131,11 +131,11 @@ pub struct FinishFileRevisionUpload { pub pending_blob_id: i64, } -#[derive(Serialize, Debug, Copy, Clone)] +#[derive(Serialize, Debug, Clone)] pub struct FinishFileRevisionUploadOutput { pub file_id: i64, pub file_revision_id: i64, - pub s3_hash: Bytes, + pub s3_hash: Bytes<'static>, pub mime_hint: String, pub size_hint: i64, pub created: bool, From d5db6acda2933bccf6f2392a0dca24eef02c2c71 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Wed, 18 Sep 2024 22:14:23 -0400 Subject: [PATCH 33/91] Fix CHECK constraints. --- deepwell/migrations/20220906103252_deepwell.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 423eef789e..ff229d4d4e 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -419,8 +419,8 @@ CREATE TABLE page_vote ( CREATE TABLE blob_pending ( pending_file_id BIGSERIAL PRIMARY KEY, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), - s3_path TEXT NOT NULL CHECK length(s3_path) > 1, - presign_url TEXT NOT NULL CHECK length(presign_url) > 1 + s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), + presign_url TEXT NOT NULL CHECK (length(presign_url) > 1) ); -- From 5bd2568cfae4830e7bcff28dbc73ff5510c2dabb Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 20 Sep 2024 04:09:43 -0400 Subject: [PATCH 34/91] Implement file_upload_* API methods. 
--- deepwell/src/api.rs | 3 ++- deepwell/src/endpoints/file.rs | 37 +++++++++++++++++---------- deepwell/src/services/file/service.rs | 2 +- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/deepwell/src/api.rs b/deepwell/src/api.rs index 5f5fd75434..83ae19375a 100644 --- a/deepwell/src/api.rs +++ b/deepwell/src/api.rs @@ -262,7 +262,8 @@ async fn build_module(app_state: ServerState) -> anyhow::Result, params: Params<'static>, ) -> Result { - // FIXME file upload endpoint - /* - let input: UploadFile = params.parse()?; + let input: StartFileUpload = params.parse()?; info!( - "Uploading file '{}' ({} bytes) to page ID {} in site ID {}", - input.name, - input.data.len(), + "Starting file upload '{}' to page ID {} in site ID {}", + input.name, input.page_id, input.site_id, + ); + + FileService::start_new_upload(ctx, input).await +} + +pub async fn file_upload_finish( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + let input: FinishUploadFile = params.parse()?; + + info!( + "Finishing file upload (pending blob ID {} for file ID {} in page ID {} in site ID {}", + input.pending_blob_id, + input.file_id, input.page_id, input.site_id, ); - FileService::upload(ctx, input).await - */ - todo!() + FileService::finish_new_upload(ctx, input).await } pub async fn file_edit( diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 4555613470..af03fa9050 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -135,7 +135,7 @@ impl FileService { // Clear pending_blob column { - let mut model = file::ActiveModel { + let model = file::ActiveModel { file_id: Set(file_id), pending_blob_id: Set(None), ..Default::default() From 57f4d966f321fdcd8c21f0a14408552f1521cef7 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 20 Sep 2024 04:25:47 -0400 Subject: [PATCH 35/91] Rename file creation structs. 
--- deepwell/src/endpoints/file.rs | 16 +++++++------- deepwell/src/services/file/service.rs | 31 +++++++-------------------- deepwell/src/services/file/structs.rs | 10 ++++----- 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index f7c68c8922..c520b870f5 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -23,10 +23,10 @@ use crate::models::file::Model as FileModel; use crate::models::file_revision::Model as FileRevisionModel; use crate::services::blob::BlobService; use crate::services::file::{ - DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, FinishUploadFile, - FinishUploadFileOutput, GetBlobOutput, GetFileDetails, GetFileOutput, MoveFile, - MoveFileOutput, RestoreFile, RestoreFileOutput, StartFileUpload, - StartFileUploadOutput, + DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, FinishFileCreation, + FinishFileCreationOutput, GetBlobOutput, GetFileDetails, GetFileOutput, MoveFile, + MoveFileOutput, RestoreFile, RestoreFileOutput, StartFileCreation, + StartFileCreationOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; @@ -83,8 +83,8 @@ pub async fn file_get( pub async fn file_upload_start( ctx: &ServiceContext<'_>, params: Params<'static>, -) -> Result { - let input: StartFileUpload = params.parse()?; +) -> Result { + let input: StartFileCreation = params.parse()?; info!( "Starting file upload '{}' to page ID {} in site ID {}", @@ -97,8 +97,8 @@ pub async fn file_upload_start( pub async fn file_upload_finish( ctx: &ServiceContext<'_>, params: Params<'static>, -) -> Result { - let input: FinishUploadFile = params.parse()?; +) -> Result { + let input: FinishFileCreation = params.parse()?; info!( "Finishing file upload (pending blob ID {} for file ID {} in page ID {} in site ID {}", diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index af03fa9050..436c2db99c 100644 --- 
a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -45,7 +45,7 @@ impl FileService { /// meaning that duplicates are not uploaded twice. pub async fn start_new_upload( ctx: &ServiceContext<'_>, - StartFileUpload { + StartFileCreation { site_id, page_id, name, @@ -53,8 +53,8 @@ impl FileService { user_id, licensing, bypass_filter, - }: StartFileUpload, - ) -> Result { + }: StartFileCreation, + ) -> Result { info!("Creating file with name '{}'", name); let txn = ctx.transaction(); @@ -93,7 +93,7 @@ impl FileService { ) .await?; - Ok(StartFileUploadOutput { + Ok(StartFileCreationOutput { pending_blob_id: pending.pending_blob_id, presign_url: pending.presign_url, file_revision_id: file_revision.file_revision_id, @@ -102,13 +102,13 @@ impl FileService { pub async fn finish_new_upload( ctx: &ServiceContext<'_>, - FinishUploadFile { + FinishFileCreation { site_id, page_id, file_id, pending_blob_id, - }: FinishUploadFile, - ) -> Result { + }: FinishFileCreation, + ) -> Result { info!( "Finishing new file upload with site ID {} page ID {} file ID {} pending ID {}", site_id, page_id, file_id, pending_blob_id, @@ -161,27 +161,12 @@ impl FileService { /// TODO needs to be implemented pub async fn start_edit_upload( ctx: &ServiceContext<'_>, - UploadFileEdit { - site_id, - page_id, - file_id, - user_id, - revision_comments, - }: UploadFileEdit, ) -> Result { todo!() } // TODO - pub async fn finish_edit_upload( - ctx: &ServiceContext<'_>, - FinishUploadFileEdit { - site_id, - page_id, - file_id, - pending_blob_id, - }: FinishUploadFileEdit, - ) -> Result<()> { + pub async fn finish_edit_upload(ctx: &ServiceContext<'_>) -> Result<()> { todo!() } diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 4ecf6d7253..7e0e5b4091 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -28,7 +28,7 @@ use serde_json::Value as JsonValue; use time::OffsetDateTime; 
#[derive(Deserialize, Debug, Clone)] -pub struct StartFileUpload { +pub struct StartFileCreation { pub site_id: i64, pub page_id: i64, pub name: String, @@ -41,21 +41,21 @@ pub struct StartFileUpload { } #[derive(Serialize, Debug, Clone)] -pub struct StartFileUploadOutput { +pub struct StartFileCreationOutput { pub pending_blob_id: i64, pub presign_url: String, pub file_revision_id: i64, } #[derive(Deserialize, Debug, Clone)] -pub struct FinishUploadFile { +pub struct FinishFileCreation { pub site_id: i64, pub page_id: i64, pub file_id: i64, pub pending_blob_id: i64, } -pub type FinishUploadFileOutput = FinishFileRevisionUploadOutput; +pub type FinishFileCreationOutput = FinishFileRevisionUploadOutput; #[derive(Deserialize, Debug, Clone)] pub struct UploadFileEdit { @@ -68,8 +68,6 @@ pub struct UploadFileEdit { pub type UploadFileEditOutput = CreateFileRevisionOutput; -pub type FinishUploadFileEdit = FinishUploadFile; - #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { pub site_id: i64, From f4ed066f71da6a0b4ce4541cd64ad68f3ee4e1df Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 20 Sep 2024 04:33:41 -0400 Subject: [PATCH 36/91] Rename upload API methods. 
--- deepwell/src/api.rs | 6 ++++-- deepwell/src/endpoints/file.rs | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/deepwell/src/api.rs b/deepwell/src/api.rs index 83ae19375a..ffdc79f319 100644 --- a/deepwell/src/api.rs +++ b/deepwell/src/api.rs @@ -262,8 +262,10 @@ async fn build_module(app_state: ServerState) -> anyhow::Result, params: Params<'static>, ) -> Result { @@ -94,7 +94,7 @@ pub async fn file_upload_start( FileService::start_new_upload(ctx, input).await } -pub async fn file_upload_finish( +pub async fn file_create_finish( ctx: &ServiceContext<'_>, params: Params<'static>, ) -> Result { @@ -111,6 +111,22 @@ pub async fn file_upload_finish( FileService::finish_new_upload(ctx, input).await } +// TODO +pub async fn file_edit_start( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result<()> { + todo!() +} + +// TODO +pub async fn file_edit_finish( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result<()> { + todo!() +} + pub async fn file_edit( ctx: &ServiceContext<'_>, params: Params<'static>, From 3ffb213fc11c23e7dccb274719daa18aa74dda75 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 20 Sep 2024 04:38:36 -0400 Subject: [PATCH 37/91] Remove unused struct. 
--- deepwell/src/services/file/service.rs | 2 +- deepwell/src/services/file/structs.rs | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 436c2db99c..f33afad558 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -161,7 +161,7 @@ impl FileService { /// TODO needs to be implemented pub async fn start_edit_upload( ctx: &ServiceContext<'_>, - ) -> Result { + ) -> Result<()> { todo!() } diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 7e0e5b4091..d499c802d6 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -57,17 +57,6 @@ pub struct FinishFileCreation { pub type FinishFileCreationOutput = FinishFileRevisionUploadOutput; -#[derive(Deserialize, Debug, Clone)] -pub struct UploadFileEdit { - pub site_id: i64, - pub page_id: i64, - pub file_id: i64, - pub user_id: i64, - pub revision_comments: String, -} - -pub type UploadFileEditOutput = CreateFileRevisionOutput; - #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { pub site_id: i64, From 1a5342eff2ba766b690d55e0a984c2590df04ba1 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 20 Sep 2024 04:52:21 -0400 Subject: [PATCH 38/91] Address warnings. 
--- deepwell/src/endpoints/file.rs | 30 +++++++++++++++---------- deepwell/src/services/blob/structs.rs | 8 +++++++ deepwell/src/services/file/service.rs | 6 ++--- deepwell/src/services/file/structs.rs | 7 ------ deepwell/src/services/import/service.rs | 1 + deepwell/src/services/user/service.rs | 1 + 6 files changed, 30 insertions(+), 23 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index db2d948a7b..c909bdea9d 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -21,12 +21,11 @@ use super::prelude::*; use crate::models::file::Model as FileModel; use crate::models::file_revision::Model as FileRevisionModel; -use crate::services::blob::BlobService; +use crate::services::blob::{BlobMetadata, BlobService, GetBlobOutput}; use crate::services::file::{ DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, FinishFileCreation, - FinishFileCreationOutput, GetBlobOutput, GetFileDetails, GetFileOutput, MoveFile, - MoveFileOutput, RestoreFile, RestoreFileOutput, StartFileCreation, - StartFileCreationOutput, + FinishFileCreationOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, + RestoreFile, RestoreFileOutput, StartFileCreation, StartFileCreationOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; @@ -41,14 +40,19 @@ pub async fn blob_get( info!("Getting blob for S3 hash"); let hash: Bytes = params.parse()?; let data = BlobService::get(ctx, hash.as_ref()).await?; - let metadata = BlobService::get_metadata(ctx, hash.as_ref()).await?; - let output = GetBlobOutput { + let BlobMetadata { + mime, + size, + created_at, + } = BlobService::get_metadata(ctx, hash.as_ref()).await?; + + Ok(GetBlobOutput { data, - mime: metadata.mime, - size: metadata.size, - }; - Ok(output) + mime, + size, + created_at, + }) } pub async fn file_get( @@ -114,16 +118,18 @@ pub async fn file_create_finish( // TODO pub async fn file_edit_start( ctx: &ServiceContext<'_>, - params: 
Params<'static>, + _params: Params<'static>, ) -> Result<()> { + let _ = FileService::start_edit_upload(ctx).await?; todo!() } // TODO pub async fn file_edit_finish( ctx: &ServiceContext<'_>, - params: Params<'static>, + _params: Params<'static>, ) -> Result<()> { + let _ = FileService::finish_edit_upload(ctx).await?; todo!() } diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index d931ac264a..9abc342e79 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -35,3 +35,11 @@ pub struct BlobMetadata { pub size: i64, pub created_at: OffsetDateTime, } + +#[derive(Serialize, Debug, Clone)] +pub struct GetBlobOutput { + pub data: Vec, + pub mime: String, + pub size: i64, + pub created_at: OffsetDateTime, +} diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index f33afad558..4b94e88fe6 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -159,14 +159,12 @@ impl FileService { /// Edits a file by uploading a new file version. 
/// TODO needs to be implemented - pub async fn start_edit_upload( - ctx: &ServiceContext<'_>, - ) -> Result<()> { + pub async fn start_edit_upload(_ctx: &ServiceContext<'_>) -> Result { todo!() } // TODO - pub async fn finish_edit_upload(ctx: &ServiceContext<'_>) -> Result<()> { + pub async fn finish_edit_upload(_ctx: &ServiceContext<'_>) -> Result { todo!() } diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index d499c802d6..b510dbef2f 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -94,13 +94,6 @@ pub struct GetFileOutput { pub hidden_fields: Vec, } -#[derive(Serialize, Debug, Clone)] -pub struct GetBlobOutput { - pub data: Vec, - pub mime: String, - pub size: i64, -} - #[derive(Deserialize, Debug, Clone)] pub struct EditFile { pub site_id: i64, diff --git a/deepwell/src/services/import/service.rs b/deepwell/src/services/import/service.rs index f3ec50cb27..22835d0c71 100644 --- a/deepwell/src/services/import/service.rs +++ b/deepwell/src/services/import/service.rs @@ -73,6 +73,7 @@ impl ImportService { let output = BlobService::create(ctx, &bytes).await?; Some(output.hash.to_vec()) */ + let _ = bytes; todo!() } }; diff --git a/deepwell/src/services/user/service.rs b/deepwell/src/services/user/service.rs index a88069f938..18a80135c5 100644 --- a/deepwell/src/services/user/service.rs +++ b/deepwell/src/services/user/service.rs @@ -432,6 +432,7 @@ impl UserService { Some(hash.to_vec()) */ + let _ = blob; todo!() } }; From 7aa45e282815b1f66def31c092e37d134d8ee2a6 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 09:35:16 -0400 Subject: [PATCH 39/91] Update methods for blob create, upload, then file create. 
--- .../migrations/20220906103252_deepwell.sql | 7 +- deepwell/src/api.rs | 14 +- deepwell/src/endpoints/blob.rs | 61 +++++++++ deepwell/src/endpoints/file.rs | 78 +---------- deepwell/src/endpoints/mod.rs | 1 + deepwell/src/models/blob_pending.rs | 26 +++- deepwell/src/models/file.rs | 1 - deepwell/src/models/prelude.rs | 1 + deepwell/src/models/user.rs | 8 ++ deepwell/src/services/blob/service.rs | 59 ++++++-- deepwell/src/services/blob/structs.rs | 15 ++ deepwell/src/services/error.rs | 4 + deepwell/src/services/file/service.rs | 129 +++++------------- deepwell/src/services/file/structs.rs | 22 +-- .../src/services/file_revision/service.rs | 81 +---------- .../src/services/file_revision/structs.rs | 16 +-- 16 files changed, 217 insertions(+), 306 deletions(-) create mode 100644 deepwell/src/endpoints/blob.rs diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index ff229d4d4e..6d58244c76 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -417,10 +417,12 @@ CREATE TABLE page_vote ( -- Manages blobs that are being uploaded by the user CREATE TABLE blob_pending ( - pending_file_id BIGSERIAL PRIMARY KEY, + external_id TEXT PRIMARY KEY, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), - presign_url TEXT NOT NULL CHECK (length(presign_url) > 1) + presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), + + CHECK (length(external_id) = 24) -- default length for a cuid2 ); -- @@ -451,7 +453,6 @@ CREATE TABLE file ( name TEXT NOT NULL, page_id BIGINT NOT NULL REFERENCES page(page_id), site_id BIGINT NOT NULL REFERENCES site(site_id), - pending_blob_id BIGINT REFERENCES file_pending(pending_file_id), UNIQUE (page_id, name, deleted_at) ); diff --git a/deepwell/src/api.rs b/deepwell/src/api.rs index ffdc79f319..90931ba39f 100644 --- a/deepwell/src/api.rs +++ b/deepwell/src/api.rs @@ 
-28,9 +28,9 @@ use crate::config::{Config, Secrets}; use crate::endpoints::{ - auth::*, category::*, domain::*, email::*, file::*, file_revision::*, link::*, - locale::*, message::*, misc::*, page::*, page_revision::*, parent::*, site::*, - site_member::*, text::*, user::*, user_bot::*, view::*, vote::*, + auth::*, blob::*, category::*, domain::*, email::*, file::*, file_revision::*, + link::*, locale::*, message::*, misc::*, page::*, page_revision::*, parent::*, + site::*, site_member::*, text::*, user::*, user_bot::*, view::*, vote::*, }; use crate::locales::Localizations; use crate::services::blob::MimeAnalyzer; @@ -260,14 +260,12 @@ async fn build_module(app_state: ServerState) -> anyhow::Result. + */ + +use super::prelude::*; +use crate::services::blob::{ + BlobMetadata, GetBlobOutput, StartBlobUpload, StartBlobUploadOutput, +}; +use crate::services::Result; +use crate::web::Bytes; + +/// Temporary endpoint to get any blob by hash. +/// Primarily for user avatars, which have no other +/// way of getting the data at the moment. +pub async fn blob_get( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + info!("Getting blob for S3 hash"); + let hash: Bytes = params.parse()?; + let data = BlobService::get(ctx, hash.as_ref()).await?; + + let BlobMetadata { + mime, + size, + created_at, + } = BlobService::get_metadata(ctx, hash.as_ref()).await?; + + Ok(GetBlobOutput { + data, + mime, + size, + created_at, + }) +} + +/// Starts a new upload by creating a pending blob. 
+pub async fn blob_upload( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + info!("Creating new pending blob upload"); + let input: StartBlobUpload = params.parse()?; + BlobService::start_upload(ctx, input).await +} diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index c909bdea9d..ab87a4b050 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -21,40 +21,14 @@ use super::prelude::*; use crate::models::file::Model as FileModel; use crate::models::file_revision::Model as FileRevisionModel; -use crate::services::blob::{BlobMetadata, BlobService, GetBlobOutput}; +use crate::services::blob::BlobService; use crate::services::file::{ - DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, FinishFileCreation, - FinishFileCreationOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, - RestoreFile, RestoreFileOutput, StartFileCreation, StartFileCreationOutput, + DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetFileDetails, + GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, RestoreFileOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; -/// Temporary endpoint to get any blob by hash. -/// Primarily for user avatars, which have no other -/// way of getting the data at the moment. 
-pub async fn blob_get( - ctx: &ServiceContext<'_>, - params: Params<'static>, -) -> Result { - info!("Getting blob for S3 hash"); - let hash: Bytes = params.parse()?; - let data = BlobService::get(ctx, hash.as_ref()).await?; - - let BlobMetadata { - mime, - size, - created_at, - } = BlobService::get_metadata(ctx, hash.as_ref()).await?; - - Ok(GetBlobOutput { - data, - mime, - size, - created_at, - }) -} - pub async fn file_get( ctx: &ServiceContext<'_>, params: Params<'static>, @@ -84,52 +58,10 @@ pub async fn file_get( } } -pub async fn file_create_start( - ctx: &ServiceContext<'_>, - params: Params<'static>, -) -> Result { - let input: StartFileCreation = params.parse()?; - - info!( - "Starting file upload '{}' to page ID {} in site ID {}", - input.name, input.page_id, input.site_id, - ); - - FileService::start_new_upload(ctx, input).await -} - -pub async fn file_create_finish( - ctx: &ServiceContext<'_>, - params: Params<'static>, -) -> Result { - let input: FinishFileCreation = params.parse()?; - - info!( - "Finishing file upload (pending blob ID {} for file ID {} in page ID {} in site ID {}", - input.pending_blob_id, - input.file_id, - input.page_id, - input.site_id, - ); - - FileService::finish_new_upload(ctx, input).await -} - -// TODO -pub async fn file_edit_start( - ctx: &ServiceContext<'_>, - _params: Params<'static>, -) -> Result<()> { - let _ = FileService::start_edit_upload(ctx).await?; - todo!() -} - -// TODO -pub async fn file_edit_finish( - ctx: &ServiceContext<'_>, +pub async fn file_create( + _ctx: &ServiceContext<'_>, _params: Params<'static>, ) -> Result<()> { - let _ = FileService::finish_edit_upload(ctx).await?; todo!() } diff --git a/deepwell/src/endpoints/mod.rs b/deepwell/src/endpoints/mod.rs index cdaa7fbf09..aaaae95c65 100644 --- a/deepwell/src/endpoints/mod.rs +++ b/deepwell/src/endpoints/mod.rs @@ -43,6 +43,7 @@ mod prelude { } pub mod auth; +pub mod blob; pub mod category; pub mod domain; pub mod email; diff --git 
a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs index a03282ac97..3bd60f7280 100644 --- a/deepwell/src/models/blob_pending.rs +++ b/deepwell/src/models/blob_pending.rs @@ -1,21 +1,37 @@ +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.0.1 + use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] #[sea_orm(table_name = "blob_pending")] pub struct Model { - #[sea_orm(primary_key)] - pub pending_blob_id: i64, + #[sea_orm(primary_key, auto_increment = false, column_type = "Text")] + pub external_id: String, pub created_at: TimeDateTimeWithTimeZone, - + pub created_by: i64, #[sea_orm(column_type = "Text")] pub s3_path: String, - #[sea_orm(column_type = "Text")] pub presign_url: String, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] -pub enum Relation {} +pub enum Relation { + #[sea_orm( + belongs_to = "super::user::Entity", + from = "Column::CreatedBy", + to = "super::user::Column::UserId", + on_update = "NoAction", + on_delete = "NoAction" + )] + User, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::User.def() + } +} impl ActiveModelBehavior for ActiveModel {} diff --git a/deepwell/src/models/file.rs b/deepwell/src/models/file.rs index b4d97b2aa0..ce05ac6f77 100644 --- a/deepwell/src/models/file.rs +++ b/deepwell/src/models/file.rs @@ -16,7 +16,6 @@ pub struct Model { pub name: String, pub page_id: i64, pub site_id: i64, - pub pending_blob_id: Option, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/deepwell/src/models/prelude.rs b/deepwell/src/models/prelude.rs index 073cd95cfe..169b0d0137 100644 --- a/deepwell/src/models/prelude.rs +++ b/deepwell/src/models/prelude.rs @@ -1,6 +1,7 @@ //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.0.1 pub use super::alias::Entity as Alias; +pub use super::blob_pending::Entity as BlobPending; pub use super::file::Entity as File; pub use super::file_revision::Entity as FileRevision; pub use super::filter::Entity as Filter; diff --git a/deepwell/src/models/user.rs b/deepwell/src/models/user.rs index 569ac36070..a985950552 100644 --- a/deepwell/src/models/user.rs +++ b/deepwell/src/models/user.rs @@ -50,6 +50,8 @@ pub struct Model { pub enum Relation { #[sea_orm(has_many = "super::alias::Entity")] Alias, + #[sea_orm(has_many = "super::blob_pending::Entity")] + BlobPending, #[sea_orm(has_many = "super::file_revision::Entity")] FileRevision, #[sea_orm(has_many = "super::message::Entity")] @@ -76,6 +78,12 @@ impl Related for Entity { } } +impl Related for Entity { + fn to() -> RelationDef { + Relation::BlobPending.def() + } +} + impl Related for Entity { fn to() -> RelationDef { Relation::FileRevision.def() diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 42c670a5c1..0ea6a09eab 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -26,13 +26,14 @@ use crate::models::blob_pending::{ self, Entity as BlobPending, Model as BlobPendingModel, }; use crate::utils::assert_is_csprng; +use cuid2::cuid; use rand::distributions::{Alphanumeric, DistString}; use rand::thread_rng; use s3::request_trait::ResponseData; use s3::serde_types::HeadObjectResult; use std::str; use time::format_description::well_known::Rfc2822; -use time::OffsetDateTime; +use time::{Duration, OffsetDateTime}; /// Hash for empty blobs. /// @@ -62,17 +63,19 @@ pub struct BlobService; impl BlobService { /// Creates an S3 presign URL to allow an end user to upload a blob. - /// - /// Also adds an entry for the pending blob upload (`blob_pending`), - /// so it can be used by the main `blob` table. + /// This is the start to the upload process for any kind of file. 
/// /// # Returns - /// The generated presign URL that can be uploaded to. - pub async fn create_upload(ctx: &ServiceContext<'_>) -> Result { + /// The generated presign URL, which can be uploaded to. + pub async fn start_upload( + ctx: &ServiceContext<'_>, + StartBlobUpload { user_id }: StartBlobUpload, + ) -> Result { let config = ctx.config(); let txn = ctx.transaction(); - // Generate random S3 path + // Generate primary key and random S3 path + let pending_blob_id = cuid(); let s3_path = { let mut path = format!("{PRESIGN_DIRECTORY}/"); @@ -88,7 +91,8 @@ impl BlobService { path }; - info!("Creating presign upload URL for blob at path {s3_path}"); + + info!("Creating presign upload URL for blob at path {s3_path} with primary key {pending_blob_id}"); // Create presign URL let bucket = ctx.s3_bucket(); @@ -97,17 +101,37 @@ impl BlobService { // Add pending blob entry let model = blob_pending::ActiveModel { + external_id: Set(pending_blob_id), s3_path: Set(s3_path), presign_url: Set(presign_url), + created_by: Set(user_id), ..Default::default() }; - let output = model.insert(txn).await?; - Ok(output) + + let BlobPendingModel { + external_id: pending_blob_id, + presign_url, + created_at, + .. + } = model.insert(txn).await?; + + let expires_at = created_at + .checked_add(Duration::seconds(i64::from(config.presigned_expiry_secs))) + .expect("getting expiration timestamp overflowed"); + + debug!("New presign upload URL will last until {expires_at}"); + + Ok(StartBlobUploadOutput { + pending_blob_id, + presign_url, + expires_at, + }) } pub async fn finish_upload( ctx: &ServiceContext<'_>, - pending_blob_id: i64, + user_id: i64, + pending_blob_id: &str, ) -> Result { info!("Finishing upload for blob for pending ID {pending_blob_id}"); let bucket = ctx.s3_bucket(); @@ -115,17 +139,28 @@ impl BlobService { debug!("Getting pending blob info"); let row = BlobPending::find_by_id(pending_blob_id).one(txn).await?; - let BlobPendingModel { s3_path, .. 
} = match row { + let BlobPendingModel { + s3_path, + created_by, + .. + } = match row { Some(pending) => pending, None => return Err(Error::GeneralNotFound), }; + if user_id != created_by { + error!("User mismatch, user ID {user_id} is attempting to use blob uploaded by {created_by}"); + return Err(Error::BlobWrongUser); + } + debug!("Download uploaded blob from S3 uploads to get metadata"); let response = bucket.get_object(&s3_path).await?; let data: Vec = match response.status_code() { 200 => response.into(), _ => { error!("Cannot find blob at presign path {s3_path}"); + BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; + info!("Deleted pending blob due to missing presign object in S3"); return Err(Error::FileNotUploaded); } }; diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index 9abc342e79..c99bc9e052 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -21,6 +21,20 @@ use super::prelude::*; use time::OffsetDateTime; +#[derive(Deserialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] +pub struct StartBlobUpload { + pub user_id: i64, +} + +#[derive(Serialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] +pub struct StartBlobUploadOutput { + pub pending_blob_id: String, + pub presign_url: String, + pub expires_at: OffsetDateTime, +} + #[derive(Debug)] pub struct FinalizeBlobUploadOutput { pub hash: BlobHash, @@ -37,6 +51,7 @@ pub struct BlobMetadata { } #[derive(Serialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] pub struct GetBlobOutput { pub data: Vec, pub mime: String, diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index 33c74d2759..9665ffe060 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -137,6 +137,9 @@ pub enum Error { #[error("Cannot restore a non-deleted filter")] FilterNotDeleted, + #[error("Cannot use blob uploaded by different user")] + BlobWrongUser, + 
#[error("File name cannot be empty")] FileNameEmpty, @@ -376,6 +379,7 @@ impl Error { Error::MessageBodyTooLong => 4019, Error::MessageNoRecipients => 4020, Error::MessageTooManyRecipients => 4021, + Error::BlobWrongUser => 4022, // 4100 -- Localization Error::LocaleInvalid(_) => 4100, diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 4b94e88fe6..63a74e5c37 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -27,8 +27,8 @@ use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, - CreatePendingFileRevision, CreateResurrectionFileRevision, - CreateTombstoneFileRevision, FileBlob, FinishFileRevisionUpload, + CreateResurrectionFileRevision, CreateTombstoneFileRevision, FileBlob, + FinishFileRevisionUpload, }; use crate::services::filter::{FilterClass, FilterType}; use crate::services::{BlobService, FileRevisionService, FilterService}; @@ -43,18 +43,19 @@ impl FileService { /// /// In the background, this stores the blob via content addressing, /// meaning that duplicates are not uploaded twice. 
- pub async fn start_new_upload( + pub async fn create( ctx: &ServiceContext<'_>, - StartFileCreation { + CreateFile { site_id, page_id, name, + pending_blob_id, revision_comments, user_id, licensing, bypass_filter, - }: StartFileCreation, - ) -> Result { + }: CreateFile, + ) -> Result { info!("Creating file with name '{}'", name); let txn = ctx.transaction(); @@ -66,111 +67,43 @@ impl FileService { Self::run_filter(ctx, site_id, Some(&name)).await?; } - // Add pending file - let pending = BlobService::create_upload(ctx).await?; + // Finish blob upload + let FinalizeBlobUploadOutput { + hash: s3_hash, + mime: mime_hint, + size: size_hint, + created: new_blob_created, + } = BlobService::finish_upload(ctx, user_id, &pending_blob_id).await?; // Add new file let model = file::ActiveModel { name: Set(name.clone()), site_id: Set(site_id), page_id: Set(page_id), - pending_blob_id: Set(Some(pending.pending_blob_id)), ..Default::default() }; - let file = model.insert(txn).await?; - let file_revision = FileRevisionService::create_pending( + + FileRevisionService::create_first( ctx, - CreatePendingFileRevision { - site_id, + CreateFirstFileRevision { page_id, + site_id, file_id: file.file_id, user_id, name, + s3_hash, + size_hint, + mime_hint, + new_blob_created, licensing, - comments: revision_comments, - }, - ) - .await?; - - Ok(StartFileCreationOutput { - pending_blob_id: pending.pending_blob_id, - presign_url: pending.presign_url, - file_revision_id: file_revision.file_revision_id, - }) - } - - pub async fn finish_new_upload( - ctx: &ServiceContext<'_>, - FinishFileCreation { - site_id, - page_id, - file_id, - pending_blob_id, - }: FinishFileCreation, - ) -> Result { - info!( - "Finishing new file upload with site ID {} page ID {} file ID {} pending ID {}", - site_id, page_id, file_id, pending_blob_id, - ); - - // Ensure a pending file exists - let txn = ctx.transaction(); - let row = File::find() - .filter( - Condition::all() - .add(file::Column::SiteId.eq(site_id)) - 
.add(file::Column::PageId.eq(page_id)) - .add(file::Column::FileId.eq(file_id)) - .add(file::Column::DeletedAt.is_null()) - .add(file::Column::PendingBlobId.eq(Some(pending_blob_id))), - ) - .one(txn) - .await?; - - if row.is_none() { - error!("No pending file found"); - return Err(Error::FileNotFound); - } - - // Clear pending_blob column - { - let model = file::ActiveModel { - file_id: Set(file_id), - pending_blob_id: Set(None), - ..Default::default() - }; - model.update(txn).await?; - } - - // Finally, update the first file revision with the uploaded data. - // This gets the data from BlobService and then deletes the row. - FileRevisionService::finish_upload( - ctx, - FinishFileRevisionUpload { - site_id, - page_id, - file_id, - pending_blob_id, + revision_comments, }, ) .await } - /// Edits a file by uploading a new file version. - /// TODO needs to be implemented - pub async fn start_edit_upload(_ctx: &ServiceContext<'_>) -> Result { - todo!() - } - - // TODO - pub async fn finish_edit_upload(_ctx: &ServiceContext<'_>) -> Result { - todo!() - } - /// Edits a file, creating a new revision. - /// - /// Cannot be used to upload a new file version. 
pub async fn edit( ctx: &ServiceContext<'_>, EditFile { @@ -189,7 +122,11 @@ impl FileService { let last_revision = FileRevisionService::get_latest(ctx, site_id, page_id, file_id).await?; - let EditFileBody { name, licensing } = body; + let EditFileBody { + name, + licensing, + uploaded_blob_id, + } = body; // Verify name change // @@ -219,7 +156,7 @@ impl FileService { page_id, file_id, user_id, - comments: revision_comments, + revision_comments, body: CreateFileRevisionBody { name, licensing, @@ -280,7 +217,7 @@ impl FileService { page_id: current_page_id, file_id, user_id, - comments: revision_comments, + revision_comments, body: CreateFileRevisionBody { page_id: ProvidedValue::Set(destination_page_id), ..Default::default() @@ -449,8 +386,7 @@ impl FileService { .add(condition) .add(file::Column::SiteId.eq(site_id)) .add(file::Column::PageId.eq(page_id)) - .add(file::Column::DeletedAt.is_null()) - .add(file::Column::PendingBlobId.is_null()), + .add(file::Column::DeletedAt.is_null()), ) .one(txn) .await? 
@@ -485,8 +421,7 @@ impl FileService { Condition::all() .add(file::Column::PageId.eq(page_id)) .add(file::Column::Name.eq(name)) - .add(file::Column::DeletedAt.is_null()) - .add(file::Column::PendingBlobId.is_null()), + .add(file::Column::DeletedAt.is_null()), ) .into_tuple() .one(txn) diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index b510dbef2f..1e0a5d8e4e 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -21,17 +21,17 @@ use crate::models::sea_orm_active_enums::FileRevisionType; use crate::services::file_revision::{ CreateFileRevisionOutput, CreateFirstFileRevisionOutput, - FinishFileRevisionUploadOutput, }; use crate::web::{Bytes, FileDetails, ProvidedValue, Reference}; use serde_json::Value as JsonValue; use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] -pub struct StartFileCreation { +pub struct CreateFile { pub site_id: i64, pub page_id: i64, pub name: String, + pub pending_blob_id: String, pub revision_comments: String, pub user_id: i64, pub licensing: JsonValue, // TODO @@ -40,22 +40,7 @@ pub struct StartFileCreation { pub bypass_filter: bool, } -#[derive(Serialize, Debug, Clone)] -pub struct StartFileCreationOutput { - pub pending_blob_id: i64, - pub presign_url: String, - pub file_revision_id: i64, -} - -#[derive(Deserialize, Debug, Clone)] -pub struct FinishFileCreation { - pub site_id: i64, - pub page_id: i64, - pub file_id: i64, - pub pending_blob_id: i64, -} - -pub type FinishFileCreationOutput = FinishFileRevisionUploadOutput; +pub type CreateFileOutput = CreateFirstFileRevisionOutput; #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { @@ -114,6 +99,7 @@ pub struct EditFile { pub struct EditFileBody { pub name: ProvidedValue, pub licensing: ProvidedValue, + pub uploaded_blob_id: ProvidedValue, } pub type EditFileOutput = CreateFileRevisionOutput; diff --git a/deepwell/src/services/file_revision/service.rs 
b/deepwell/src/services/file_revision/service.rs index 46c5a5fbda..94556dfac9 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -59,7 +59,7 @@ impl FileRevisionService { mut page_id, file_id, user_id, - comments, + revision_comments, body, }: CreateFileRevision, previous: FileRevisionModel, @@ -158,7 +158,7 @@ impl FileRevisionService { mime_hint: Set(mime_hint), licensing: Set(licensing), changes: Set(changes), - comments: Set(comments), + comments: Set(revision_comments), hidden: Set(vec![]), ..Default::default() }; @@ -170,37 +170,6 @@ impl FileRevisionService { })) } - /// Creates a dummy first revision for a file pending upload. - pub async fn create_pending( - ctx: &ServiceContext<'_>, - CreatePendingFileRevision { - site_id, - page_id, - file_id, - user_id, - name, - licensing, - comments, - }: CreatePendingFileRevision, - ) -> Result { - FileRevisionService::create_first( - ctx, - CreateFirstFileRevision { - site_id, - page_id, - file_id, - user_id, - name, - s3_hash: EMPTY_BLOB_HASH, - mime_hint: str!(EMPTY_BLOB_MIME), - size_hint: 0, - licensing, - comments, - }, - ) - .await - } - /// Creates the first revision for an already-uploaded file. /// /// See `RevisionService::create_first()`. @@ -215,8 +184,9 @@ impl FileRevisionService { s3_hash, size_hint, mime_hint, + new_blob_created, licensing, - comments, + revision_comments, }: CreateFirstFileRevision, ) -> Result { let txn = ctx.transaction(); @@ -240,7 +210,7 @@ impl FileRevisionService { size_hint: Set(size_hint), licensing: Set(licensing), changes: Set(ALL_CHANGES.clone()), - comments: Set(comments), + comments: Set(revision_comments), hidden: Set(vec![]), ..Default::default() }; @@ -444,47 +414,6 @@ impl FileRevisionService { Ok(revision) } - /// For a pending file, fill in the uploaded data fields. 
- pub async fn finish_upload( - ctx: &ServiceContext<'_>, - FinishFileRevisionUpload { - site_id, - page_id, - file_id, - pending_blob_id, - }: FinishFileRevisionUpload, - ) -> Result { - let txn = ctx.transaction(); - - // Move upload to final location, get its metadata - let FinalizeBlobUploadOutput { - hash, - mime, - size, - created, - } = BlobService::finish_upload(ctx, pending_blob_id).await?; - - // Get first file revision - let file_revision = - FileRevisionService::get_first(ctx, site_id, page_id, file_id).await?; - - // Update it with uploaded data - let mut model = file_revision.into_active_model(); - model.s3_hash = Set(hash.to_vec()); - model.mime_hint = Set(mime); - model.size_hint = Set(size); - let file_revision = model.update(txn).await?; - - Ok(FinishFileRevisionUploadOutput { - file_id, - file_revision_id: file_revision.revision_id, - s3_hash: Bytes::from(file_revision.s3_hash), - mime_hint: file_revision.mime_hint, - size_hint: file_revision.size_hint, - created, - }) - } - /// Get the first revision for this file. 
pub async fn get_first( ctx: &ServiceContext<'_>, diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index 737f29a6ab..a6bf0fdea1 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -29,7 +29,7 @@ pub struct CreateFileRevision { pub page_id: i64, pub file_id: i64, pub user_id: i64, - pub comments: String, + pub revision_comments: String, pub body: CreateFileRevisionBody, } @@ -64,19 +64,9 @@ pub struct CreateFirstFileRevision { pub s3_hash: BlobHash, pub size_hint: i64, pub mime_hint: String, + pub new_blob_created: bool, pub licensing: serde_json::Value, - pub comments: String, -} - -#[derive(Debug, Clone)] -pub struct CreatePendingFileRevision { - pub site_id: i64, - pub page_id: i64, - pub file_id: i64, - pub user_id: i64, - pub name: String, - pub licensing: serde_json::Value, - pub comments: String, + pub revision_comments: String, } #[derive(Serialize, Debug, Clone, Default)] From 3f36090b494e61a945d3d858453831770c3cf4f9 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 09:53:26 -0400 Subject: [PATCH 40/91] Fix file edit processing. --- deepwell/src/services/file/service.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 63a74e5c37..10d0bf171a 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -140,6 +140,28 @@ impl FileService { } } + // If a new file version was uploaded, then finalize. + // + // Get the blob struct for conditionally adding to + // the CreateFileRevisionBody. 
+ let blob = match uploaded_blob_id { + ProvidedValue::Unset => ProvidedValue::Unset, + ProvidedValue::Set(ref id) => { + let FinalizeBlobUploadOutput { + hash: s3_hash, + mime: mime_hint, + size: size_hint, + created: new_blob_created, + } = BlobService::finish_upload(ctx, user_id, id).await?; + + ProvidedValue::Set(FileBlob { + s3_hash, + mime_hint, + size_hint, + }) + } + }; + // Update file metadata let model = file::ActiveModel { file_id: Set(file_id), @@ -160,6 +182,7 @@ impl FileService { body: CreateFileRevisionBody { name, licensing, + blob, ..Default::default() }, }, From 1d1cd739e0a709068049c646ef7e12b6dff39e5e Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 22:58:29 -0400 Subject: [PATCH 41/91] Pass out blob_created flag. --- deepwell/src/services/file/service.rs | 7 ++++--- deepwell/src/services/file_revision/service.rs | 8 +++++++- deepwell/src/services/file_revision/structs.rs | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 10d0bf171a..50e40571d2 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -72,7 +72,7 @@ impl FileService { hash: s3_hash, mime: mime_hint, size: size_hint, - created: new_blob_created, + created: blob_created, } = BlobService::finish_upload(ctx, user_id, &pending_blob_id).await?; // Add new file @@ -95,7 +95,7 @@ impl FileService { s3_hash, size_hint, mime_hint, - new_blob_created, + blob_created, licensing, revision_comments, }, @@ -151,13 +151,14 @@ impl FileService { hash: s3_hash, mime: mime_hint, size: size_hint, - created: new_blob_created, + created: blob_created, } = BlobService::finish_upload(ctx, user_id, id).await?; ProvidedValue::Set(FileBlob { s3_hash, mime_hint, size_hint, + blob_created, }) } }; diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 94556dfac9..9b44b3c572 100644 --- 
a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -69,6 +69,7 @@ impl FileRevisionService { // Fields to create in the revision let mut changes = Vec::new(); + let mut blob_created = ProvidedValue::Unset; let FileRevisionModel { mut name, mut s3_hash, @@ -106,6 +107,7 @@ impl FileRevisionService { s3_hash = new_blob.s3_hash.to_vec(); size_hint = new_blob.size_hint; mime_hint = new_blob.mime_hint; + blob_created = ProvidedValue::Set(new_blob.blob_created); } } @@ -167,6 +169,7 @@ impl FileRevisionService { Ok(Some(CreateFileRevisionOutput { file_revision_id: revision_id, file_revision_number: revision_number, + blob_created, })) } @@ -184,7 +187,7 @@ impl FileRevisionService { s3_hash, size_hint, mime_hint, - new_blob_created, + blob_created, licensing, revision_comments, }: CreateFirstFileRevision, @@ -219,6 +222,7 @@ impl FileRevisionService { Ok(CreateFirstFileRevisionOutput { file_id, file_revision_id: revision_id, + blob_created, }) } @@ -281,6 +285,7 @@ impl FileRevisionService { Ok(CreateFileRevisionOutput { file_revision_id: revision_id, file_revision_number: revision_number, + blob_created: ProvidedValue::Unset, }) } @@ -368,6 +373,7 @@ impl FileRevisionService { Ok(CreateFileRevisionOutput { file_revision_id: revision_id, file_revision_number: revision_number, + blob_created: ProvidedValue::Unset, }) } diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index a6bf0fdea1..db1b73f23b 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -46,12 +46,14 @@ pub struct FileBlob { pub s3_hash: BlobHash, pub size_hint: i64, pub mime_hint: String, + pub blob_created: bool, } #[derive(Serialize, Debug, Clone, Default)] pub struct CreateFileRevisionOutput { pub file_revision_id: i64, pub file_revision_number: i32, + pub blob_created: ProvidedValue, } #[derive(Debug, Clone)] @@ -64,7 +66,7 @@ pub 
struct CreateFirstFileRevision { pub s3_hash: BlobHash, pub size_hint: i64, pub mime_hint: String, - pub new_blob_created: bool, + pub blob_created: bool, pub licensing: serde_json::Value, pub revision_comments: String, } @@ -73,6 +75,7 @@ pub struct CreateFirstFileRevision { pub struct CreateFirstFileRevisionOutput { pub file_id: i64, pub file_revision_id: i64, + pub blob_created: bool, } #[derive(Deserialize, Debug, Clone)] From 007de7795849f2c2ec6082493531ea98a1d5a2c4 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 23:02:54 -0400 Subject: [PATCH 42/91] Implement file_create() method. --- deepwell/src/endpoints/file.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index ab87a4b050..7504d9496f 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -23,7 +23,7 @@ use crate::models::file::Model as FileModel; use crate::models::file_revision::Model as FileRevisionModel; use crate::services::blob::BlobService; use crate::services::file::{ - DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetFileDetails, + CreateFile, CreateFileOutput, DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, RestoreFileOutput, }; use crate::services::Result; @@ -59,10 +59,17 @@ pub async fn file_get( } pub async fn file_create( - _ctx: &ServiceContext<'_>, - _params: Params<'static>, -) -> Result<()> { - todo!() + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + let input: CreateFile = params.parse()?; + + info!( + "Creating file on page ID {} in site ID {}", + input.page_id, input.site_id, + ); + + FileService::create(ctx, input).await } pub async fn file_edit( From a9132c4ad1430a0356a87069f4b883973d59d6c5 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 23:03:36 -0400 Subject: [PATCH 43/91] Remove unused method. 
This was only used for the bad implementation where we had a weird partial file row in the database. --- .../src/services/file_revision/service.rs | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 9b44b3c572..293fef0d58 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -420,32 +420,6 @@ impl FileRevisionService { Ok(revision) } - /// Get the first revision for this file. - pub async fn get_first( - ctx: &ServiceContext<'_>, - site_id: i64, - page_id: i64, - file_id: i64, - ) -> Result { - let txn = ctx.transaction(); - let model = FileRevision::find() - .filter( - Condition::all() - .add(file_revision::Column::SiteId.eq(site_id)) - .add(file_revision::Column::PageId.eq(page_id)) - .add(file_revision::Column::FileId.eq(file_id)) - .add(file_revision::Column::RevisionNumber.eq(0)) - .add( - file_revision::Column::RevisionType.eq(FileRevisionType::Create), - ), - ) - .one(txn) - .await? - .ok_or(Error::FileRevisionNotFound)?; - - Ok(model) - } - /// Get the latest revision for this file. /// /// See `RevisionService::get_latest()`. From db5372c3d89dd97abbcf63d3f4a8c9d0a0710d81 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 23:04:54 -0400 Subject: [PATCH 44/91] Remove unused structs. Also as part of the old bad implementation. 
--- deepwell/src/services/file/service.rs | 1 - deepwell/src/services/file_revision/structs.rs | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 50e40571d2..944169e7b7 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -28,7 +28,6 @@ use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLO use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, CreateResurrectionFileRevision, CreateTombstoneFileRevision, FileBlob, - FinishFileRevisionUpload, }; use crate::services::filter::{FilterClass, FilterType}; use crate::services::{BlobService, FileRevisionService, FilterService}; diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index db1b73f23b..8212eff940 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -116,24 +116,6 @@ pub struct UpdateFileRevision { pub hidden: Vec, } -#[derive(Deserialize, Debug, Clone)] -pub struct FinishFileRevisionUpload { - pub site_id: i64, - pub page_id: i64, - pub file_id: i64, - pub pending_blob_id: i64, -} - -#[derive(Serialize, Debug, Clone)] -pub struct FinishFileRevisionUploadOutput { - pub file_id: i64, - pub file_revision_id: i64, - pub s3_hash: Bytes<'static>, - pub mime_hint: String, - pub size_hint: i64, - pub created: bool, -} - #[derive(Deserialize, Debug, Clone)] pub struct GetFileRevisionRange { pub page_id: i64, From a63f07d0dade38e0a6cd98b64432c6af76ac158f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sat, 28 Sep 2024 23:05:23 -0400 Subject: [PATCH 45/91] Run rustfmt. 
--- deepwell/src/endpoints/file.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index 7504d9496f..cbee032461 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -23,8 +23,9 @@ use crate::models::file::Model as FileModel; use crate::models::file_revision::Model as FileRevisionModel; use crate::services::blob::BlobService; use crate::services::file::{ - CreateFile, CreateFileOutput, DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetFileDetails, - GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, RestoreFileOutput, + CreateFile, CreateFileOutput, DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, + GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, + RestoreFileOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; From 5c3abede9fdc457f316b4c2c98b9063c6e03b1d3 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 01:50:25 -0400 Subject: [PATCH 46/91] Don't transform keys or API structs. 
--- deepwell/src/services/blob/structs.rs | 3 --- deepwell/src/services/email/structs.rs | 1 - 2 files changed, 4 deletions(-) diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index c99bc9e052..77909c1238 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -22,13 +22,11 @@ use super::prelude::*; use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] pub struct StartBlobUpload { pub user_id: i64, } #[derive(Serialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] pub struct StartBlobUploadOutput { pub pending_blob_id: String, pub presign_url: String, @@ -51,7 +49,6 @@ pub struct BlobMetadata { } #[derive(Serialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] pub struct GetBlobOutput { pub data: Vec, pub mime: String, diff --git a/deepwell/src/services/email/structs.rs b/deepwell/src/services/email/structs.rs index f73e275924..37eef2775a 100644 --- a/deepwell/src/services/email/structs.rs +++ b/deepwell/src/services/email/structs.rs @@ -54,7 +54,6 @@ impl Default for EmailValidationOutput { } #[derive(Serialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] pub enum EmailClassification { Normal, Disposable, From 922f26a38c74dbcf7fa4bdd260551b76410e34f7 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 01:52:13 -0400 Subject: [PATCH 47/91] Add created_by column to migration. Not sure how this was missed, I thought it was already here. 
--- deepwell/migrations/20220906103252_deepwell.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 6d58244c76..98e6455e0f 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -419,6 +419,7 @@ CREATE TABLE page_vote ( CREATE TABLE blob_pending ( external_id TEXT PRIMARY KEY, created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), + created_by BIGINT NOT NULL REFERENCES "user"(user_id), s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), From 99e84025bbcbf065aad5b89fda7ecb8d3e189c46 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 02:01:19 -0400 Subject: [PATCH 48/91] Store explicit expires_at timestamp for easy identification. --- deepwell/migrations/20220906103252_deepwell.sql | 4 +++- deepwell/src/models/blob_pending.rs | 3 ++- deepwell/src/services/blob/service.rs | 13 ++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index 98e6455e0f..f207ea31dd 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -418,11 +418,13 @@ CREATE TABLE page_vote ( -- Manages blobs that are being uploaded by the user CREATE TABLE blob_pending ( external_id TEXT PRIMARY KEY, - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), created_by BIGINT NOT NULL REFERENCES "user"(user_id), + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), + expires_at TIMESTAMP WITH TIME ZONE NOT NULL, s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), + CHECK (expires_at > created_at), -- expiration time is not in the relative past CHECK (length(external_id) = 24) -- default length for a cuid2 ); diff 
--git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs index 3bd60f7280..6f08bd316a 100644 --- a/deepwell/src/models/blob_pending.rs +++ b/deepwell/src/models/blob_pending.rs @@ -8,8 +8,9 @@ use serde::{Deserialize, Serialize}; pub struct Model { #[sea_orm(primary_key, auto_increment = false, column_type = "Text")] pub external_id: String, - pub created_at: TimeDateTimeWithTimeZone, pub created_by: i64, + pub created_at: TimeDateTimeWithTimeZone, + pub expires_at: TimeDateTimeWithTimeZone, #[sea_orm(column_type = "Text")] pub s3_path: String, #[sea_orm(column_type = "Text")] diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 0ea6a09eab..e894696b94 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -99,11 +99,19 @@ impl BlobService { let presign_url = bucket.presign_put(&s3_path, config.presigned_expiry_secs, None)?; + // Get timestamps + let created_at = now(); + let expires_at = created_at + .checked_add(Duration::seconds(i64::from(config.presigned_expiry_secs))) + .expect("getting expiration timestamp overflowed"); + // Add pending blob entry let model = blob_pending::ActiveModel { external_id: Set(pending_blob_id), s3_path: Set(s3_path), presign_url: Set(presign_url), + created_at: Set(created_at), + expires_at: Set(expires_at), created_by: Set(user_id), ..Default::default() }; @@ -111,14 +119,9 @@ impl BlobService { let BlobPendingModel { external_id: pending_blob_id, presign_url, - created_at, .. 
} = model.insert(txn).await?; - let expires_at = created_at - .checked_add(Duration::seconds(i64::from(config.presigned_expiry_secs))) - .expect("getting expiration timestamp overflowed"); - debug!("New presign upload URL will last until {expires_at}"); Ok(StartBlobUploadOutput { From 873e1a62bf799e546dd8a8cff92ecbf24a6ece1f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 12:24:12 -0400 Subject: [PATCH 49/91] Increment file revision number on edit. How did I not catch this before? lol --- deepwell/src/services/file_revision/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 293fef0d58..65d5bd52d7 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -149,7 +149,7 @@ impl FileRevisionService { // Insert the new revision into the table let model = file_revision::ActiveModel { revision_type: Set(FileRevisionType::Update), - revision_number: Set(0), + revision_number: Set(revision_number), file_id: Set(file_id), page_id: Set(page_id), site_id: Set(site_id), From 5a4cc130a7ffb186f0e6919ddb5786a6efd0e94b Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 18:53:08 -0400 Subject: [PATCH 50/91] Remove unneeded Default fill.
--- deepwell/src/services/blob/service.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index e894696b94..6da0fe51de 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -113,7 +113,6 @@ impl BlobService { created_at: Set(created_at), expires_at: Set(expires_at), created_by: Set(user_id), - ..Default::default() }; let BlobPendingModel { From 6de0b30026c88d673f2f19b87f0d9de09904e4fc Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 20:46:45 -0400 Subject: [PATCH 51/91] Change to uploaded_blob_id in input struct. --- deepwell/src/services/file/service.rs | 4 ++-- deepwell/src/services/file/structs.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 944169e7b7..efdbf52818 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -48,7 +48,7 @@ impl FileService { site_id, page_id, name, - pending_blob_id, + uploaded_blob_id, revision_comments, user_id, licensing, @@ -72,7 +72,7 @@ impl FileService { mime: mime_hint, size: size_hint, created: blob_created, - } = BlobService::finish_upload(ctx, user_id, &pending_blob_id).await?; + } = BlobService::finish_upload(ctx, user_id, &uploaded_blob_id).await?; // Add new file let model = file::ActiveModel { diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index 1e0a5d8e4e..8969487426 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -31,7 +31,7 @@ pub struct CreateFile { pub site_id: i64, pub page_id: i64, pub name: String, - pub pending_blob_id: String, + pub uploaded_blob_id: String, pub revision_comments: String, pub user_id: i64, pub licensing: JsonValue, // TODO From c48eab1bdf1db845a036e987a86d86c305f6e9fd Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: 
Sun, 29 Sep 2024 20:56:19 -0400 Subject: [PATCH 52/91] Move new blob error placement. --- deepwell/src/services/error.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index 9665ffe060..97ec4fb2f7 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -137,9 +137,6 @@ pub enum Error { #[error("Cannot restore a non-deleted filter")] FilterNotDeleted, - #[error("Cannot use blob uploaded by different user")] - BlobWrongUser, - #[error("File name cannot be empty")] FileNameEmpty, @@ -242,6 +239,9 @@ pub enum Error { #[error("Blob item does not exist")] BlobNotFound, + #[error("Cannot use blob uploaded by different user")] + BlobWrongUser, + #[error("Text item does not exist")] TextNotFound, From 495f082a4f0a7cea08e515c4437af2987c92e40d Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 21:10:58 -0400 Subject: [PATCH 53/91] Add helper for pending blob and check, add cancel_upload(). --- deepwell/src/services/blob/service.rs | 40 ++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 6da0fe51de..5e08511edd 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -18,7 +18,6 @@ * along with this program. If not, see . 
*/ -// TEMP, until https://scuttle.atlassian.net/browse/WJ-1032 #![allow(dead_code)] use super::prelude::*; @@ -130,16 +129,12 @@ impl BlobService { }) } - pub async fn finish_upload( + async fn get_pending_blob_path( ctx: &ServiceContext<'_>, user_id: i64, pending_blob_id: &str, - ) -> Result { - info!("Finishing upload for blob for pending ID {pending_blob_id}"); - let bucket = ctx.s3_bucket(); + ) -> Result { let txn = ctx.transaction(); - - debug!("Getting pending blob info"); let row = BlobPending::find_by_id(pending_blob_id).one(txn).await?; let BlobPendingModel { s3_path, @@ -155,6 +150,37 @@ impl BlobService { return Err(Error::BlobWrongUser); } + Ok(s3_path) + } + + pub async fn cancel_upload( + ctx: &ServiceContext<'_>, + user_id: i64, + pending_blob_id: &str, + ) -> Result<()> { + info!("Cancelling upload for blob for pending ID {pending_blob_id}"); + let bucket = ctx.s3_bucket(); + let txn = ctx.transaction(); + + let s3_path = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; + bucket.delete_object(&s3_path).await?; + + Ok(()) + } + + pub async fn finish_upload( + ctx: &ServiceContext<'_>, + user_id: i64, + pending_blob_id: &str, + ) -> Result { + info!("Finishing upload for blob for pending ID {pending_blob_id}"); + let bucket = ctx.s3_bucket(); + let txn = ctx.transaction(); + + debug!("Getting pending blob info"); + let s3_path = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + debug!("Download uploaded blob from S3 uploads to get metadata"); let response = bucket.get_object(&s3_path).await?; let data: Vec = match response.status_code() { From 01344d3217982db6326cb4f96ee57fae15e3f505 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 21:15:39 -0400 Subject: [PATCH 54/91] Add blob_cancel API method. 
--- deepwell/src/api.rs | 1 + deepwell/src/endpoints/blob.rs | 17 ++++++++++++++++- deepwell/src/services/blob/service.rs | 1 + deepwell/src/services/blob/structs.rs | 6 ++++++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/deepwell/src/api.rs b/deepwell/src/api.rs index 90931ba39f..cdc4127122 100644 --- a/deepwell/src/api.rs +++ b/deepwell/src/api.rs @@ -261,6 +261,7 @@ async fn build_module(app_state: ServerState) -> anyhow::Result, + params: Params<'static>, +) -> Result<()> { + info!("Cancelling a pending blob upload"); + + let CancelBlobUpload { + user_id, + pending_blob_id, + } = params.parse()?; + + BlobService::cancel_upload(ctx, user_id, &pending_blob_id).await +} + /// Starts a new upload by creating a pending blob. pub async fn blob_upload( ctx: &ServiceContext<'_>, diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 5e08511edd..ca9720b6d9 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -164,6 +164,7 @@ impl BlobService { let s3_path = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; + bucket.delete_object(&s3_path).await?; Ok(()) diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index 77909c1238..51e3005024 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -33,6 +33,12 @@ pub struct StartBlobUploadOutput { pub expires_at: OffsetDateTime, } +#[derive(Deserialize, Debug, Clone)] +pub struct CancelBlobUpload { + pub user_id: i64, + pub pending_blob_id: String, +} + #[derive(Debug)] pub struct FinalizeBlobUploadOutput { pub hash: BlobHash, From 0e76c5300fcf0fc9d6ab4bdc1d3e377a6fca0c8e Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 21:17:14 -0400 Subject: [PATCH 55/91] Rename argument for head(). 
--- deepwell/src/services/blob/service.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index ca9720b6d9..ff25244e14 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -378,10 +378,10 @@ impl BlobService { async fn head( ctx: &ServiceContext<'_>, - hex_hash: &str, + path: &str ) -> Result> { let bucket = ctx.s3_bucket(); - let (result, status) = bucket.head_object(hex_hash).await?; + let (result, status) = bucket.head_object(path).await?; match status { 200 | 204 => Ok(Some(result)), From 2619af153be9eea9e825cb95c8f6e3bd16fe5c89 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 21:18:28 -0400 Subject: [PATCH 56/91] Only delete from S3 when cancelling if exists. --- deepwell/src/services/blob/service.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index ff25244e14..309ac6f575 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -159,13 +159,14 @@ impl BlobService { pending_blob_id: &str, ) -> Result<()> { info!("Cancelling upload for blob for pending ID {pending_blob_id}"); - let bucket = ctx.s3_bucket(); let txn = ctx.transaction(); - let s3_path = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; - bucket.delete_object(&s3_path).await?; + if Self::head(ctx, &s3_path).await?.is_some() { + let bucket = ctx.s3_bucket(); + bucket.delete_object(&s3_path).await?; + } Ok(()) } @@ -378,7 +379,7 @@ impl BlobService { async fn head( ctx: &ServiceContext<'_>, - path: &str + path: &str, ) -> Result> { let bucket = ctx.s3_bucket(); let (result, status) = bucket.head_object(path).await?; From 3a7097f5def1e540e90a36cac9eee27c03d93308 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 
Sep 2024 21:45:54 -0400 Subject: [PATCH 57/91] Add echo method for testing / documentation. --- deepwell/src/api.rs | 1 + deepwell/src/endpoints/misc.rs | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/deepwell/src/api.rs b/deepwell/src/api.rs index cdc4127122..19606bf5fa 100644 --- a/deepwell/src/api.rs +++ b/deepwell/src/api.rs @@ -174,6 +174,7 @@ async fn build_module(app_state: ServerState) -> anyhow::Result, + params: Params<'static>, +) -> Result { + // Just write out whatever JSON value they put in + let data: JsonValue = params.parse()?; + info!("Got echo request, sending back to caller"); + Ok(data) +} + /// Method which always returns an error. /// For testing. pub async fn yield_error( From 89f69cf39861bb2aaa405a14258296ab60ae9570 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 21:55:37 -0400 Subject: [PATCH 58/91] Add instructions for testing requests and doing uploads. --- deepwell/README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/deepwell/README.md b/deepwell/README.md index b72631beed..3f9c215ac4 100644 --- a/deepwell/README.md +++ b/deepwell/README.md @@ -90,6 +90,36 @@ $ cargo fmt # Ensure code is formatted $ cargo clippy # Check code for lints ``` +### Running requests + +When you have a local instance of DEEPWELL running, probably in the development `docker-compose` instance, you may want to run requests against it. You can easily accomplish this with a tool like `curl`. The basic format is: + +```sh +$ curl -X POST --json '{"jsonrpc":"2.0","method":"<method-name>","params":<json-data>,"id":<id>}' http://localhost:2747/jsonrpc +``` + +Where you pass in the JSONRPC method name and corresponding JSON data. The ID value distinguishes between notifications and requests; see the JSONRPC specification for information.
+ +For instance: + +```sh +$ curl -X POST --json '{"jsonrpc":"2.0","method":"echo","params":{"my":["json","data"]},"id":0}' http://localhost:2747/jsonrpc + +{"jsonrpc":"2.0","id":0,"result":{"my":["json","data"]}} + +$ curl -X POST --json '{"jsonrpc":"2.0","method":"ping","id":0}' http://localhost:2747/jsonrpc + +{"jsonrpc":"2.0","id":0,"result":"Pong!"} +``` + +If you are unfamiliar with JSONRPC, you can read about it [on its website](https://www.jsonrpc.org/specification). For instance, one quirk is that for methods which take a non-list or object argument, you specify it as a list of one element. + +**NOTE:** When you are uploading files to local MinIO as part of testing file upload flows, **you must leave the URL unmodified**. The host `files` is used as the S3 provider, which is a problem since this is not a valid host on your development machine, so you must use `--connect-to` to tell `curl` to connect to the appropriate location instead: + +```sh +$ curl --connect-to files:9000:localhost:9000 --upload-file <file> <presigned-url> +``` + ### Database There are two important directories related to the management of the database (which DEEPWELL can be said to "own"). They are both fairly self-explanatory: From e819c1bad72cdf71ddb0c38fe6c646b1325ee941 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 22:13:42 -0400 Subject: [PATCH 59/91] Add test for ProvidedValue serialization as well. Apparently I missed this. Good thing to test for, given that we can return it now for blob_created.
--- deepwell/src/web/provided_value.rs | 39 ++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/deepwell/src/web/provided_value.rs b/deepwell/src/web/provided_value.rs index 8b51924063..618753776f 100644 --- a/deepwell/src/web/provided_value.rs +++ b/deepwell/src/web/provided_value.rs @@ -69,16 +69,18 @@ impl From> for Option { } #[test] -fn provided_value_deserialize() { +fn serde() { use serde_json::json; - #[derive(Deserialize, Debug)] + #[derive(Serialize, Deserialize, Debug)] struct Object { #[serde(default)] field: ProvidedValue>, } - macro_rules! check { + // Deserialization + + macro_rules! check_deser { ($value:expr, $expected:expr $(,)?) => {{ let object: Object = serde_json::from_value($value).expect("Unable to deserialize JSON"); @@ -90,10 +92,31 @@ fn provided_value_deserialize() { }}; } - check!(json!({}), ProvidedValue::Unset); - check!(json!({ "field": null }), ProvidedValue::Set(None)); - check!( - json!({"field": "value"}), - ProvidedValue::Set(Some(str!("value"))), + check_deser!(json!({}), ProvidedValue::Unset); + check_deser!(json!({ "field": null }), ProvidedValue::Set(None)); + check_deser!( + json!({"field": "apple"}), + ProvidedValue::Set(Some(str!("apple"))), + ); + + // Serialization + + macro_rules! check_ser { + ($field:expr, $expected:expr $(,)?) => {{ + let object = Object { field: $field }; + let json = serde_json::to_string(&object).expect("Unable to serialize JSON"); + + assert_eq!( + json, $expected, + "Actual generated JSON doesn't match expected", + ); + }}; + } + + check_ser!(ProvidedValue::Unset, "{}"); + check_ser!(ProvidedValue::Set(None), r#"{"field":null}"#); + check_ser!( + ProvidedValue::Set(Some(str!("banana"))), + r#"{"field":"banana"}"#, ); } From 1f79fc1a24345e86f499534ea03528f9ffc247bb Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 22:32:50 -0400 Subject: [PATCH 60/91] Remove unneeded #[inline] notations. 
--- deepwell/src/web/provided_value.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/deepwell/src/web/provided_value.rs b/deepwell/src/web/provided_value.rs index 618753776f..6d30d247f3 100644 --- a/deepwell/src/web/provided_value.rs +++ b/deepwell/src/web/provided_value.rs @@ -36,7 +36,6 @@ pub enum ProvidedValue { } impl ProvidedValue { - #[inline] pub fn to_option(&self) -> Option<&T> { match self { ProvidedValue::Set(ref value) => Some(value), @@ -49,7 +48,6 @@ impl ProvidedValue where T: Into, { - #[inline] pub fn into_active_value(self) -> sea_orm::ActiveValue { match self { ProvidedValue::Set(value) => sea_orm::ActiveValue::Set(value), @@ -59,7 +57,6 @@ where } impl From> for Option { - #[inline] fn from(value: ProvidedValue) -> Option { match value { ProvidedValue::Set(value) => Some(value), From 04ad6fad2a1a537411d4b6b3e64e42651d320add Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 22:33:25 -0400 Subject: [PATCH 61/91] Fix ProvidedValue serialization. --- .../src/services/file_revision/structs.rs | 2 ++ deepwell/src/web/provided_value.rs | 27 ++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index 8212eff940..c56b6f382e 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -53,6 +53,8 @@ pub struct FileBlob { pub struct CreateFileRevisionOutput { pub file_revision_id: i64, pub file_revision_number: i32, + + #[serde(default, skip_serializing_if = "ProvidedValue::is_unset")] pub blob_created: ProvidedValue, } diff --git a/deepwell/src/web/provided_value.rs b/deepwell/src/web/provided_value.rs index 6d30d247f3..f0ab7350dc 100644 --- a/deepwell/src/web/provided_value.rs +++ b/deepwell/src/web/provided_value.rs @@ -25,6 +25,19 @@ /// it to null (`None`). /// /// The `Unset` variant can only be constructed if the field is absent. 
+/// +/// ## Notes +/// When serializing or deserializing a field using this enum, you must +/// add the following: +/// ```unchecked +/// #[serde(default, skip_serializing_if = "ProvidedValue::is_unset")] +/// ``` +/// +/// (The `skip_serializing_if` attribute is optional if this is a +/// deserialize-only structure). +/// +/// Otherwise you will get an error mentioning that this enum is impossible +/// to serialize. #[derive(Serialize, Deserialize, Debug, Default, Clone, Hash, PartialEq, Eq)] #[serde(untagged)] pub enum ProvidedValue { @@ -42,6 +55,18 @@ impl ProvidedValue { ProvidedValue::Unset => None, } } + + pub fn is_set(&self) -> bool { + match self { + ProvidedValue::Set(_) => true, + ProvidedValue::Unset => false, + } + } + + #[inline] + pub fn is_unset(&self) -> bool { + !self.is_set() + } } impl ProvidedValue @@ -71,7 +96,7 @@ fn serde() { #[derive(Serialize, Deserialize, Debug)] struct Object { - #[serde(default)] + #[serde(default, skip_serializing_if = "ProvidedValue::is_unset")] field: ProvidedValue>, } From b3b85f463419ffc959285b1e22c4c1c1ceabf736 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 29 Sep 2024 22:38:05 -0400 Subject: [PATCH 62/91] Better error for missing pending blob. --- deepwell/src/services/blob/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 309ac6f575..b55634f2de 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -142,7 +142,7 @@ impl BlobService { .. } = match row { Some(pending) => pending, - None => return Err(Error::GeneralNotFound), + None => return Err(Error::BlobNotFound), }; if user_id != created_by { From c094229a85fe169a64c1d84adaa96faee72aee3c Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:01:10 -0400 Subject: [PATCH 63/91] Start implementing avatar upload with pending blob pattern. 
--- deepwell/src/services/user/service.rs | 23 +++++++++++++---------- deepwell/src/services/user/structs.rs | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/deepwell/src/services/user/service.rs b/deepwell/src/services/user/service.rs index 18a80135c5..66d451fec5 100644 --- a/deepwell/src/services/user/service.rs +++ b/deepwell/src/services/user/service.rs @@ -421,19 +421,22 @@ impl UserService { model.user_page = Set(user_page); } - if let ProvidedValue::Set(avatar) = input.avatar { - let s3_hash = match avatar { + if let ProvidedValue::Set(uploaded_blob_id) = input.avatar_uploaded_blob_id { + let s3_hash = match uploaded_blob_id { None => None, - Some(blob) => { - // FIXME blob upload - /* - let CreateBlobOutput { hash, .. } = - BlobService::create(ctx, &blob).await?; + Some(uploaded_blob_id) => { + let FinalizeBlobUploadOutput { + hash, + size, + .. + } = BlobService::finish_upload(ctx, user.user_id, &uploaded_blob_id).await?; + + if size > 0 { + // TODO add config setting for max avatar size + } + todo!(); Some(hash.to_vec()) - */ - let _ = blob; - todo!() } }; diff --git a/deepwell/src/services/user/structs.rs b/deepwell/src/services/user/structs.rs index 2bb1c9e1e2..2971721837 100644 --- a/deepwell/src/services/user/structs.rs +++ b/deepwell/src/services/user/structs.rs @@ -73,7 +73,7 @@ pub struct UpdateUserBody { pub email_verified: ProvidedValue, pub password: ProvidedValue, pub locales: ProvidedValue>, - pub avatar: ProvidedValue>>, + pub avatar_uploaded_blob_id: ProvidedValue>, pub real_name: ProvidedValue>, pub gender: ProvidedValue>, pub birthday: ProvidedValue>, From a693c77d0d34a1e1a1893425d7f7d66379797dbe Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:04:08 -0400 Subject: [PATCH 64/91] Amend column for job todo. 
--- deepwell/src/services/job/structs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepwell/src/services/job/structs.rs b/deepwell/src/services/job/structs.rs index 63502ea7f4..c21917108f 100644 --- a/deepwell/src/services/job/structs.rs +++ b/deepwell/src/services/job/structs.rs @@ -28,7 +28,7 @@ pub enum Job { }, PruneSessions, PruneText, - // TODO add job for pruning incomplete uploads (pending_blob table and corresponding column in file table) + // TODO add job for pruning incomplete uploads (pending_blob table and corresponding columns for foreign keys) NameChangeRefill, LiftExpiredPunishments, } From 05803b7b491f6745a1826248ad9af01a8fcb4745 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:17:39 -0400 Subject: [PATCH 65/91] Add configuration field for maximum avatar size. --- deepwell/config.example.toml | 3 +++ deepwell/src/config/file.rs | 3 +++ deepwell/src/config/object.rs | 3 +++ install/files/dev/deepwell.toml | 1 + install/files/local/deepwell.toml | 1 + install/files/prod/deepwell.toml | 1 + 6 files changed, 12 insertions(+) diff --git a/deepwell/config.example.toml b/deepwell/config.example.toml index 73b4911d1b..99f74ae396 100644 --- a/deepwell/config.example.toml +++ b/deepwell/config.example.toml @@ -371,6 +371,9 @@ presigned-path-length = 32 # The value should only be a few minutes, and no longer than 12 hours. presigned-expiration-minutes = 5 +# The maximum blob size allowed for user avatars, in KiB. +maximum-avatar-size-kb = 250 + [message] # The maximum size of a message's subject line, in bytes. 
diff --git a/deepwell/src/config/file.rs b/deepwell/src/config/file.rs index b04142336a..ca5ea1e3b0 100644 --- a/deepwell/src/config/file.rs +++ b/deepwell/src/config/file.rs @@ -188,6 +188,7 @@ struct User { struct FileSection { presigned_path_length: usize, presigned_expiration_minutes: u32, + maximum_avatar_size_kb: usize, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -316,6 +317,7 @@ impl ConfigFile { FileSection { presigned_path_length, presigned_expiration_minutes, + maximum_avatar_size_kb, }, message: Message { @@ -440,6 +442,7 @@ impl ConfigFile { minimum_name_bytes, presigned_path_length, presigned_expiry_secs: presigned_expiration_minutes * 60, + maximum_avatar_size: maximum_avatar_size_kb * 1024, maximum_message_subject_bytes, maximum_message_body_bytes, maximum_message_recipients, diff --git a/deepwell/src/config/object.rs b/deepwell/src/config/object.rs index d042c623b2..e900d5a982 100644 --- a/deepwell/src/config/object.rs +++ b/deepwell/src/config/object.rs @@ -206,6 +206,9 @@ pub struct Config { /// How long S3 presigned URLs will last before expiry. pub presigned_expiry_secs: u32, + /// Maximum size of a user's avatar image. + pub maximum_avatar_size: usize, + /// Maximum size of the subject line allowed in a direct message. 
pub maximum_message_subject_bytes: usize, diff --git a/install/files/dev/deepwell.toml b/install/files/dev/deepwell.toml index 4024040eb8..65a4949cf8 100644 --- a/install/files/dev/deepwell.toml +++ b/install/files/dev/deepwell.toml @@ -70,6 +70,7 @@ refill-name-change-days = 90 [file] presigned-path-length = 32 presigned-expiration-minutes = 10 +maximum-avatar-size-kb = 100 [message] maximum-subject-bytes = 128 diff --git a/install/files/local/deepwell.toml b/install/files/local/deepwell.toml index b06ad2bb23..273bdce82d 100644 --- a/install/files/local/deepwell.toml +++ b/install/files/local/deepwell.toml @@ -70,6 +70,7 @@ refill-name-change-days = 90 [file] presigned-path-length = 32 presigned-expiration-minutes = 10 +maximum-avatar-size-kb = 4096 [message] maximum-subject-bytes = 128 diff --git a/install/files/prod/deepwell.toml b/install/files/prod/deepwell.toml index 583182474f..e7b7e00cbe 100644 --- a/install/files/prod/deepwell.toml +++ b/install/files/prod/deepwell.toml @@ -70,6 +70,7 @@ refill-name-change-days = 90 [file] presigned-path-length = 32 presigned-expiration-minutes = 5 +maximum-avatar-size-kb = 100 [message] maximum-subject-bytes = 128 From 143c5add2e3c65ba1bbedeec2ddf7a2bf09f7472 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:20:15 -0400 Subject: [PATCH 66/91] Use new configuration field in avatar update. 
--- deepwell/src/config/file.rs | 2 +- deepwell/src/config/object.rs | 2 +- deepwell/src/services/user/service.rs | 20 +++++++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/deepwell/src/config/file.rs b/deepwell/src/config/file.rs index ca5ea1e3b0..745b6a3002 100644 --- a/deepwell/src/config/file.rs +++ b/deepwell/src/config/file.rs @@ -188,7 +188,7 @@ struct User { struct FileSection { presigned_path_length: usize, presigned_expiration_minutes: u32, - maximum_avatar_size_kb: usize, + maximum_avatar_size_kb: i64, } #[derive(Serialize, Deserialize, Debug, Clone)] diff --git a/deepwell/src/config/object.rs b/deepwell/src/config/object.rs index e900d5a982..172ec9d6fc 100644 --- a/deepwell/src/config/object.rs +++ b/deepwell/src/config/object.rs @@ -207,7 +207,7 @@ pub struct Config { pub presigned_expiry_secs: u32, /// Maximum size of a user's avatar image. - pub maximum_avatar_size: usize, + pub maximum_avatar_size: i64, /// Maximum size of the subject line allowed in a direct message. pub maximum_message_subject_bytes: usize, diff --git a/deepwell/src/services/user/service.rs b/deepwell/src/services/user/service.rs index 66d451fec5..eaa20169c1 100644 --- a/deepwell/src/services/user/service.rs +++ b/deepwell/src/services/user/service.rs @@ -425,16 +425,18 @@ impl UserService { let s3_hash = match uploaded_blob_id { None => None, Some(uploaded_blob_id) => { - let FinalizeBlobUploadOutput { - hash, - size, - .. - } = BlobService::finish_upload(ctx, user.user_id, &uploaded_blob_id).await?; - - if size > 0 { - // TODO add config setting for max avatar size + let config = ctx.config(); + let FinalizeBlobUploadOutput { hash, size, .. 
} = + BlobService::finish_upload(ctx, user.user_id, &uploaded_blob_id) + .await?; + + if size > config.maximum_avatar_size { + error!( + "Uploaded avatar size is too big {} > {}", + size, config.maximum_avatar_size, + ); + return Err(todo!()); } - todo!(); Some(hash.to_vec()) } From f2cc142c3b92f6fae9ae51d94bdff26ca0dfc270 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:22:12 -0400 Subject: [PATCH 67/91] Add new error case for blobs being too large. --- deepwell/src/services/error.rs | 4 ++++ deepwell/src/services/user/service.rs | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index 97ec4fb2f7..ae8e0075ba 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -242,6 +242,9 @@ pub enum Error { #[error("Cannot use blob uploaded by different user")] BlobWrongUser, + #[error("Uploaded blob is too big for this operation")] + BlobTooBig, + #[error("Text item does not exist")] TextNotFound, @@ -380,6 +383,7 @@ impl Error { Error::MessageNoRecipients => 4020, Error::MessageTooManyRecipients => 4021, Error::BlobWrongUser => 4022, + Error::BlobTooBig => 4023, // 4100 -- Localization Error::LocaleInvalid(_) => 4100, diff --git a/deepwell/src/services/user/service.rs b/deepwell/src/services/user/service.rs index eaa20169c1..e0c8b54452 100644 --- a/deepwell/src/services/user/service.rs +++ b/deepwell/src/services/user/service.rs @@ -435,7 +435,7 @@ impl UserService { "Uploaded avatar size is too big {} > {}", size, config.maximum_avatar_size, ); - return Err(todo!()); + return Err(Error::BlobTooBig); } Some(hash.to_vec()) From ca16de52e4322fb53edbeb21563e6cd67eed3823 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:26:22 -0400 Subject: [PATCH 68/91] Remove full BlobService dead_code ignore. 
--- deepwell/src/services/blob/service.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index b55634f2de..c164c76413 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -18,8 +18,6 @@ * along with this program. If not, see . */ -#![allow(dead_code)] - use super::prelude::*; use crate::models::blob_pending::{ self, Entity as BlobPending, Model as BlobPendingModel, @@ -345,6 +343,7 @@ impl BlobService { find_or_error!(Self::get_metadata_optional(ctx, hash), Blob) } + #[allow(dead_code)] // TEMP pub async fn exists(ctx: &ServiceContext<'_>, hash: &[u8]) -> Result { // Special handling for the empty blob if hash == EMPTY_BLOB_HASH { @@ -391,6 +390,7 @@ impl BlobService { } } + #[allow(dead_code)] // TEMP pub async fn hard_delete(ctx: &ServiceContext<'_>, hash: &[u8]) -> Result<()> { // Special handling for empty blobs // From ee601dfea6b9680f2f4a1c10e56beb01ee95f529 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:35:22 -0400 Subject: [PATCH 69/91] Add log lines for empty revisions. --- deepwell/src/services/file_revision/service.rs | 1 + deepwell/src/services/page_revision/service.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 65d5bd52d7..f58eccd4a7 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -121,6 +121,7 @@ impl FileRevisionService { // If nothing has changed, then don't create a new revision // Also don't rerender the page, this isn't an edit. 
if changes.is_empty() { + debug!("No changes in file, performing no action"); return Ok(None); } diff --git a/deepwell/src/services/page_revision/service.rs b/deepwell/src/services/page_revision/service.rs index 5c8b98f3cb..72a9241303 100644 --- a/deepwell/src/services/page_revision/service.rs +++ b/deepwell/src/services/page_revision/service.rs @@ -181,6 +181,7 @@ impl PageRevisionService { // If nothing has changed, then don't create a new revision if changes.is_empty() { + debug!("No changes in edit, only rerendering the page"); Self::rerender(ctx, site_id, page_id, 0).await?; return Ok(None); } From f370f5972c659e2428b57a83a22360e82a51ae89 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 01:36:32 -0400 Subject: [PATCH 70/91] Change file length max to constant. --- deepwell/src/services/file_revision/service.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index f58eccd4a7..17b1697f6d 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -28,6 +28,8 @@ use crate::web::{Bytes, FetchDirection}; use once_cell::sync::Lazy; use std::num::NonZeroI32; +pub const MAXIMUM_FILE_NAME_LENGTH: usize = 256; + /// The changes for the first revision. /// The first revision is always considered to have changed everything. /// @@ -131,8 +133,12 @@ impl FileRevisionService { return Err(Error::FileNameEmpty); } - if name.len() >= 256 { - error!("File name of invalid length: {}", name.len()); + if name.len() >= MAXIMUM_FILE_NAME_LENGTH { + error!( + "File name of invalid length: {} > {}", + name.len(), + MAXIMUM_FILE_NAME_LENGTH, + ); return Err(Error::FileNameTooLong); } From 528630767380ac290432fdfc30d3a6536808857e Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Mon, 30 Sep 2024 14:03:29 -0400 Subject: [PATCH 71/91] Add column for already-moved blobs. 
--- deepwell/migrations/20220906103252_deepwell.sql | 6 ++++-- deepwell/src/models/blob_pending.rs | 2 ++ deepwell/src/services/blob/service.rs | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index f207ea31dd..e88f726e20 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -422,10 +422,12 @@ CREATE TABLE blob_pending ( created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), expires_at TIMESTAMP WITH TIME ZONE NOT NULL, s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), + s3_hash BYTEA, -- NULL means not yet moved, NOT NULL means deleted from s3_path presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), - CHECK (expires_at > created_at), -- expiration time is not in the relative past - CHECK (length(external_id) = 24) -- default length for a cuid2 + CHECK (expires_at > created_at), -- expiration time is not in the relative past + CHECK (length(external_id) = 24), -- default length for a cuid2 + CHECK (s3_hash IS NULL OR length(s3_hash) = 64) -- SHA-512 hash size, if present ); -- diff --git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs index 6f08bd316a..c5c994d566 100644 --- a/deepwell/src/models/blob_pending.rs +++ b/deepwell/src/models/blob_pending.rs @@ -13,6 +13,8 @@ pub struct Model { pub expires_at: TimeDateTimeWithTimeZone, #[sea_orm(column_type = "Text")] pub s3_path: String, + #[sea_orm(column_type = "VarBinary(StringLen::None)")] + pub s3_hash: Option>, #[sea_orm(column_type = "Text")] pub presign_url: String, } diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index c164c76413..b0d8e9f3cd 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -110,6 +110,7 @@ impl BlobService { created_at: Set(created_at), expires_at: Set(expires_at), created_by: 
Set(user_id), + ..Default::default() }; let BlobPendingModel { From 3041dc56617ebe72a8e03aaef8f6712fa1174d25 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 1 Oct 2024 02:36:57 -0400 Subject: [PATCH 72/91] Add initial support for already-moved pending blob. --- deepwell/src/services/blob/service.rs | 95 +++++++++++++++++++++------ deepwell/src/services/error.rs | 8 +-- 2 files changed, 79 insertions(+), 24 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index b0d8e9f3cd..75da42f6cc 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -132,11 +132,12 @@ impl BlobService { ctx: &ServiceContext<'_>, user_id: i64, pending_blob_id: &str, - ) -> Result { + ) -> Result { let txn = ctx.transaction(); let row = BlobPending::find_by_id(pending_blob_id).one(txn).await?; let BlobPendingModel { s3_path, + s3_hash, created_by, .. } = match row { @@ -149,7 +150,10 @@ impl BlobService { return Err(Error::BlobWrongUser); } - Ok(s3_path) + Ok(PendingBlob { + s3_path, + moved_hash: s3_hash, + }) } pub async fn cancel_upload( @@ -159,7 +163,9 @@ impl BlobService { ) -> Result<()> { info!("Cancelling upload for blob for pending ID {pending_blob_id}"); let txn = ctx.transaction(); - let s3_path = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + let PendingBlob { s3_path, .. } = + Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; if Self::head(ctx, &s3_path).await?.is_some() { @@ -170,32 +176,33 @@ impl BlobService { Ok(()) } - pub async fn finish_upload( + /// Helper function to do the actual "move" step of blob finalization. + /// This is where, after uploading to the presign URL, the S3 object is + /// then moved to its permanent location with a hashed name. 
+ async fn move_uploaded( ctx: &ServiceContext<'_>, - user_id: i64, pending_blob_id: &str, + s3_path: &str, ) -> Result { - info!("Finishing upload for blob for pending ID {pending_blob_id}"); let bucket = ctx.s3_bucket(); let txn = ctx.transaction(); - debug!("Getting pending blob info"); - let s3_path = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; - debug!("Download uploaded blob from S3 uploads to get metadata"); - let response = bucket.get_object(&s3_path).await?; + let response = bucket.get_object(s3_path).await?; let data: Vec = match response.status_code() { 200 => response.into(), + 404 => { + error!("No blob uploaded at presign path {s3_path}"); + return Err(Error::BlobNotUploaded); + } _ => { - error!("Cannot find blob at presign path {s3_path}"); - BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; - info!("Deleted pending blob due to missing presign object in S3"); - return Err(Error::FileNotUploaded); + error!("Unable to retrieve uploaded blob at {s3_path} from S3"); + let error = s3_error(&response, "finalizing uploaded blob")?; + return Err(error); } }; - debug!("Deleting pending blob"); - BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; + // TODO compare actual data length to promised length // Special handling for empty blobs if data.is_empty() { @@ -260,18 +267,60 @@ impl BlobService { size, created: true, }), - _ => s3_error(&response, "creating final S3 blob"), + _ => s3_error(&response, "creating final S3 blob")?, } } }; - - // Delete uploaded version, in either case bucket.delete_object(&s3_path).await?; - // Return result based on blob status + // Update pending blob with hash + let model = blob_pending::ActiveModel { + external_id: Set(str!(pending_blob_id)), + s3_hash: Set(Some(hash.to_vec())), + ..Default::default() + }; + model.update(txn).await?; + + // Return result } + pub async fn finish_upload( + ctx: &ServiceContext<'_>, + user_id: i64, + pending_blob_id: &str, + ) -> Result { + 
info!("Finishing upload for blob for pending ID {pending_blob_id}"); + + let PendingBlob { + s3_path, + moved_hash, + } = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + + let output = match moved_hash { + // Need to move from pending to main hash area + None => Self::move_uploaded(ctx, pending_blob_id, &s3_path).await?, + + // Already moved + Some(hash_vec) => { + let BlobMetadata { mime, size, .. } = + Self::get_metadata(ctx, &hash_vec).await?; + let mut hash = [0; 64]; + hash.copy_from_slice(&hash_vec); + + FinalizeBlobUploadOutput { + hash, + mime, + size, + created: false, + } + } + }; + + // Return result based on blob status + Ok(output) + } + pub async fn get_optional( ctx: &ServiceContext<'_>, hash: &[u8], @@ -432,3 +481,9 @@ fn s3_error(response: &ResponseData, action: &str) -> Result { // TODO replace with S3 backend-specific error Err(Error::S3Response) } + +#[derive(Debug)] +struct PendingBlob { + s3_path: String, + moved_hash: Option>, +} diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index ae8e0075ba..a61bfff937 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -218,9 +218,6 @@ pub enum Error { #[error("File revision does not exist")] FileRevisionNotFound, - #[error("File not uploaded")] - FileNotUploaded, // occurs when presign URL is not uploaded to - #[error("Vote does not exist")] VoteNotFound, @@ -239,6 +236,9 @@ pub enum Error { #[error("Blob item does not exist")] BlobNotFound, + #[error("Blob not uploaded")] + BlobNotUploaded, + #[error("Cannot use blob uploaded by different user")] BlobWrongUser, @@ -324,7 +324,6 @@ impl Error { Error::MessageDraftNotFound => 2015, Error::BlobNotFound => 2016, Error::TextNotFound => 2017, - Error::FileNotUploaded => 2018, // 2100 -- Existing data Error::UserExists => 2100, @@ -384,6 +383,7 @@ impl Error { Error::MessageTooManyRecipients => 4021, Error::BlobWrongUser => 4022, Error::BlobTooBig => 4023, + 
Error::BlobNotUploaded => 4024, // 4100 -- Localization Error::LocaleInvalid(_) => 4100, From 16aaf3fa58c9557afa8a77cbddeb98dfb7e55c8a Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Tue, 1 Oct 2024 02:42:41 -0400 Subject: [PATCH 73/91] Add expected_length column to blob_pending. --- deepwell/migrations/20220906103252_deepwell.sql | 1 + deepwell/src/models/blob_pending.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index e88f726e20..aecb1981d4 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -421,6 +421,7 @@ CREATE TABLE blob_pending ( created_by BIGINT NOT NULL REFERENCES "user"(user_id), created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), expires_at TIMESTAMP WITH TIME ZONE NOT NULL, + expected_length BIGINT NOT NULL, s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), s3_hash BYTEA, -- NULL means not yet moved, NOT NULL means deleted from s3_path presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), diff --git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs index c5c994d566..8cd96e2abf 100644 --- a/deepwell/src/models/blob_pending.rs +++ b/deepwell/src/models/blob_pending.rs @@ -11,6 +11,7 @@ pub struct Model { pub created_by: i64, pub created_at: TimeDateTimeWithTimeZone, pub expires_at: TimeDateTimeWithTimeZone, + pub expected_length: i64, #[sea_orm(column_type = "Text")] pub s3_path: String, #[sea_orm(column_type = "VarBinary(StringLen::None)")] From c984abd02bf5e0a386e97ae66e02b67e7d5ed330 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 00:25:28 -0400 Subject: [PATCH 74/91] Add expected blob length to database. 
--- deepwell/src/services/blob/service.rs | 7 +++++-- deepwell/src/services/blob/structs.rs | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 75da42f6cc..18c179d38f 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -66,8 +66,10 @@ impl BlobService { /// The generated presign URL, which can be uploaded to. pub async fn start_upload( ctx: &ServiceContext<'_>, - StartBlobUpload { user_id }: StartBlobUpload, + StartBlobUpload { user_id, blob_size }: StartBlobUpload, ) -> Result { + info!("Creating upload by {user_id} with promised length {blob_size}"); + let config = ctx.config(); let txn = ctx.transaction(); @@ -105,11 +107,12 @@ impl BlobService { // Add pending blob entry let model = blob_pending::ActiveModel { external_id: Set(pending_blob_id), + expected_length: Set(blob_size), s3_path: Set(s3_path), presign_url: Set(presign_url), + created_by: Set(user_id), created_at: Set(created_at), expires_at: Set(expires_at), - created_by: Set(user_id), ..Default::default() }; diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index 51e3005024..824ab09251 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -24,6 +24,7 @@ use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] pub struct StartBlobUpload { pub user_id: i64, + pub blob_size: i64, } #[derive(Serialize, Debug, Clone)] From 2419237a8d60c028e866cdd98c7c89341cc9f60d Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 00:30:56 -0400 Subject: [PATCH 75/91] Add CHECK constraint for file size. 
--- deepwell/migrations/20220906103252_deepwell.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index aecb1981d4..d6ff7f3551 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -421,7 +421,7 @@ CREATE TABLE blob_pending ( created_by BIGINT NOT NULL REFERENCES "user"(user_id), created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), expires_at TIMESTAMP WITH TIME ZONE NOT NULL, - expected_length BIGINT NOT NULL, + expected_length BIGINT NOT NULL CHECK (expected_length >= 0), s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), s3_hash BYTEA, -- NULL means not yet moved, NOT NULL means deleted from s3_path presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), From 3bdaf5d5c84df6881b544d26bad1ec3f668cfd8d Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 00:39:55 -0400 Subject: [PATCH 76/91] Check expected file size in finish_upload(). 
--- deepwell/src/services/blob/service.rs | 14 +++++++++++++- deepwell/src/services/blob/structs.rs | 2 +- deepwell/src/services/error.rs | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 18c179d38f..2b392b025e 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -69,10 +69,12 @@ impl BlobService { StartBlobUpload { user_id, blob_size }: StartBlobUpload, ) -> Result { info!("Creating upload by {user_id} with promised length {blob_size}"); - let config = ctx.config(); let txn = ctx.transaction(); + // Convert expected length integer type + let blob_size = i64::try_from(blob_size).map_err(|_| Error::BlobTooBig)?; + // Generate primary key and random S3 path let pending_blob_id = cuid(); let s3_path = { @@ -142,6 +144,7 @@ impl BlobService { s3_path, s3_hash, created_by, + expected_length, .. } = match row { Some(pending) => pending, @@ -155,6 +158,7 @@ impl BlobService { Ok(PendingBlob { s3_path, + expected_length, moved_hash: s3_hash, }) } @@ -297,6 +301,7 @@ impl BlobService { let PendingBlob { s3_path, + expected_length, moved_hash, } = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; @@ -308,6 +313,12 @@ impl BlobService { Some(hash_vec) => { let BlobMetadata { mime, size, .. 
} = Self::get_metadata(ctx, &hash_vec).await?; + + if expected_length != size { + error!("Expected blob length of {expected_length} bytes, instead found {size} uploaded"); + return Err(Error::BlobSizeMismatch); + } + let mut hash = [0; 64]; hash.copy_from_slice(&hash_vec); @@ -488,5 +499,6 @@ fn s3_error(response: &ResponseData, action: &str) -> Result { #[derive(Debug)] struct PendingBlob { s3_path: String, + expected_length: i64, moved_hash: Option>, } diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index 824ab09251..1ff7176ee9 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -24,7 +24,7 @@ use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] pub struct StartBlobUpload { pub user_id: i64, - pub blob_size: i64, + pub blob_size: u64, } #[derive(Serialize, Debug, Clone)] diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index a61bfff937..1277f274bb 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -245,6 +245,9 @@ pub enum Error { #[error("Uploaded blob is too big for this operation")] BlobTooBig, + #[error("Uploaded blob does not match expected length")] + BlobSizeMismatch, + #[error("Text item does not exist")] TextNotFound, @@ -384,6 +387,7 @@ impl Error { Error::BlobWrongUser => 4022, Error::BlobTooBig => 4023, Error::BlobNotUploaded => 4024, + Error::BlobSizeMismatch => 4025, // 4100 -- Localization Error::LocaleInvalid(_) => 4100, From a257e71ca2a39181356bb6e7d452bef761fa9ba5 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 00:59:32 -0400 Subject: [PATCH 77/91] Add maximum-blob-size-kb field to config. 
--- deepwell/config.example.toml | 3 +++ install/files/dev/deepwell.toml | 3 ++- install/files/local/deepwell.toml | 3 ++- install/files/prod/deepwell.toml | 3 ++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/deepwell/config.example.toml b/deepwell/config.example.toml index 99f74ae396..65d4befa49 100644 --- a/deepwell/config.example.toml +++ b/deepwell/config.example.toml @@ -371,6 +371,9 @@ presigned-path-length = 32 # The value should only be a few minutes, and no longer than 12 hours. presigned-expiration-minutes = 5 +# The maximum blob size allowed globally, in KiB. +maximum-blob-size-kb = 1_048_576 + # The maximum blob size allowed for user avatars, in KiB. maximum-avatar-size-kb = 250 diff --git a/install/files/dev/deepwell.toml b/install/files/dev/deepwell.toml index 65a4949cf8..b842b4c716 100644 --- a/install/files/dev/deepwell.toml +++ b/install/files/dev/deepwell.toml @@ -70,7 +70,8 @@ refill-name-change-days = 90 [file] presigned-path-length = 32 presigned-expiration-minutes = 10 -maximum-avatar-size-kb = 100 +maximum-blob-size-kb = 1048576 # 1 GiB +maximum-avatar-size-kb = 100 # 100 KiB [message] maximum-subject-bytes = 128 diff --git a/install/files/local/deepwell.toml b/install/files/local/deepwell.toml index 273bdce82d..94d45d9dd7 100644 --- a/install/files/local/deepwell.toml +++ b/install/files/local/deepwell.toml @@ -70,7 +70,8 @@ refill-name-change-days = 90 [file] presigned-path-length = 32 presigned-expiration-minutes = 10 -maximum-avatar-size-kb = 4096 +maximum-blob-size-kb = 1048576 # 1 GiB +maximum-avatar-size-kb = 4096 # 4 MiB [message] maximum-subject-bytes = 128 diff --git a/install/files/prod/deepwell.toml b/install/files/prod/deepwell.toml index e7b7e00cbe..8032e9ed06 100644 --- a/install/files/prod/deepwell.toml +++ b/install/files/prod/deepwell.toml @@ -70,7 +70,8 @@ refill-name-change-days = 90 [file] presigned-path-length = 32 presigned-expiration-minutes = 5 -maximum-avatar-size-kb = 100 +maximum-blob-size-kb = 
1048576 # 1 GiB +maximum-avatar-size-kb = 100 # 100 KiB [message] maximum-subject-bytes = 128 From e5e494797c06638911d0d72d3de3ef6b904bb2b5 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 01:05:16 -0400 Subject: [PATCH 78/91] Add field to config struct. --- deepwell/src/config/file.rs | 3 +++ deepwell/src/config/object.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/deepwell/src/config/file.rs b/deepwell/src/config/file.rs index 745b6a3002..7f357697bc 100644 --- a/deepwell/src/config/file.rs +++ b/deepwell/src/config/file.rs @@ -188,6 +188,7 @@ struct User { struct FileSection { presigned_path_length: usize, presigned_expiration_minutes: u32, + maximum_blob_size_kb: i64, maximum_avatar_size_kb: i64, } @@ -317,6 +318,7 @@ impl ConfigFile { FileSection { presigned_path_length, presigned_expiration_minutes, + maximum_blob_size_kb, maximum_avatar_size_kb, }, message: @@ -442,6 +444,7 @@ impl ConfigFile { minimum_name_bytes, presigned_path_length, presigned_expiry_secs: presigned_expiration_minutes * 60, + maximum_blob_size: maximum_blob_size_kb * 1024, maximum_avatar_size: maximum_avatar_size_kb * 1024, maximum_message_subject_bytes, maximum_message_body_bytes, diff --git a/deepwell/src/config/object.rs b/deepwell/src/config/object.rs index 172ec9d6fc..d92a4ecf2e 100644 --- a/deepwell/src/config/object.rs +++ b/deepwell/src/config/object.rs @@ -206,6 +206,9 @@ pub struct Config { /// How long S3 presigned URLs will last before expiry. pub presigned_expiry_secs: u32, + /// Maximum size of a blob globally. + pub maximum_blob_size: i64, + /// Maximum size of a user's avatar image. pub maximum_avatar_size: i64, From 1e0735f702f4952585747e4a6ef9a6f274a1d3a9 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 01:19:21 -0400 Subject: [PATCH 79/91] Move size check. 
--- deepwell/src/services/blob/service.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 2b392b025e..42b8ce7d79 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -209,7 +209,10 @@ impl BlobService { } }; - // TODO compare actual data length to promised length + if expected_length != size { + error!("Expected blob length of {expected_length} bytes, instead found {size} uploaded"); + return Err(Error::BlobSizeMismatch); + } // Special handling for empty blobs if data.is_empty() { @@ -314,11 +317,6 @@ impl BlobService { let BlobMetadata { mime, size, .. } = Self::get_metadata(ctx, &hash_vec).await?; - if expected_length != size { - error!("Expected blob length of {expected_length} bytes, instead found {size} uploaded"); - return Err(Error::BlobSizeMismatch); - } - let mut hash = [0; 64]; hash.copy_from_slice(&hash_vec); From 4ff81128f650e1885bd9fc7b1e91742fd49988d1 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 01:40:34 -0400 Subject: [PATCH 80/91] Delete blob if found to mismatch. Also clears blobs that are too big, if uploaded. 
--- deepwell/src/services/blob/service.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 42b8ce7d79..274a3adfdb 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -190,6 +190,7 @@ impl BlobService { ctx: &ServiceContext<'_>, pending_blob_id: &str, s3_path: &str, + expected_length: usize, ) -> Result { let bucket = ctx.s3_bucket(); let txn = ctx.transaction(); @@ -209,8 +210,13 @@ impl BlobService { } }; - if expected_length != size { - error!("Expected blob length of {expected_length} bytes, instead found {size} uploaded"); + if expected_length != data.len() { + error!( + "Expected blob length of {} bytes, instead found {} uploaded. Deleting pending.", + expected_length, + data.len(), + ); + bucket.delete_object(&s3_path).await?; return Err(Error::BlobSizeMismatch); } @@ -310,13 +316,22 @@ impl BlobService { let output = match moved_hash { // Need to move from pending to main hash area - None => Self::move_uploaded(ctx, pending_blob_id, &s3_path).await?, + None => { + let expected_length = expected_length + .try_into() + .map_err(|_| Error::BlobSizeMismatch)?; + + Self::move_uploaded(ctx, pending_blob_id, &s3_path, expected_length) + .await? + } // Already moved Some(hash_vec) => { let BlobMetadata { mime, size, .. } = Self::get_metadata(ctx, &hash_vec).await?; + debug_assert_eq!(expected_length, size); + let mut hash = [0; 64]; hash.copy_from_slice(&hash_vec); From 2b48ce5909ec0606ca0c926cdf517c564674c3e2 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 03:26:41 -0400 Subject: [PATCH 81/91] Add check for blobs that are too large. 
--- deepwell/src/services/blob/service.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 274a3adfdb..ac5bf71bd2 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -72,8 +72,16 @@ impl BlobService { let config = ctx.config(); let txn = ctx.transaction(); - // Convert expected length integer type + // Convert expected length integer type, then check it let blob_size = i64::try_from(blob_size).map_err(|_| Error::BlobTooBig)?; + if blob_size > config.maximum_blob_size { + error!( + "Blob proposed to upload is too big ({} > {})", + blob_size, config.maximum_blob_size, + ); + + return Err(Error::BlobTooBig); + } // Generate primary key and random S3 path let pending_blob_id = cuid(); From 5f11d0f480c84a082a3ee50d0ae1203a7bf5c53f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Thu, 3 Oct 2024 23:52:35 -0400 Subject: [PATCH 82/91] Create request.py helper script. 
--- deepwell/scripts/request.py | 110 ++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100755 deepwell/scripts/request.py diff --git a/deepwell/scripts/request.py b/deepwell/scripts/request.py new file mode 100755 index 0000000000..094c0dbd7e --- /dev/null +++ b/deepwell/scripts/request.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import sys +from pprint import pprint + +import requests + + +def color_settings(value): + match value: + case "auto": + fd = sys.stdout.fileno() + return os.isatty(fd) + case "always": + return True + case "never": + return False + + +def print_data(data): + if isinstance(data, str): + print(data) + else: + pprint(data) + + +def deepwell_request(endpoint, method, data, id=0, color=False): + r = requests.post( + endpoint, + json={ + "jsonrpc": "2.0", + "method": method, + "params": data, + "id": id, + }, + ) + + if color: + green_start = "\x1b[32m" + red_start = "\x1b[31m" + color_end = "\x1b[0m" + else: + green_start = "" + red_start = "" + color_end = "" + + match r.json(): + case {"jsonrpc": "2.0", "id": id, "result": data}: + print(f"{green_start}OK {color_end}", end="") + print_data(data) + return 0 + case {"jsonrpc": "2.0", "id": id, "error": data}: + print(f"{red_start}ERR {color_end}", end="") + print_data(data) + return 1 + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser( + "deepwell-request", + description="Helper script to run DEEPWELL JSONRPC requests", + ) + argparser.add_argument( + "-H", + "--host", + default="localhost", + ) + argparser.add_argument( + "-p", + "--port", + type=int, + default=2747, + ) + argparser.add_argument( + "-s", + "--https", + dest="scheme", + action="store_const", + const="https", + default="http", + ) + argparser.add_argument( + "-I", + "--id", + default=0, + ) + argparser.add_argument( + "-C", + "--color", + choices=["never", "auto", "always"], + default="auto", + ) + argparser.add_argument("method") 
+ argparser.add_argument("data", nargs="?", type=json.loads, default="{}") + args = argparser.parse_args() + enable_color = color_settings(args.color) + + endpoint = f"{args.scheme}://{args.host}:{args.port}/jsonrpc" + exit_code = deepwell_request( + endpoint, + args.method, + args.data, + args.id, + color=enable_color, + ) + + sys.exit(exit_code) From c2bbe374531952eb34c8cb21362ad2cee86a63c7 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 4 Oct 2024 00:00:18 -0400 Subject: [PATCH 83/91] Add better multiline formatting. --- deepwell/scripts/request.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deepwell/scripts/request.py b/deepwell/scripts/request.py index 094c0dbd7e..06ac4506d5 100755 --- a/deepwell/scripts/request.py +++ b/deepwell/scripts/request.py @@ -4,7 +4,7 @@ import json import os import sys -from pprint import pprint +from pprint import pformat import requests @@ -24,7 +24,10 @@ def print_data(data): if isinstance(data, str): print(data) else: - pprint(data) + output = pformat(data) + if "\n" in output: + print() + print(output) def deepwell_request(endpoint, method, data, id=0, color=False): From 007c022100a561729e4b378104e50bab1f5b5b7f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Fri, 4 Oct 2024 00:00:48 -0400 Subject: [PATCH 84/91] Add example usage of new request.py script. --- deepwell/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/deepwell/README.md b/deepwell/README.md index 3f9c215ac4..09fcfc3a8b 100644 --- a/deepwell/README.md +++ b/deepwell/README.md @@ -114,6 +114,24 @@ $ curl -X POST --json '{"jsonrpc":"2.0","method":"ping","id":0}' http://localhos If you are unfamiliar with JSONRPC, you can read about it [on its website](https://www.jsonrpc.org/specification). For instance, one quirk is that for methods which take a non-list or object argument, you specify it as a list of one element. 
+There is also a helper script to assist with making JSONRPC requests, `scripts/request.py`. It requires the popular [`requests`](https://requests.readthedocs.io/) library to be installed. + +Example usage: + +```sh +$ scripts/request.py echo '{ "my": ["json","data"] }' +OK {'my': ['json', 'data']} + +$ scripts/request.py ping +OK Pong! + +$ scripts/request.py error +ERR +{'code': 4000, + 'data': None, + 'message': 'The request is in some way malformed or incorrect'} +``` + **NOTE:** When you are uploading files to local minio as part of testing file upload flows, **you must leave the URL unmodified**. The host `files` is used as the S3 provider, which is a problem since this is not a valid host on your development machine, which necessitates use of `--connect-to` to tell `curl` to connect to the appropriate location instead: ```sh From a85208119a71480b1cbce15cbb528567b836b5e4 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 17:38:55 -0400 Subject: [PATCH 85/91] Add script for uploading files to local S3. --- deepwell/scripts/upload.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100755 deepwell/scripts/upload.sh diff --git a/deepwell/scripts/upload.sh b/deepwell/scripts/upload.sh new file mode 100755 index 0000000000..375a6a80f7 --- /dev/null +++ b/deepwell/scripts/upload.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -eu + +# +# Helper script to upload files to a local S3 store for testing upload flows. +# + +if [[ $# -ne 2 ]]; then + echo >&2 "Usage: $0 " + exit 1 +fi + +# Allow either order of arguments, for convenience. +# If it starts with HTTP or HTTPS, we assume it's the presign URL. 
+if [[ $1 = http:* || $1 = https:* ]]; then + path="$2" + url="$1" +else + path="$1" + url="$2" +fi + +exec \ + curl \ + --connect-to 'files:9000:localhost:9000' \ + --upload-file "$path" \ + "$url" From b443290b34ab6e598e8f0a88022cf81c1c4a21e0 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 17:39:47 -0400 Subject: [PATCH 86/91] Mention upload script in README. --- deepwell/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/deepwell/README.md b/deepwell/README.md index 09fcfc3a8b..e89034a279 100644 --- a/deepwell/README.md +++ b/deepwell/README.md @@ -138,6 +138,12 @@ ERR $ curl --connect-to files:9000:localhost:9000 --upload-file ``` +Alternatively, you can use the helper script: + +```sh +$ scripts/upload.sh +``` + ### Database There are two important directories related to the management of the database (which DEEPWELL can be said to "own"). They are both fairly self-explanatory: From 844fcd57fb954c43f264a2a7318deb452bd1d8b2 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 18:00:07 -0400 Subject: [PATCH 87/91] Add wrapper for move_uploaded(), separate transaction. --- deepwell/src/services/blob/service.rs | 31 +++++++++++++++++++++++++++ deepwell/src/services/context.rs | 5 +++++ 2 files changed, 36 insertions(+) diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index ac5bf71bd2..6212061c5b 100644 --- a/deepwell/src/services/blob/service.rs +++ b/deepwell/src/services/blob/service.rs @@ -28,7 +28,9 @@ use rand::distributions::{Alphanumeric, DistString}; use rand::thread_rng; use s3::request_trait::ResponseData; use s3::serde_types::HeadObjectResult; +use sea_orm::TransactionTrait; use std::str; +use std::sync::Arc; use time::format_description::well_known::Rfc2822; use time::{Duration, OffsetDateTime}; @@ -194,11 +196,40 @@ impl BlobService { /// Helper function to do the actual "move" step of blob finalization. 
/// This is where, after uploading to the presign URL, the S3 object is /// then moved to its permanent location with a hashed name. + /// + /// NOTE: Because S3 changes cannot be rolled back on error, we are + /// creating a separate transaction here so that `blob_pending` + /// changes are persistent even if the outer request fails. async fn move_uploaded( ctx: &ServiceContext<'_>, pending_blob_id: &str, s3_path: &str, expected_length: usize, + ) -> Result { + let state = ctx.state(); + let db_state = Arc::clone(&state); + + // Produce temporary context in a new transaction + let txn = db_state.database.begin().await?; + let inner_ctx = ServiceContext::new(&state, &txn); + let result = Self::move_uploaded_inner( + &inner_ctx, + pending_blob_id, + s3_path, + expected_length, + ) + .await; + + // Commit separate transaction, recording a move (if it occurred) + txn.commit().await?; + result + } + + async fn move_uploaded_inner( + ctx: &ServiceContext<'_>, + pending_blob_id: &str, + s3_path: &str, + expected_length: usize, ) -> Result { let bucket = ctx.s3_bucket(); let txn = ctx.transaction(); diff --git a/deepwell/src/services/context.rs b/deepwell/src/services/context.rs index 05fa2250e0..cdb55d4a48 100644 --- a/deepwell/src/services/context.rs +++ b/deepwell/src/services/context.rs @@ -48,6 +48,11 @@ impl<'txn> ServiceContext<'txn> { } // Getters + #[inline] + pub fn state(&self) -> ServerState { + Arc::clone(&self.state) + } + #[inline] pub fn config(&self) -> &Config { &self.state.config From e925585b039994e54d8fc3c555e8bc88197a746f Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 18:07:03 -0400 Subject: [PATCH 88/91] Print JSON as JSON. Introducing Python-isms like capitalized bool values and single-quoted strings is not going to be helpful when working with Rust. 
--- deepwell/scripts/request.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/deepwell/scripts/request.py b/deepwell/scripts/request.py index 06ac4506d5..4b50abadc4 100755 --- a/deepwell/scripts/request.py +++ b/deepwell/scripts/request.py @@ -4,7 +4,6 @@ import json import os import sys -from pprint import pformat import requests @@ -24,10 +23,7 @@ def print_data(data): if isinstance(data, str): print(data) else: - output = pformat(data) - if "\n" in output: - print() - print(output) + print(json.dumps(data, indent=4)) def deepwell_request(endpoint, method, data, id=0, color=False): From e555f7ca689507c85d2b63561c66d184c40fab4e Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 18:08:43 -0400 Subject: [PATCH 89/91] Add logic for inline JSON responses. --- deepwell/scripts/request.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deepwell/scripts/request.py b/deepwell/scripts/request.py index 4b50abadc4..3cade6047a 100755 --- a/deepwell/scripts/request.py +++ b/deepwell/scripts/request.py @@ -23,7 +23,11 @@ def print_data(data): if isinstance(data, str): print(data) else: - print(json.dumps(data, indent=4)) + # Only print on multiple lines if it's "large" + output = json.dumps(data) + if len(output) > 16: + output = json.dumps(data, indent=4) + print(output) def deepwell_request(endpoint, method, data, id=0, color=False): From ec9fe3b0ec7362ca6ec3f5d1c1d8e39247138d23 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 18:09:40 -0400 Subject: [PATCH 90/91] Start JSON data on a separate line. 
--- deepwell/scripts/request.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepwell/scripts/request.py b/deepwell/scripts/request.py index 3cade6047a..0accb2dd90 100755 --- a/deepwell/scripts/request.py +++ b/deepwell/scripts/request.py @@ -27,6 +27,7 @@ def print_data(data): output = json.dumps(data) if len(output) > 16: output = json.dumps(data, indent=4) + print() print(output) From d57b0b4991406c97c6b923cd2910e69cd6a63847 Mon Sep 17 00:00:00 2001 From: Emmie Maeda Date: Sun, 6 Oct 2024 22:00:51 -0400 Subject: [PATCH 91/91] Bump deepwell version to v2024.10.6 --- deepwell/Cargo.lock | 2 +- deepwell/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deepwell/Cargo.lock b/deepwell/Cargo.lock index a8eaf09ed5..0c09c692e9 100644 --- a/deepwell/Cargo.lock +++ b/deepwell/Cargo.lock @@ -690,7 +690,7 @@ dependencies = [ [[package]] name = "deepwell" -version = "2024.9.14" +version = "2024.10.6" dependencies = [ "anyhow", "argon2", diff --git a/deepwell/Cargo.toml b/deepwell/Cargo.toml index 3e72bbb702..1414eab3f8 100644 --- a/deepwell/Cargo.toml +++ b/deepwell/Cargo.toml @@ -8,7 +8,7 @@ keywords = ["wikijump", "api", "backend", "wiki"] categories = ["asynchronous", "database", "web-programming::http-server"] exclude = [".gitignore", ".editorconfig"] -version = "2024.9.14" +version = "2024.10.6" authors = ["Emmie Smith "] edition = "2021"