From c270e1802f55f957c3d5fd54ecc862afb527e47e Mon Sep 17 00:00:00 2001 From: Val Hendrix Date: Thu, 19 Dec 2024 13:54:16 -0800 Subject: [PATCH] feat: implements batch file upload with promises - Updated `DataPackage.js` to use `batchSizeFetch`, which replaces `batchModeValue`. - Added `batchSizeUpload` configuration in `AppModel.js`. - Implemented `uploadFilesInBatch` method in `DataItemView.js` to handle batch file uploads using promises. - Ensured that the `_.each` loop completes before proceeding to batch processing. Closes NCEAS#2224 --- src/js/collections/DataPackage.js | 179 +++++++++++++++--------- src/js/models/AppModel.js | 23 +++- src/js/views/DataItemView.js | 133 ++++++++++++++++------ 3 files changed, 212 insertions(+), 123 deletions(-) diff --git a/src/js/collections/DataPackage.js b/src/js/collections/DataPackage.js index 6ab0895a6..94e381d8a 100644 --- a/src/js/collections/DataPackage.js +++ b/src/js/collections/DataPackage.js @@ -404,106 +404,107 @@ define(['jquery', 'underscore', 'backbone', 'rdflib', "uuid", "md5", * @param {number} [batchSize=10] - The number of models to fetch in each batch. * @param {number} [timeout=5000] - The timeout for each fetch request in milliseconds. * @param {number} [maxRetries=3] - The maximum number of retries for each fetch request.
+ * @since 0.0.0 */ fetchMemberModels(models, index = 0, batchSize = 10, timeout = 5000, maxRetries = 3) { - // Update the number of file metadata items being loaded - this.packageModel.set("numLoadingFileMetadata", models.length - index); + // Update the number of file metadata items being loaded + this.packageModel.set("numLoadingFileMetadata", models.length - index); - // If the index is greater than or equal to the length of the models array, stop fetching - if (index >= models.length) { - this.triggerComplete(); - return; - } + // If the index is greater than or equal to the length of the models array, stop fetching + if (index >= models.length) { + this.triggerComplete(); + return; + } - // If batchSize is 0, set it to the total number of models - if (batchSize == 0) batchSize = models.length; - - const collection = this; - // Slice the models array to get the current batch - const batch = models.slice(index, index + batchSize); - - // Create an array of promises for fetching each model in the batch - const fetchPromises = batch.map((memberModel) => { - return new Promise((resolve, reject) => { - const attemptFetch = (retriesLeft) => { - // Create a promise for the fetch request - const fetchPromise = new Promise((fetchResolve, fetchReject) => { - memberModel.fetch({ - success: () => { - // Once the model is synced, handle the response - memberModel.once("sync", (oldModel) => { - const newModel = collection.getMember(oldModel); - - // If the type of the old model is different from the new model - if (oldModel.type != newModel.type) { - if (newModel.type == "DataPackage") { - // If the new model is a DataPackage, replace the old model with the new one - oldModel.trigger("replace", newModel); - fetchResolve(); - } else { - // Otherwise, fetch the new model and replace the old model with the new one - newModel.set("synced", false); - newModel.fetch(); - newModel.once("sync", (fetchedModel) => { - fetchedModel.set("synced", true); - collection.remove(oldModel); - 
collection.add(fetchedModel); - oldModel.trigger("replace", newModel); + // If batchSize is 0, set it to the total number of models + if (batchSize == 0) batchSize = models.length; + + const collection = this; + // Slice the models array to get the current batch + const batch = models.slice(index, index + batchSize); + + // Create an array of promises for fetching each model in the batch + const fetchPromises = batch.map((memberModel) => { + return new Promise((resolve, reject) => { + const attemptFetch = (retriesLeft) => { + // Create a promise for the fetch request + const fetchPromise = new Promise((fetchResolve, fetchReject) => { + memberModel.fetch({ + success: () => { + // Once the model is synced, handle the response + memberModel.once("sync", (oldModel) => { + const newModel = collection.getMember(oldModel); + + // If the type of the old model is different from the new model + if (oldModel.type != newModel.type) { + if (newModel.type == "DataPackage") { + // If the new model is a DataPackage, replace the old model with the new one + oldModel.trigger("replace", newModel); + fetchResolve(); + } else { + // Otherwise, fetch the new model and replace the old model with the new one + newModel.set("synced", false); + newModel.fetch(); + newModel.once("sync", (fetchedModel) => { + fetchedModel.set("synced", true); + collection.remove(oldModel); + collection.add(fetchedModel); + oldModel.trigger("replace", newModel); + if (newModel.type == "EML") collection.trigger("add:EML"); + fetchResolve(); + }); + } + } else { + // If the type of the old model is the same as the new model, merge the new model into the collection + newModel.set("synced", true); + collection.add(newModel, { merge: true }); if (newModel.type == "EML") collection.trigger("add:EML"); fetchResolve(); - }); - } + } + }); + }, + error: (model, response) => fetchReject(new Error(response.statusText)) + }); + }); + + // Create a promise for the timeout + const timeoutPromise = new Promise((_, 
timeoutReject) => { + setTimeout(() => timeoutReject(new Error("Fetch timed out")), timeout); + }); + + // Race the fetch promise against the timeout promise + Promise.race([fetchPromise, timeoutPromise]) + .then(resolve) + .catch((error) => { + if (retriesLeft > 0) { + // Retry the fetch if there are retries left + console.warn(`Retrying fetch for model: ${memberModel.id}, retries left: ${retriesLeft}, error: ${error}`); + attemptFetch(retriesLeft - 1); } else { - // If the type of the old model is the same as the new model, merge the new model into the collection - newModel.set("synced", true); - collection.add(newModel, { merge: true }); - if (newModel.type == "EML") collection.trigger("add:EML"); - fetchResolve(); + // Reject the promise if all retries are exhausted + console.error(`Failed to fetch model: ${memberModel.id} after ${maxRetries} retries, error: ${error}`); + reject(error); } }); - }, - error: (model, response) => fetchReject(new Error(response.statusText)) - }); - }); + }; - // Create a promise for the timeout - const timeoutPromise = new Promise((_, timeoutReject) => { - setTimeout(() => timeoutReject(new Error("Fetch timed out")), timeout); + // Start the fetch attempt with the maximum number of retries + attemptFetch(maxRetries); }); + }); - // Race the fetch promise against the timeout promise - Promise.race([fetchPromise, timeoutPromise]) - .then(resolve) - .catch((error) => { - if (retriesLeft > 0) { - // Retry the fetch if there are retries left - console.warn(`Retrying fetch for model: ${memberModel.id}, retries left: ${retriesLeft}, error: ${error}`); - attemptFetch(retriesLeft - 1); - } else { - // Reject the promise if all retries are exhausted - console.error(`Failed to fetch model: ${memberModel.id} after ${maxRetries} retries, error: ${error}`); - reject(error); - } - }); - }; - - // Start the fetch attempt with the maximum number of retries - attemptFetch(maxRetries); - }); - }); - - // Once all fetch promises are resolved, fetch 
the next batch - Promise.allSettled(fetchPromises).then((results) => { - const errors = results.filter(result => result.status === "rejected"); - if (errors.length > 0) { - console.error("Error fetching member models:", errors); - } - // Fetch the next batch of models - this.fetchMemberModels.call(collection, models, index + batchSize, batchSize, timeout, maxRetries); - }).catch((error) => { - console.error("Error fetching member models:", error); - }); - }, + // Once all fetch promises are resolved, fetch the next batch + Promise.allSettled(fetchPromises).then((results) => { + const errors = results.filter(result => result.status === "rejected"); + if (errors.length > 0) { + console.error("Error fetching member models:", errors); + } + // Fetch the next batch of models + this.fetchMemberModels.call(collection, models, index + batchSize, batchSize, timeout, maxRetries); + }).catch((error) => { + console.error("Error fetching member models:", error); + }); + }, /** * Overload fetch calls for a DataPackage @@ -685,7 +686,7 @@ define(['jquery', 'underscore', 'backbone', 'rdflib', "uuid", "md5", //Don't fetch each member model if the fetchModels property on this Collection is set to false if( this.fetchModels !== false ){ // Start fetching member models - this.fetchMemberModels.call(this, models, 0, MetacatUI.appModel.get("batchModeValue")); + this.fetchMemberModels.call(this, models, 0, MetacatUI.appModel.get("batchSizeFetch")); } } catch (error) { diff --git a/src/js/models/AppModel.js b/src/js/models/AppModel.js index 392a33f60..94b0cf38c 100644 --- a/src/js/models/AppModel.js +++ b/src/js/models/AppModel.js @@ -1998,7 +1998,7 @@ define(['jquery', 'underscore', 'backbone'], packageFormat: 'application%2Fbagit-1.0', /** - * Whether to batch requests to the DataONE API. This is an experimental feature + * Whether to batch fetch requests from the DataONE API. This is an experimental feature * and should be used with caution. 
If set to a number greater than 0, MetacatUI will * batch requests to the DataONE API and send them in groups of this size. This can * improve performance when making many requests to the DataONE API, but can also @@ -2011,8 +2011,27 @@ define(['jquery', 'underscore', 'backbone'], * @type {number} * @default 0 * @example 20 + * @since 0.0.0 */ - batchModeValue: 0, + batchSizeFetch: 0, + + /** + * Whether to batch uploads to the DataONE API. This is an experimental feature + * and should be used with caution. If set to a number greater than 0, MetacatUI will + * batch uploads to the DataONE API and send them in groups of this size. This can + * improve performance when uploading many files to the DataONE API, but can also + * cause issues if the requests are too large or if the DataONE API is not able to + * handle the batched requests. + * + * Currently, this feature is only used in the DataPackageModel when uploading files + * to the DataONE API. + * + * @type {number} + * @default 0 + * @example 20 + * @since 0.0.0 + */ + batchSizeUpload: 0 }, MetacatUI.AppConfig), defaultView: "data", diff --git a/src/js/views/DataItemView.js b/src/js/views/DataItemView.js index d8f5f6265..ee52ea49d 100644 --- a/src/js/views/DataItemView.js +++ b/src/js/views/DataItemView.js @@ -505,6 +505,78 @@ define(['underscore', 'jquery', 'backbone', 'models/DataONEObject', }, + /** + * Method to handle batch file uploads. + * This method processes files independently to avoid being slowed down by large files. + * + * @param {FileList} fileList - The list of files to be uploaded. + * @param {number} [batchSize=10] - The number of files to upload concurrently. 
+ * @since 0.0.0 + */ + uploadFilesInBatch(fileList, batchSize = 10) { + const view = this; + let currentIndex = 0; // Index of the current file being processed + let activeUploads = 0; // Counter for the number of active uploads + // If batchSize is 0, set it to the total number of files + if (batchSize == 0) batchSize = fileList.length; + /** + * Function to upload the next file in the list. + * This function is called recursively to ensure that the number of concurrent uploads + * does not exceed the batch size. + */ + function uploadNextFile() { + // If all files have been processed, return + if (currentIndex >= fileList.length) { + return; + } + // If the number of active uploads is less than the batch size, start a new upload + if (activeUploads < batchSize) { + const dataONEObject = fileList[currentIndex]; + currentIndex++; // Move to the next file + activeUploads++; // Increment the active uploads counter + // Create a new Promise to handle the file upload + new Promise((resolve, reject) => { + // If the file needs to be uploaded and its checksum is not calculated + if (dataONEObject.get("uploadFile") && !dataONEObject.get("checksum")) { + // Stop listening to previous checksumCalculated events + dataONEObject.stopListening(dataONEObject, "checksumCalculated"); + // Listen to the checksumCalculated event to start the upload + dataONEObject.listenToOnce(dataONEObject, "checksumCalculated", () => { + dataONEObject.save(); // Save the file + // Listen to changes in the uploadStatus to resolve the Promise + dataONEObject.listenTo(dataONEObject, "change:uploadStatus", () => { + if (dataONEObject.get("uploadStatus") !== "p" && dataONEObject.get("uploadStatus") !== "q" && dataONEObject.get("uploadStatus") !== "l") { + resolve(); // Resolve the Promise when the upload is complete + } + }); + }); + try { + dataONEObject.calculateChecksum(); // Calculate the checksum + } catch (exception) { + reject(exception); // Reject the Promise if an error occurs + } + } else { 
+ resolve(); // Resolve the Promise if the file does not need to be uploaded + } + }) + .then(() => { + activeUploads--; // Decrement the active uploads counter + uploadNextFile(); // Start the next file upload + }) + .catch((error) => { + console.error("Error uploading file:", error); + activeUploads--; // Decrement the active uploads counter + uploadNextFile(); // Start the next file upload + }); + uploadNextFile(); // Start the next file upload + } + } + // Start the initial batch of uploads + for (let i = 0; i < batchSize; i++) { + uploadNextFile(); + } + }, + /** With a file list from the file picker or drag and drop, add the files to the collection @@ -536,49 +608,46 @@ define(['underscore', 'jquery', 'backbone', 'models/DataONEObject', if ( typeof event.delegateTarget.dataset.id !== "undefined" ) { this.parentSciMeta = this.getParentScienceMetadata(event); this.collection = this.getParentDataPackage(event); - - // Read each file, and make a DataONEObject - _.each(fileList, function(file) { - + // Queue the files for upload + const queueFilesPromise = new Promise((resolve) => { + _.each( + fileList, + function (file) { var uploadStatus = "l", - errorMessage = ""; + errorMessage = ""; - if( file.size == 0 ){ + if (file.size == 0) { uploadStatus = "e"; - errorMessage = "This is an empty file. It won't be included in the dataset."; + errorMessage = + "This is an empty file. 
It won't be included in the dataset."; } var dataONEObject = new DataONEObject({ - synced: true, - type: "Data", - fileName: file.name, - size: file.size, - mediaType: file.type, - uploadFile: file, - uploadStatus: uploadStatus, - errorMessage: errorMessage, - isDocumentedBy: [this.parentSciMeta.id], - isDocumentedByModels: [this.parentSciMeta], - resourceMap: [this.collection.packageModel.id] + synced: true, + type: "Data", + fileName: file.name, + size: file.size, + mediaType: file.type, + uploadFile: file, + uploadStatus: uploadStatus, + errorMessage: errorMessage, + isDocumentedBy: [this.parentSciMeta.id], + isDocumentedByModels: [this.parentSciMeta], + resourceMap: [this.collection.packageModel.id], }); // Add it to the parent collection this.collection.add(dataONEObject); + }, + this, + ); + resolve(); + }); - // Asychronously calculate the checksum - if ( dataONEObject.get("uploadFile") && ! dataONEObject.get("checksum") ) { - dataONEObject.stopListening(dataONEObject, "checksumCalculated"); - dataONEObject.listenToOnce(dataONEObject, "checksumCalculated", dataONEObject.save); - try { - dataONEObject.calculateChecksum(); - } catch (exception) { - // TODO: Fail gracefully here for the user - } - } - - - }, this); - + queueFilesPromise.then(() => { + // Call the batch upload method + this.uploadFilesInBatch(this.collection.models, MetacatUI.appModel.get('batchSizeUpload')); + }); } },