Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/improvement/CLDSRV-402-optimize-…
Browse files Browse the repository at this point in the history
…multiobjectdelete-api' into w/8.6/improvement/CLDSRV-402-optimize-multiobjectdelete-api
  • Loading branch information
williamlardier committed Jul 13, 2023
2 parents c5b1ef6 + af0436f commit c2df0bd
Show file tree
Hide file tree
Showing 14 changed files with 687 additions and 152 deletions.
1 change: 1 addition & 0 deletions constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ const constants = {
NON_CURRENT_TYPE: 'noncurrent',
ORPHAN_DM_TYPE: 'orphan',
},
multiObjectDeleteConcurrency: 50,
};

module.exports = constants;
13 changes: 12 additions & 1 deletion lib/Config.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const { azureAccountNameRegex, base64Regex,
allowedUtapiEventFilterFields, allowedUtapiEventFilterStates,
} = require('../constants');
const { utapiVersion } = require('utapi');

const constants = require('../constants');

// config paths
const configSearchPaths = [
Expand Down Expand Up @@ -1562,6 +1562,17 @@ class Config extends EventEmitter {

// Version of the configuration we're running under
this.overlayVersion = config.overlayVersion || 0;

this.multiObjectDeleteConcurrency = constants.multiObjectDeleteConcurrency;
const extractedNumber = Number.parseInt(config.multiObjectDeleteConcurrency, 10);
if (!isNaN(extractedNumber) && extractedNumber > 0 && extractedNumber < 1000) {
this.multiObjectDeleteConcurrency = extractedNumber;
}

this.multiObjectDeleteEnableOptimizations = true;
if (config.multiObjectDeleteEnableOptimizations === false) {
this.multiObjectDeleteEnableOptimizations = false;
}
}

_getAuthData() {
Expand Down
18 changes: 18 additions & 0 deletions lib/api/apiUtils/object/deleteObject.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
 * _bucketRequiresOplogUpdate - determine whether deleting objects from
 * this bucket requires an oplog update
 * @param {BucketInfo} bucket - bucket object
 * @return {boolean} whether objects require oplog updates on deletion, or not
 */
function _bucketRequiresOplogUpdate(bucket) {
    // Default behavior is to require an oplog update: if the bucket
    // metadata is missing or does not expose the expected getters, stay
    // on the safe side.
    if (!bucket || !bucket.getLifecycleConfiguration || !bucket.getNotificationConfiguration) {
        return true;
    }
    // If the bucket has a lifecycle configuration or a notification
    // configuration set, we also require an oplog update.
    // Coerce to a real boolean to honor the documented @return type:
    // the getters return configuration objects (or null), not booleans.
    return !!(bucket.getLifecycleConfiguration() || bucket.getNotificationConfiguration());
}

module.exports = {
_bucketRequiresOplogUpdate,
};
346 changes: 219 additions & 127 deletions lib/api/multiObjectDelete.js

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion lib/api/objectDelete.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const { hasGovernanceBypassHeader, checkUserGovernanceBypass, ObjectLockInfo }
= require('./apiUtils/object/objectLockHelpers');
const { isRequesterNonAccountUser } = require('./apiUtils/authorization/permissionChecks');
const { config } = require('../Config');
const { _bucketRequiresOplogUpdate } = require('./apiUtils/object/deleteObject');

const versionIdUtils = versioning.VersionID;
const objectLockedError = new Error('object locked');
Expand Down Expand Up @@ -191,8 +192,12 @@ function objectDelete(authInfo, request, log, cb) {
delOptions.replayId = objectMD.uploadId;
}

if (!_bucketRequiresOplogUpdate(bucketMD)) {
delOptions.doesNotNeedOpogUpdate = true;
}

return services.deleteObject(bucketName, objectMD, objectKey,
delOptions, log, (err, delResult) => next(err, bucketMD,
delOptions, false, log, (err, delResult) => next(err, bucketMD,
objectMD, delResult, deleteInfo));
}
// putting a new delete marker
Expand Down
42 changes: 41 additions & 1 deletion lib/metadata/metadataUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,18 @@ function getNullVersionFromMaster(bucketName, objectKey, log, cb) {
* @param {string} bucketName - name of bucket
* @param {string} objectKey - name of object key
* @param {string} [versionId] - version of object to retrieve
* @param {object} cachedDocuments - cached version of the documents used for
* abstraction purposes
* @param {RequestLogger} log - request logger
* @param {function} cb - callback
* @return {undefined} - and call callback with err, bucket md and object md
*/
function metadataGetObject(bucketName, objectKey, versionId, log, cb) {
function metadataGetObject(bucketName, objectKey, versionId, cachedDocuments, log, cb) {
// versionId may be 'null', which asks metadata to fetch the null key specifically
const options = { versionId, getDeleteMarker: true };
if (cachedDocuments && cachedDocuments[objectKey]) {
return cb(null, cachedDocuments[objectKey]);
}
return metadata.getObjectMD(bucketName, objectKey, options, log,
(err, objMD) => {
if (err) {
Expand All @@ -84,6 +89,40 @@ function metadataGetObject(bucketName, objectKey, versionId, log, cb) {
});
}

/** metadataGetObjects - retrieves the metadata of several objects and/or
 * versions from metadata in a single batched call. This method uses
 * cursors, hence it is only compatible with a MongoDB metadata backend.
 * @param {string} bucketName - name of bucket
 * @param {object[]} objectsKeys - entries describing the objects to fetch;
 * each entry is expected to expose `inPlay.key` (the object key) and a
 * `versionId` — NOTE(review): confirm this shape against the callers
 * @param {RequestLogger} log - request logger
 * @param {function} cb - callback
 * @return {undefined} - and call callback with err and a map of object
 * metadata documents keyed by `${key}${versionId}`
 */
function metadataGetObjects(bucketName, objectsKeys, log, cb) {
    const options = { getDeleteMarker: true };
    // Build one request entry per key; each entry is null-guarded so a
    // missing element does not throw while assembling the batch.
    const objects = objectsKeys.map(objectKey => ({
        key: objectKey ? objectKey.inPlay.key : null,
        params: options,
        versionId: objectKey ? objectKey.versionId : null,
    }));

    // Returned entries follow the format: { key, doc, versionId }.
    // That is required with batching to properly map each returned
    // document back to the requested object/version pair.
    return metadata.getObjectsMD(bucketName, objects, log, (err, objMds) => {
        if (err) {
            log.debug('error getting objects MD from metadata', { error: err });
            return cb(err);
        }

        const result = {};
        objMds.forEach(objMd => {
            // Entries without a document (e.g. key not found) are skipped,
            // so absent keys are simply missing from the result map.
            if (objMd.doc) {
                result[`${objMd.doc.key}${objMd.versionId}`] = objMd.doc;
            }
        });

        return cb(null, result);
    });
}
/**
* Validate that a bucket is accessible and authorized to the user,
* return a specific error code otherwise
Expand Down Expand Up @@ -215,6 +254,7 @@ function metadataValidateBucket(params, log, callback) {
module.exports = {
validateBucket,
metadataGetObject,
metadataGetObjects,
metadataValidateBucketAndObj,
metadataValidateBucket,
};
4 changes: 2 additions & 2 deletions lib/routes/routeBackbeat.js
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ function putObjectTagging(request, response, log, callback) {
// retrieve it from metadata here.
if (dataStoreVersionId === '') {
return metadataGetObject(sourceBucket, request.objectKey,
sourceVersionId, log, (err, objMD) => {
sourceVersionId, null, log, (err, objMD) => {
if (err) {
return callback(err);
}
Expand Down Expand Up @@ -969,7 +969,7 @@ function deleteObjectTagging(request, response, log, callback) {
// retrieve it from metadata here.
if (dataStoreVersionId === '') {
return metadataGetObject(sourceBucket, request.objectKey,
sourceVersionId, log, (err, objMD) => {
sourceVersionId, null, log, (err, objMD) => {
if (err) {
return callback(err);
}
Expand Down
17 changes: 13 additions & 4 deletions lib/services.js
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,13 @@ const services = {
* @param {string} objectKey - object key name
* @param {object} options - other instructions, such as { versionId } to
* delete a specific version of the object
* @param {boolean} deferLocationDeletion - true if the object should not
* be removed from the storage, but be returned instead.
* @param {Log} log - logger instance
* @param {function} cb - callback from async.waterfall in objectGet
* @return {undefined}
*/
deleteObject(bucketName, objectMD, objectKey, options, log, cb) {
deleteObject(bucketName, objectMD, objectKey, options, deferLocationDeletion, log, cb) {
log.trace('deleting object from bucket');
assert.strictEqual(typeof bucketName, 'string');
assert.strictEqual(typeof objectMD, 'object');
Expand All @@ -327,12 +329,19 @@ const services = {
log.getSerializedUids());
if (objectMD.location === null) {
return cb(null, res);
} else if (!Array.isArray(objectMD.location)) {
}

if (deferLocationDeletion) {
return cb(null, Array.isArray(objectMD.location)
? objectMD.location : [objectMD.location]);
}

if (!Array.isArray(objectMD.location)) {
data.delete(objectMD.location, deleteLog);
return cb(null, res);
}
return data.batchDelete(objectMD.location, null, null,
deleteLog, err => {

return data.batchDelete(objectMD.location, null, null, deleteLog, err => {
if (err) {
return cb(err);
}
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"dependencies": {
"@azure/storage-blob": "^12.12.0",
"@hapi/joi": "^17.1.0",
"arsenal": "git+https://github.com/scality/arsenal#8.1.98",
"arsenal": "git+https://github.com/scality/arsenal#8.1.104",
"async": "~2.5.0",
"aws-sdk": "2.905.0",
"bucketclient": "scality/bucketclient#8.1.9",
Expand Down
147 changes: 145 additions & 2 deletions tests/unit/api/multiObjectDelete.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
const assert = require('assert');
const { errors, storage } = require('arsenal');

const { getObjMetadataAndDelete }
const { decodeObjectVersion, getObjMetadataAndDelete, initializeMultiObjectDeleteWithBatchingSupport }
= require('../../../lib/api/multiObjectDelete');
const multiObjectDelete = require('../../../lib/api/multiObjectDelete');
const { cleanup, DummyRequestLogger, makeAuthInfo } = require('../helpers');
const DummyRequest = require('../DummyRequest');
const { bucketPut } = require('../../../lib/api/bucketPut');
const objectPut = require('../../../lib/api/objectPut');
const log = new DummyRequestLogger();

const { metadata } = storage.metadata.inMemory.metadata;
const { ds } = storage.data.inMemory.datastore;
const sinon = require('sinon');

const log = new DummyRequestLogger();
const canonicalID = 'accessKey1';
const authInfo = makeAuthInfo(canonicalID);
const namespace = 'default';
Expand All @@ -20,6 +22,8 @@ const postBody = Buffer.from('I am a body', 'utf8');
const contentLength = 2 * postBody.length;
const objectKey1 = 'objectName1';
const objectKey2 = 'objectName2';
const metadataUtils = require('../../../lib/metadata/metadataUtils');
const services = require('../../../lib/services');
const testBucketPutRequest = new DummyRequest({
bucketName,
namespace,
Expand Down Expand Up @@ -69,6 +73,10 @@ describe('getObjMetadataAndDelete function for multiObjectDelete', () => {
});
});

afterEach(() => {
sinon.restore();
});

it('should successfully get object metadata and then ' +
'delete metadata and data', done => {
getObjMetadataAndDelete(authInfo, 'foo', request, bucketName, bucket,
Expand Down Expand Up @@ -185,4 +193,139 @@ describe('getObjMetadataAndDelete function for multiObjectDelete', () => {
done();
});
});

it('should properly batch delete data even if there are errors in other objects', done => {
    // Stub the per-object deletion service: the callback is the 7th
    // positional argument (index 6). First call fails with InternalError,
    // second call succeeds — simulating a partial-failure batch.
    const deleteObjectStub = sinon.stub(services, 'deleteObject');
    deleteObjectStub.onCall(0).callsArgWith(6, errors.InternalError);
    deleteObjectStub.onCall(1).callsArgWith(6, null);

    getObjMetadataAndDelete(authInfo, 'foo', request, bucketName, bucket,
        true, [], [{ key: objectKey1 }, { key: objectKey2 }], log,
        (err, quietSetting, errorResults, numOfObjects,
        successfullyDeleted, totalContentLengthDeleted) => {
            assert.ifError(err);
            assert.strictEqual(quietSetting, true);
            // Only the first object should be reported as an error result.
            assert.deepStrictEqual(errorResults, [
                {
                    entry: {
                        key: objectKey1,
                    },
                    error: errors.InternalError,
                },
            ]);
            // One successful deletion out of two, so half the content length.
            assert.strictEqual(numOfObjects, 1);
            assert.strictEqual(totalContentLengthDeleted, contentLength / 2);
            // Expect still in memory as we stubbed the function
            assert.strictEqual(metadata.keyMaps.get(bucketName).has(objectKey1), true);
            assert.strictEqual(metadata.keyMaps.get(bucketName).has(objectKey2), true);
            // ensure object 2 only is in the list of successful deletions
            assert.strictEqual(successfullyDeleted.length, 1);
            assert.deepStrictEqual(successfullyDeleted[0].entry.key, objectKey2);
            return done();
        });
});
});

describe('initializeMultiObjectDeleteWithBatchingSupport', () => {
    // Shared fixtures, re-created before each test so sinon spies/stubs
    // never leak state between cases.
    let bucketName;
    let inPlay;
    let log;
    let callback;

    beforeEach(() => {
        bucketName = 'myBucket';
        inPlay = { one: 'object1', two: 'object2' };
        log = {};
        callback = sinon.spy();
    });

    afterEach(() => {
        // Restore all module stubs (metadataUtils, multiObjectDelete).
        sinon.restore();
    });

    it('should not throw if the decodeObjectVersion function fails', done => {
        const metadataGetObjectsStub = sinon.stub(metadataUtils, 'metadataGetObjects').yields(null, {});
        // Simulate a decode failure: the helper returns an [error] tuple.
        sinon.stub(multiObjectDelete, 'decodeObjectVersion').returns([new Error('decode error')]);

        initializeMultiObjectDeleteWithBatchingSupport(bucketName, inPlay, log, callback);

        assert.strictEqual(metadataGetObjectsStub.callCount, 1);
        sinon.assert.calledOnce(callback);
        // The decode error must not propagate: callback gets (null, {}).
        assert.strictEqual(callback.getCall(0).args[0], null);
        assert.deepStrictEqual(callback.getCall(0).args[1], {});
        done();
    });

    it('should call the batching method if the backend supports it', done => {
        const metadataGetObjectsStub = sinon.stub(metadataUtils, 'metadataGetObjects').yields(null, {});
        const objectVersion = 'someVersionId';
        sinon.stub(multiObjectDelete, 'decodeObjectVersion').returns([null, objectVersion]);

        initializeMultiObjectDeleteWithBatchingSupport(bucketName, inPlay, log, callback);

        // The batched metadata fetch is used exactly once.
        assert.strictEqual(metadataGetObjectsStub.callCount, 1);
        sinon.assert.calledOnce(callback);
        assert.strictEqual(callback.getCall(0).args[0], null);
        done();
    });

    it('should not return an error if the metadataGetObjects function fails', done => {
        // Batched fetch failures are swallowed (best-effort optimization):
        // the caller falls back to an empty cache instead of erroring out.
        const metadataGetObjectsStub =
            sinon.stub(metadataUtils, 'metadataGetObjects').yields(new Error('metadata error'), null);
        const objectVersion = 'someVersionId';
        sinon.stub(multiObjectDelete, 'decodeObjectVersion').returns([null, objectVersion]);

        initializeMultiObjectDeleteWithBatchingSupport(bucketName, inPlay, log, callback);

        assert.strictEqual(metadataGetObjectsStub.callCount, 1);
        sinon.assert.calledOnce(callback);
        assert.strictEqual(callback.getCall(0).args[0] instanceof Error, false);
        assert.deepStrictEqual(callback.getCall(0).args[1], {});
        done();
    });

    it('should populate the cache when the backend supports it', done => {
        // Whatever the batched fetch yields is passed through verbatim as
        // the cache of metadata documents.
        const expectedOutput = {
            one: {
                value: 'object1',
            },
            two: {
                value: 'object2',
            },
        };
        const metadataGetObjectsStub = sinon.stub(metadataUtils, 'metadataGetObjects').yields(null, expectedOutput);
        const objectVersion = 'someVersionId';
        sinon.stub(multiObjectDelete, 'decodeObjectVersion').returns([null, objectVersion]);

        initializeMultiObjectDeleteWithBatchingSupport(bucketName, inPlay, log, callback);

        assert.strictEqual(metadataGetObjectsStub.callCount, 1);
        sinon.assert.calledOnce(callback);
        assert.strictEqual(callback.getCall(0).args[0], null);
        assert.deepStrictEqual(callback.getCall(0).args[1], expectedOutput);
        done();
    });
});

describe('decodeObjectVersion function helper', () => {
    // decodeObjectVersion follows an errors-as-values convention: it
    // returns an [err, decodedVersionId] tuple and never throws, so each
    // test inspects the returned tuple.
    it('should return a NoSuchVersion error for invalid version IDs', () => {
        // '\0' is not a valid encoded version ID.
        const ret = decodeObjectVersion({
            versionId: '\0',
        });
        assert(ret[0].is.NoSuchVersion);
    });

    it('should return "null" for null versionId', () => {
        // The literal string 'null' is a sentinel asking for the null version.
        const ret = decodeObjectVersion({
            versionId: 'null',
        });
        assert.strictEqual(ret[0], null);
        assert.strictEqual(ret[1], 'null');
    });

    it('should return null error on success', () => {
        // No versionId at all: no error, and the decoded ID is undefined.
        const ret = decodeObjectVersion({});
        assert.ifError(ret[0]);
        assert.deepStrictEqual(ret[1], undefined);
    });
});
Loading

0 comments on commit c2df0bd

Please sign in to comment.