From 3b9f291abb49697aab61a594acd5696dcda05564 Mon Sep 17 00:00:00 2001 From: Sven Breuner Date: Wed, 13 Mar 2024 22:50:00 +0100 Subject: [PATCH] s3: support random fill in object prefix A sequence of three or more "%" chars in an object prefix will now get replaced by a random hex string of the same length. --- CHANGELOG.md | 9 +++ Makefile | 2 +- source/ProgArgs.cpp | 8 ++- source/ProgArgs.h | 8 ++- source/toolkits/random/RandAlgoGoldenPrime.h | 5 ++ source/workers/LocalWorker.cpp | 59 +++++++++++++++++++- source/workers/LocalWorker.h | 2 + 7 files changed, 89 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16bafe5..81fa2c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog of elbencho +## v3.0.6 (work in progress) + +### New Features & Enhancements +* Added new multi-arch (ARM64 & x86_64) docker container with CUDA/GDS support. +* If an S3 object prefix contains a sequence of three or more '%' chars, this sequence will now get replaced by a random hex string of the same length. + +### Contributors +* Thanks to Phil Canman, Erez Binia and Ohad Shamir for helpful comments and suggestions. + ## v3.0.5 (Jan 05, 2024) ### New Features & Enhancements diff --git a/Makefile b/Makefile index c303e5e..70c00fa 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ EXE_NAME ?= elbencho EXE_VER_MAJOR ?= 3 EXE_VER_MINOR ?= 0 -EXE_VER_PATCHLEVEL ?= 5 +EXE_VER_PATCHLEVEL ?= 6 EXE_VERSION ?= $(EXE_VER_MAJOR).$(EXE_VER_MINOR)-$(EXE_VER_PATCHLEVEL) EXE ?= $(BIN_PATH)/$(EXE_NAME) EXE_UNSTRIPPED ?= $(EXE)-unstripped diff --git a/source/ProgArgs.cpp b/source/ProgArgs.cpp index 6693f03..a6820bd 100644 --- a/source/ProgArgs.cpp +++ b/source/ProgArgs.cpp @@ -475,7 +475,8 @@ void ProgArgs::defineAllowedArgs() "Don't check for S3 multi-part uploads exceeding 10,000 parts.") /*s3o*/ (ARG_S3OBJECTPREFIX_LONG, bpo::value(&this->s3ObjectPrefix), "S3 object prefix. 
This will be prepended to all object names when the benchmark path " - "is a bucket.") + "is a bucket. (A sequence of 3 to 16 \"" RAND_PREFIX_MARKS_SUBSTR "\" chars will be " + "replaced by a random hex string of the same length.)") /*s3r*/ (ARG_S3RANDOBJ_LONG, bpo::bool_switch(&this->useS3RandObjSelect), "Read at random offsets and randomly select a new object for each S3 block read. Only " "effective in read phase and in combination with \"-" ARG_NUMDIRS_SHORT "\" & \"-" @@ -685,6 +686,7 @@ void ProgArgs::defineDefaults() this->madviseFlags = 0; this->runS3MultiDelObjNum = 0; this->disablePathBracketsExpansion = false; + this->useS3ObjectPrefixRand = false; } /** @@ -748,6 +750,7 @@ void ProgArgs::initImplicitValues() blockVariancePercent = 0; } + useS3ObjectPrefixRand = (s3ObjectPrefix.find(RAND_PREFIX_MARKS_SUBSTR) != std::string::npos); } /** @@ -2958,6 +2961,9 @@ void ProgArgs::setFromPropertyTreeForService(bpt::ptree& tree) gpuIDsStr = gpuIDsServiceOverride; parseGPUIDs(); + + // init implicit vals + useS3ObjectPrefixRand = (s3ObjectPrefix.find(RAND_PREFIX_MARKS_SUBSTR) != std::string::npos); } /** diff --git a/source/ProgArgs.h b/source/ProgArgs.h index 7ddd804..3e60f4b 100644 --- a/source/ProgArgs.h +++ b/source/ProgArgs.h @@ -204,6 +204,10 @@ namespace bpt = boost::property_tree; #define ARG_MADVISE_FLAG_NOHUGEPAGE_NAME "nohugepage" +#define RAND_PREFIX_MARK_CHAR '%' // name prefix char to replace with random value +#define RAND_PREFIX_MARKS_SUBSTR "%%%" // three times RAND_PREFIX_MARK_CHAR + + typedef std::vector CuFileHandleDataVec; typedef std::vector CuFileHandleDataPtrVec; @@ -372,6 +376,7 @@ class ProgArgs unsigned short s3LogLevel; // log level for AWS SDK bool noDirectIOCheck; // ignore directIO alignment and sanity checks std::string s3ObjectPrefix; // object name/path prefix for s3 "directory mode" + bool useS3ObjectPrefixRand; // implicit based on RAND_PREFIX_MARKS_SUBSTR in s3ObjectPrefix uint64_t runS3ListObjNum; // run seq list objects phase 
if >0, given number is listing limit bool runS3ListObjParallel; // multi-threaded object listing (requires "-n" / "-N") bool doS3ListObjVerify; // verify object listing (requires "-n" / "-N") @@ -559,7 +564,8 @@ class ProgArgs bool getUseS3TransferManager() const { return useS3TransferManager; } unsigned short getS3LogLevel() const { return s3LogLevel; } bool getNoDirectIOCheck() const { return noDirectIOCheck; } - std::string getS3ObjectPrefix() const { return s3ObjectPrefix; } + const std::string& getS3ObjectPrefix() const { return s3ObjectPrefix; } + bool getUseS3ObjectPrefixRand() const { return useS3ObjectPrefixRand; } uint64_t getS3ListObjNum() const { return runS3ListObjNum; } bool getRunListObjPhase() const { return (runS3ListObjNum > 0); } bool getRunListObjParallelPhase() const { return runS3ListObjParallel; } diff --git a/source/toolkits/random/RandAlgoGoldenPrime.h b/source/toolkits/random/RandAlgoGoldenPrime.h index debed15..ef4dc3b 100644 --- a/source/toolkits/random/RandAlgoGoldenPrime.h +++ b/source/toolkits/random/RandAlgoGoldenPrime.h @@ -25,6 +25,11 @@ class RandAlgoGoldenPrime : public RandAlgoInterface state = stateSeeder.next(); } + RandAlgoGoldenPrime(uint64_t seed) + { + state = seed; + } + virtual ~RandAlgoGoldenPrime() {} private: diff --git a/source/workers/LocalWorker.cpp b/source/workers/LocalWorker.cpp index da70920..e1e0e6d 100644 --- a/source/workers/LocalWorker.cpp +++ b/source/workers/LocalWorker.cpp @@ -50,6 +50,8 @@ #define NETBENCH_CONNECT_TIMEOUT_SEC 20 // max time for servers to wait and clients to retry #define NETBENCH_RECEIVE_TIMEOUT_SEC 20 // max time to wait for incoming data on client & server #define NETBENCH_SHORT_POLL_TIMEOUT_SEC 2 // time to check for interrupts in longer poll wait loops +#define HEX_ALPHABET "0123456789ABCDEF" +#define HEX_ALPHABET_LEN (sizeof(HEX_ALPHABET) - 1) #ifdef S3_SUPPORT @@ -3367,6 +3369,7 @@ void LocalWorker::s3ModeIterateObjects() all workers use the dirs of worker rank 0 */ const bool 
useTransMan = progArgs->getUseS3TransferManager(); std::string objectPrefix = progArgs->getS3ObjectPrefix(); + const bool objectPrefixRand = progArgs->getUseS3ObjectPrefixRand(); const BenchPhase globalBenchPhase = workersSharedData->currentBenchPhase; const size_t localWorkerRank = workerRank - progArgs->getRankOffset(); const bool isRWMixedReader = ( (globalBenchPhase == BenchPhase_CREATEFILES) && @@ -3405,9 +3408,14 @@ void LocalWorker::s3ModeIterateObjects() "dirIndex: " + std::to_string(dirIndex) + "; " "fileIndex: " + std::to_string(fileIndex) ); + if(objectPrefixRand) + objectPrefix = getS3RandObjectPrefix( + workerRank, dirIndex, fileIndex, progArgs->getS3ObjectPrefix() ); + unsigned bucketIndex = (workerRank + dirIndex) % bucketVec.size(); std::string currentObjectPath = objectPrefix + currentPath.data(); + rwOffsetGen->reset(); // reset for next file std::chrono::steady_clock::time_point ioStartT = std::chrono::steady_clock::now(); @@ -3484,6 +3492,7 @@ void LocalWorker::s3ModeIterateObjectsRand() const size_t workerDirRank = progArgs->getDoDirSharing() ? 0 : workerRank; /* for dir sharing, all workers use the dirs of worker rank 0 */ std::string objectPrefix = progArgs->getS3ObjectPrefix(); + const bool objectPrefixRand = progArgs->getUseS3ObjectPrefixRand(); // init random generators for dir & file index selection @@ -3534,6 +3543,10 @@ void LocalWorker::s3ModeIterateObjectsRand() "dirIndex: " + std::to_string(dirIndex) + "; " "fileIndex: " + std::to_string(fileIndex) ); + if(objectPrefixRand) + objectPrefix = getS3RandObjectPrefix( + workerRank, dirIndex, fileIndex, progArgs->getS3ObjectPrefix() ); + const unsigned bucketIndex = (workerRank + dirIndex) % bucketVec.size(); std::string currentObjectPath = objectPrefix + currentPath.data(); @@ -4535,6 +4548,7 @@ void LocalWorker::s3ModeListObjParallel() const size_t workerDirRank = progArgs->getDoDirSharing() ? 
0 : workerRank; /* for dir sharing, all workers use the dirs of worker rank 0 */ std::string objectPrefix = progArgs->getS3ObjectPrefix(); + const bool objectPrefixRand = progArgs->getUseS3ObjectPrefixRand(); const bool doListObjVerify = progArgs->getDoListObjVerify(); @@ -4586,6 +4600,10 @@ void LocalWorker::s3ModeListObjParallel() "dirIndex: " + std::to_string(dirIndex) + "; " "fileIndex: " + std::to_string(fileIndex) ); + if(objectPrefixRand) + objectPrefix = getS3RandObjectPrefix( + workerRank, dirIndex, fileIndex, progArgs->getS3ObjectPrefix() ); + std::string currentObjectPath = objectPrefix + currentPath.data(); expectedObjs.insert(currentObjectPath); @@ -4711,7 +4729,7 @@ void LocalWorker::s3ModeVerifyListing(StringSet& expectedSet, StringList& receiv } /** - * List objects and add multi-delete them in given buckets with user-defined limit for number of + * List objects and multi-delete them in given buckets with user-defined limit for number of * entries. * * @throw WorkerException on error. @@ -4840,6 +4858,45 @@ bool LocalWorker::getS3ModeDoReverseSeqFallback() return false; } +/** + * In S3 mode, replace the first sequence of at least 3 consecutive RAND_PREFIX_MARK_CHAR chars with a + * random uppercase hex string based on worker rank, dir index and file index. It's based on these + * so that we can calculate the same random values again later to find the files. + * + * Note: It's a good idea to check progArgs->getUseS3ObjectPrefixRand() to avoid calling this + * unnecessarily. + * + * @objectPrefix string in which to replace the consecutive occurrences of RAND_PREFIX_MARK_CHAR. + * @return objectPrefix with replaced RAND_PREFIX_MARK_CHAR chars. 
+ */ +std::string LocalWorker::getS3RandObjectPrefix(size_t workerRank, size_t dirIdx, + size_t fileIdx, const std::string& objectPrefix) +{ + size_t threeMarksPos = objectPrefix.find(RAND_PREFIX_MARKS_SUBSTR); + + if(threeMarksPos == std::string::npos) + return objectPrefix; // not found, so nothing to replace here + + std::string randObjectPrefix(objectPrefix); // the copy to replace chars + + // we don't want any zero-based to turn result to all-zero (e.g. "-n 0" would always be 0) + workerRank++; + dirIdx++; + fileIdx++; + + uint64_t randomNum = RandAlgoGoldenPrime(workerRank * dirIdx * fileIdx).next(); + + for(size_t i = threeMarksPos; + (i < objectPrefix.size() ) && (objectPrefix[i] == RAND_PREFIX_MARK_CHAR); + i++) + { + randObjectPrefix[i] = ( (char*)HEX_ALPHABET)[randomNum % HEX_ALPHABET_LEN]; + + randomNum /= HEX_ALPHABET_LEN; + } + + return randObjectPrefix; +} /** * Return appropriate file open flags for the current benchmark phase in dir mode. diff --git a/source/workers/LocalWorker.h b/source/workers/LocalWorker.h index 4b13d6d..583db10 100644 --- a/source/workers/LocalWorker.h +++ b/source/workers/LocalWorker.h @@ -220,6 +220,8 @@ class LocalWorker : public Worker std::string bucketName, std::string listPrefix); void s3ModeListAndMultiDeleteObjects(); bool getS3ModeDoReverseSeqFallback(); + std::string getS3RandObjectPrefix(size_t workerRank, size_t dirIdx, size_t fileIdx, + const std::string& objectPrefix); void hdfsDirModeIterateDirs(); void hdfsDirModeIterateFiles();