Skip to content

Commit

Permalink
s3: support random fill in object prefix
Browse files Browse the repository at this point in the history
A sequence of three or more "%" chars in an object prefix will now get
replaced by a random hex string of the same length.
  • Loading branch information
breuner committed Mar 13, 2024
1 parent 199e1af commit 3b9f291
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 4 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Changelog of elbencho

## v3.0.6 (work in progress)

### New Features & Enhancements
* Added new multi-arch (ARM64 & x86_64) docker container with CUDA/GDS support.
* If an S3 object prefix contains a sequence of three or more '%' chars, this sequence will now get replaced by a random hex string of the same length.

### Contributors
* Thanks to Phil Canman, Erez Binia and Ohad Shamir for helpful comments and suggestions.

## v3.0.5 (Jan 05, 2024)

### New Features & Enhancements
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
EXE_NAME ?= elbencho
EXE_VER_MAJOR ?= 3
EXE_VER_MINOR ?= 0
EXE_VER_PATCHLEVEL ?= 5
EXE_VER_PATCHLEVEL ?= 6
EXE_VERSION ?= $(EXE_VER_MAJOR).$(EXE_VER_MINOR)-$(EXE_VER_PATCHLEVEL)
EXE ?= $(BIN_PATH)/$(EXE_NAME)
EXE_UNSTRIPPED ?= $(EXE)-unstripped
Expand Down
8 changes: 7 additions & 1 deletion source/ProgArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ void ProgArgs::defineAllowedArgs()
"Don't check for S3 multi-part uploads exceeding 10,000 parts.")
/*s3o*/ (ARG_S3OBJECTPREFIX_LONG, bpo::value(&this->s3ObjectPrefix),
"S3 object prefix. This will be prepended to all object names when the benchmark path "
"is a bucket.")
"is a bucket. (A sequence of 3 to 16 \"" RAND_PREFIX_MARKS_SUBSTR "\" chars will be "
"replaced by a random hex string of the same length.)")
/*s3r*/ (ARG_S3RANDOBJ_LONG, bpo::bool_switch(&this->useS3RandObjSelect),
"Read at random offsets and randomly select a new object for each S3 block read. Only "
"effective in read phase and in combination with \"-" ARG_NUMDIRS_SHORT "\" & \"-"
Expand Down Expand Up @@ -685,6 +686,7 @@ void ProgArgs::defineDefaults()
this->madviseFlags = 0;
this->runS3MultiDelObjNum = 0;
this->disablePathBracketsExpansion = false;
this->useS3ObjectPrefixRand = false;
}

/**
Expand Down Expand Up @@ -748,6 +750,7 @@ void ProgArgs::initImplicitValues()
blockVariancePercent = 0;
}

useS3ObjectPrefixRand = (s3ObjectPrefix.find(RAND_PREFIX_MARKS_SUBSTR) != std::string::npos);
}

/**
Expand Down Expand Up @@ -2958,6 +2961,9 @@ void ProgArgs::setFromPropertyTreeForService(bpt::ptree& tree)
gpuIDsStr = gpuIDsServiceOverride;

parseGPUIDs();

// init implicit vals
useS3ObjectPrefixRand = (s3ObjectPrefix.find(RAND_PREFIX_MARKS_SUBSTR) != std::string::npos);
}

/**
Expand Down
8 changes: 7 additions & 1 deletion source/ProgArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ namespace bpt = boost::property_tree;
#define ARG_MADVISE_FLAG_NOHUGEPAGE_NAME "nohugepage"


#define RAND_PREFIX_MARK_CHAR '%' // name prefix char to replace with random value
#define RAND_PREFIX_MARKS_SUBSTR "%%%" // three times RAND_PREFIX_MARK_CHAR


typedef std::vector<CuFileHandleData> CuFileHandleDataVec;
typedef std::vector<CuFileHandleData*> CuFileHandleDataPtrVec;

Expand Down Expand Up @@ -372,6 +376,7 @@ class ProgArgs
unsigned short s3LogLevel; // log level for AWS SDK
bool noDirectIOCheck; // ignore directIO alignment and sanity checks
std::string s3ObjectPrefix; // object name/path prefix for s3 "directory mode"
bool useS3ObjectPrefixRand; // implicit based on RAND_PREFIX_MARKS_SUBSTR in s3ObjectPrefix
uint64_t runS3ListObjNum; // run seq list objects phase if >0, given number is listing limit
bool runS3ListObjParallel; // multi-threaded object listing (requires "-n" / "-N")
bool doS3ListObjVerify; // verify object listing (requires "-n" / "-N")
Expand Down Expand Up @@ -559,7 +564,8 @@ class ProgArgs
bool getUseS3TransferManager() const { return useS3TransferManager; }
unsigned short getS3LogLevel() const { return s3LogLevel; }
bool getNoDirectIOCheck() const { return noDirectIOCheck; }
std::string getS3ObjectPrefix() const { return s3ObjectPrefix; }
const std::string& getS3ObjectPrefix() const { return s3ObjectPrefix; }
bool getUseS3ObjectPrefixRand() const { return useS3ObjectPrefixRand; }
uint64_t getS3ListObjNum() const { return runS3ListObjNum; }
bool getRunListObjPhase() const { return (runS3ListObjNum > 0); }
bool getRunListObjParallelPhase() const { return runS3ListObjParallel; }
Expand Down
5 changes: 5 additions & 0 deletions source/toolkits/random/RandAlgoGoldenPrime.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ class RandAlgoGoldenPrime : public RandAlgoInterface
state = stateSeeder.next();
}

RandAlgoGoldenPrime(uint64_t seed)
{
state = seed;
}

virtual ~RandAlgoGoldenPrime() {}

private:
Expand Down
59 changes: 58 additions & 1 deletion source/workers/LocalWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
#define NETBENCH_CONNECT_TIMEOUT_SEC 20 // max time for servers to wait and clients to retry
#define NETBENCH_RECEIVE_TIMEOUT_SEC 20 // max time to wait for incoming data on client & server
#define NETBENCH_SHORT_POLL_TIMEOUT_SEC 2 // time to check for interrupts in longer poll wait loops
// uppercase hex digits; indexed by (value % HEX_ALPHABET_LEN) to turn a random number into chars
#define HEX_ALPHABET "0123456789ABCDEF"
// number of digits in HEX_ALPHABET ("- 1" excludes the string literal's terminating null char)
#define HEX_ALPHABET_LEN (sizeof(HEX_ALPHABET) - 1)


#ifdef S3_SUPPORT
Expand Down Expand Up @@ -3367,6 +3369,7 @@ void LocalWorker::s3ModeIterateObjects()
all workers use the dirs of worker rank 0 */
const bool useTransMan = progArgs->getUseS3TransferManager();
std::string objectPrefix = progArgs->getS3ObjectPrefix();
const bool objectPrefixRand = progArgs->getUseS3ObjectPrefixRand();
const BenchPhase globalBenchPhase = workersSharedData->currentBenchPhase;
const size_t localWorkerRank = workerRank - progArgs->getRankOffset();
const bool isRWMixedReader = ( (globalBenchPhase == BenchPhase_CREATEFILES) &&
Expand Down Expand Up @@ -3405,9 +3408,14 @@ void LocalWorker::s3ModeIterateObjects()
"dirIndex: " + std::to_string(dirIndex) + "; "
"fileIndex: " + std::to_string(fileIndex) );

if(objectPrefixRand)
objectPrefix = getS3RandObjectPrefix(
workerRank, dirIndex, fileIndex, progArgs->getS3ObjectPrefix() );

unsigned bucketIndex = (workerRank + dirIndex) % bucketVec.size();
std::string currentObjectPath = objectPrefix + currentPath.data();


rwOffsetGen->reset(); // reset for next file

std::chrono::steady_clock::time_point ioStartT = std::chrono::steady_clock::now();
Expand Down Expand Up @@ -3484,6 +3492,7 @@ void LocalWorker::s3ModeIterateObjectsRand()
const size_t workerDirRank = progArgs->getDoDirSharing() ? 0 : workerRank; /* for dir sharing,
all workers use the dirs of worker rank 0 */
std::string objectPrefix = progArgs->getS3ObjectPrefix();
const bool objectPrefixRand = progArgs->getUseS3ObjectPrefixRand();

// init random generators for dir & file index selection

Expand Down Expand Up @@ -3534,6 +3543,10 @@ void LocalWorker::s3ModeIterateObjectsRand()
"dirIndex: " + std::to_string(dirIndex) + "; "
"fileIndex: " + std::to_string(fileIndex) );

if(objectPrefixRand)
objectPrefix = getS3RandObjectPrefix(
workerRank, dirIndex, fileIndex, progArgs->getS3ObjectPrefix() );

const unsigned bucketIndex = (workerRank + dirIndex) % bucketVec.size();
std::string currentObjectPath = objectPrefix + currentPath.data();

Expand Down Expand Up @@ -4535,6 +4548,7 @@ void LocalWorker::s3ModeListObjParallel()
const size_t workerDirRank = progArgs->getDoDirSharing() ? 0 : workerRank; /* for dir sharing,
all workers use the dirs of worker rank 0 */
std::string objectPrefix = progArgs->getS3ObjectPrefix();
const bool objectPrefixRand = progArgs->getUseS3ObjectPrefixRand();
const bool doListObjVerify = progArgs->getDoListObjVerify();


Expand Down Expand Up @@ -4586,6 +4600,10 @@ void LocalWorker::s3ModeListObjParallel()
"dirIndex: " + std::to_string(dirIndex) + "; "
"fileIndex: " + std::to_string(fileIndex) );

if(objectPrefixRand)
objectPrefix = getS3RandObjectPrefix(
workerRank, dirIndex, fileIndex, progArgs->getS3ObjectPrefix() );

std::string currentObjectPath = objectPrefix + currentPath.data();

expectedObjs.insert(currentObjectPath);
Expand Down Expand Up @@ -4711,7 +4729,7 @@ void LocalWorker::s3ModeVerifyListing(StringSet& expectedSet, StringList& receiv
}

/**
* List objects and add multi-delete them in given buckets with user-defined limit for number of
* List objects and multi-delete them in given buckets with user-defined limit for number of
* entries.
*
* @throw WorkerException on error.
Expand Down Expand Up @@ -4840,6 +4858,45 @@ bool LocalWorker::getS3ModeDoReverseSeqFallback()
return false;
}

/**
 * In S3 mode, replace the first sequence of at least 3 consecutive RAND_PREFIX_MARK_CHAR chars
 * with an uppercase hex string of the same length. The hex string is derived from worker rank,
 * dir index and file index, so that the same values can be calculated again later to find the
 * objects.
 *
 * Only the first such sequence in objectPrefix is replaced; any later sequences are left as-is.
 * All hex digits are taken from a single 64-bit number (least significant digit first), so a
 * sequence longer than 16 chars gets '0' for every char beyond the 16th.
 *
 * Note: It's a good idea to check progArgs->getUseS3ObjectPrefixRand() to avoid calling this
 * unnecessarily.
 *
 * NOTE(review): the seed is the plain product of the three (one-based) values, so different
 * combinations with the same product (e.g. swapped rank and file index) get the same seed and
 * thus presumably the same hex string; confirm whether that is acceptable.
 *
 * @workerRank zero-based rank of the worker.
 * @dirIdx zero-based directory index.
 * @fileIdx zero-based file index.
 * @objectPrefix string in which to replace the consecutive occurrences of RAND_PREFIX_MARK_CHAR.
 * @return copy of objectPrefix with replaced RAND_PREFIX_MARK_CHAR chars; unmodified copy if
 *    objectPrefix does not contain RAND_PREFIX_MARKS_SUBSTR.
 */
std::string LocalWorker::getS3RandObjectPrefix(size_t workerRank, size_t dirIdx,
	size_t fileIdx, const std::string& objectPrefix)
{
	const size_t threeMarksPos = objectPrefix.find(RAND_PREFIX_MARKS_SUBSTR);

	if(threeMarksPos == std::string::npos)
		return objectPrefix; // not found, so nothing to replace here

	std::string randObjectPrefix(objectPrefix); // the copy to replace chars

	// we don't want any zero-based to turn result to all-zero (e.g. "-n 0" would always be 0)
	workerRank++;
	dirIdx++;
	fileIdx++;

	uint64_t randomNum = RandAlgoGoldenPrime(workerRank * dirIdx * fileIdx).next();

	// walk the run of mark chars starting at the found position; stop at first other char
	for(size_t i = threeMarksPos;
		(i < objectPrefix.size() ) && (objectPrefix[i] == RAND_PREFIX_MARK_CHAR);
		i++)
	{
		// lowest 4 bits select the hex digit (string literal is indexed directly, no cast needed)
		randObjectPrefix[i] = HEX_ALPHABET[randomNum % HEX_ALPHABET_LEN];

		randomNum /= HEX_ALPHABET_LEN; // move on to the next 4 bits
	}

	return randObjectPrefix;
}

/**
* Return appropriate file open flags for the current benchmark phase in dir mode.
Expand Down
2 changes: 2 additions & 0 deletions source/workers/LocalWorker.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ class LocalWorker : public Worker
std::string bucketName, std::string listPrefix);
void s3ModeListAndMultiDeleteObjects();
bool getS3ModeDoReverseSeqFallback();
std::string getS3RandObjectPrefix(size_t workerRank, size_t dirIdx, size_t fileIdx,
const std::string& objectPrefix);

void hdfsDirModeIterateDirs();
void hdfsDirModeIterateFiles();
Expand Down

0 comments on commit 3b9f291

Please sign in to comment.