Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compare-review - added helper script and support for snappy #125

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions performance/compression-review/compression-review.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import lzma
import zstandard as zstd
import zlib
import snappy


def createDictionary(appConfig, databaseName, collectionName):
Expand Down Expand Up @@ -50,8 +51,8 @@ def getData(appConfig):
logFileHandle.write("\n")

# output header to csv
logFileHandle.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format('dbName','collName','numDocs','avgDocSize','sizeGB','storageGB','compRatio','minSample','maxSample','avgSample','minComp','maxComp','avgComp','compRatio','exceptions','compTime(ms)'))

logFileHandle.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format('dbName','collName','numDocs','avgDocSize','sizeGB','storageGB','compRatio','minSample','maxSample','avgSample','minComp','maxComp','avgComp','lz4Ratio','exceptions','compTime(ms)'))
# get databases - filter out admin, config, local, and system
dbDict = client.admin.command("listDatabases",nameOnly=True,filter={"name":{"$nin":['admin','config','local','system']}})['databases']
for thisDb in dbDict:
Expand Down Expand Up @@ -134,6 +135,8 @@ def getData(appConfig):
compressed = lzma.compress(docAsString.encode(),format=lzma.FORMAT_XZ,preset=0)
elif compressor == 'zlib-1':
compressed = zlib.compress(docAsString.encode(),level=1)
elif compressor == 'snappy':
compressed = snappy.compress(docAsString.encode())
else:
print('Unknown compressor | {}'.format('compressor'))
sys.exit(1)
Expand Down Expand Up @@ -198,7 +201,7 @@ def main():

parser.add_argument('--compressor',
required=False,
choices=['lz4-fast','lz4-high','lz4-fast-dict','lz4-high-dict','zstd-1','zstd-5','zstd-1-dict','zstd-5-dict','bz2-1','lzma-0','zlib-1'],
choices=['lz4-fast','lz4-high','lz4-fast-dict','lz4-high-dict','zstd-1','zstd-5','zstd-1-dict','zstd-5-dict','bz2-1','lzma-0','zlib-1', 'snappy'],
type=str,
default='lz4-fast',
help='Compressor')
Expand Down Expand Up @@ -235,3 +238,4 @@ def main():

if __name__ == "__main__":
main()

26 changes: 26 additions & 0 deletions performance/compression-review/gather-stats.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Run compression-review.py once per compression algorithm and merge the
# per-algorithm CSV results into a single file ($FILENAME). Every merged data
# row is prefixed with the name of the algorithm that produced it.
#
# Usage: set MONGODB_URI below to the cluster to sample, then run this script
# from the directory that contains compression-review.py.

export MONGODB_URI='mongodb://.../?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false'
export SAMPLE_SIZE=1000
export DICTIONARY_SAMPLE_SIZE=100
export FILENAME=output.log
declare -a COMPRESSION_ALGOS=("lz4-fast" "lz4-fast-dict" "zstd-1" "zstd-1-dict" "lz4-high" "lz4-high-dict" "zstd-5" "zstd-5-dict" "snappy")

# clean up files left over from a previous run
rm -f -- *.csv "$FILENAME"

# download the global bundle CA file unless it is already present
if [ ! -f "global-bundle.pem" ]
then
    if ! wget https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem
    then
        echo "ERROR: could not download global-bundle.pem" >&2
        # do not leave a partial download behind; a later run would treat it as valid
        rm -f -- global-bundle.pem
        exit 1
    fi
fi

# create header entry (printf writes exactly one line, no trailing blank line)
printf '%s\n' "algo,dbName,collName,numDocs,avgDocSize,sizeGB,storageGB,compRatio,minSample,maxSample,avgSample,minComp,maxComp,avgComp,lz4Ratio,exceptions,compTime(ms)" > "$FILENAME"

# loop through the compression algorithms
for COMPRESSION_ALGO in "${COMPRESSION_ALGOS[@]}"
do
    RESULT_FILE="${COMPRESSION_ALGO}-docdb-test-compression-review.csv"

    # The URI is quoted because it contains '?' and '&', which must reach the
    # Python script verbatim instead of being subject to pathname expansion.
    if ! python compression-review.py --uri="$MONGODB_URI" --server-alias docdb-test --sample-size "$SAMPLE_SIZE" --dictionary-sample-size "$DICTIONARY_SAMPLE_SIZE" --compressor "$COMPRESSION_ALGO"
    then
        # keep going: whatever output was produced is still collected below
        echo "WARNING: compression-review.py exited with an error for ${COMPRESSION_ALGO}" >&2
    fi

    # Rename the run's CSV so it carries the algorithm name; the new name no
    # longer matches the docdb-test-* glob used by the next iteration.
    if ! mv -- docdb-test-*-compression-review.csv "$RESULT_FILE"
    then
        echo "WARNING: no result file found for ${COMPRESSION_ALGO}; skipping" >&2
        continue
    fi

    # Skip the first 4 lines of the per-run CSV (presumably its preamble and
    # header - confirm against compression-review.py) and prefix each
    # remaining row with the algorithm name.
    tail -n +5 "$RESULT_FILE" | awk -v algo="$COMPRESSION_ALGO" '{ print algo "," $0 }' >> "$FILENAME"
done


2 changes: 2 additions & 0 deletions performance/compression-review/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pymongo
lz4
zstandard
python-snappy