Skip to content
This repository has been archived by the owner on Mar 23, 2023. It is now read-only.

[utils] add benchmark runner for YCSB #131

Open
wants to merge 1 commit into
base: stable-1.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions utils/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2017-2022, Intel Corporation

import os
from os.path import join, getsize

# Sections of a YCSB log whose summary lines are exported to the CSV.
# Anything else ([CLEANUP], [READ-FAILED], ...) is ignored.
_REPORTED_SECTIONS = ('[READ]', '[INSERT]', '[UPDATE]', '[OVERALL]')


def _thread_count(filename):
    """Return the thread count encoded in 'run_<threads>.<ext>', or None.

    None is returned for files that are not run logs or whose thread count
    is not an integer, so that one stray file cannot abort the whole export.
    """
    parts = filename.split('_')
    if len(parts) < 2 or parts[0] != 'run':
        return None
    try:
        return int(parts[1].split('.')[0])
    except ValueError:
        return None


def _summary_records(lines):
    """Return the (section, metric, value) records found in *lines*.

    Commas are dropped and each line is split on single spaces. Records whose
    second field is an integer are skipped; only records with a non-numeric
    metric name are kept. Records with fewer than three fields are skipped
    because they carry no value to export.
    """
    records = []
    for line in lines:
        record = tuple(line.replace(',', '').split(' '))
        if len(record) < 3 or record[0] not in _REPORTED_SECTIONS:
            continue
        try:
            int(record[1])
        except ValueError:
            # Not castable to int: this is a named metric, keep it.
            records.append(record)
    return records


def _build_rows(parsed_results):
    """Merge per-run records into ';'-separated CSV rows.

    *parsed_results* is a non-empty list of (thread_count, records) pairs
    sorted by thread count. Row labels come from the first run; values of the
    other runs are merged by position, so every run is expected to report the
    same metrics in the same order as the first one.
    """
    rows = [record[0] + ';' + record[1] + ';' for record in parsed_results[0][1]]
    header = 'Threads;#;'
    for threads, records in parsed_results:
        header += str(threads) + ';'
        for index, record in enumerate(records):
            # The decimal point is replaced by a comma in the exported value.
            rows[index] += record[2].replace('\n', '').replace('.', ',') + ';'
    return [header] + rows


def main(results_root='results'):
    """Write a results.csv into every leaf directory below *results_root*.

    For each directory without subdirectories, all 'run_<threads>.*' logs are
    parsed, ordered by thread count and combined into one CSV file that is
    also echoed to stdout. Directories without run logs are only reported.
    """
    for root, dirs, filenames in os.walk(results_root):
        if dirs:
            continue

        parsed_results = []
        for filename in filenames:
            threads = _thread_count(filename)
            if threads is None:
                continue
            with open(os.path.join(root, filename)) as log_file:
                log_file.readline()  # the first line of a log is never parsed
                parsed_results.append((threads, _summary_records(log_file.readlines())))

        parsed_results.sort(key=lambda result: result[0])
        print(root)
        if not parsed_results:
            continue

        print('------CSV------')
        with open(os.path.join(root, 'results.csv'), 'w') as csv_file:
            for row in _build_rows(parsed_results):
                csv_file.write(row + '\n')
                print(row)


if __name__ == '__main__':
    main()
206 changes: 206 additions & 0 deletions utils/run_suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
#!/usr/bin/python2

# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2017-2022, Intel Corporation

import json
import os
import subprocess

# Example suite definition as read from test_suite.txt (one keyword per line):
# SUITE write_workload
# THREADS 1 2 4 8 16 32 48 64 96
# JOURNALING enabled/disabled
# RECORDS 1000
# OPERATIONS 100
# READ_PROPORTION 0.0
# UPDATE_PROPORTION 0.0
# INSERT_PROPORTION 1.0
# YCSB_NUMA 1
# DROP_BEFORE
# ENDSUITE

# Read the YCSB location from path_configuration.txt, which is expected to
# contain a line of the form YCSB_PATH=<directory>.
PATH_TO_YCSB = ''

# The context manager closes the file even when an error is raised below.
with open("path_configuration.txt", "r") as path_configuration:
    for line in path_configuration:
        if line.startswith('YCSB_PATH='):
            # Split on the first '=' only, so a path containing '=' is kept whole.
            PATH_TO_YCSB = line.split('=', 1)[1].rstrip('\r\n')
            if not PATH_TO_YCSB:
                raise NameError('No path in YCSB_PATH!')

# Also reached when the file has no YCSB_PATH line (PATH_TO_YCSB stays '').
if not os.path.isdir(PATH_TO_YCSB):
    raise NameError('Wrong path to YCSB!')

class Test:
    """Parameters of a single test suite, initialised to their defaults."""

    def __init__(self):
        # Engine-specific settings.
        self.pmemkv_engine = "cmap"
        self.pmemkv_dbsize = 0
        self.pmemkv_dbpath = "/dev/shm/"
        # Workload selection and suite identification.
        self.workload_type = "workloada"
        self.testName = ""
        self.threads = []
        # Record and operation counts.
        self.records = self.operations = 0
        # Initial values of the proportions, the NUMA node and the load flag.
        self.read_proportion = self.update_proportion = self.insert_proportion = -1.0
        self.ycsb_numa = self.is_load = -1

    def toJSON(self):
        """Return this object as indented JSON text with sorted keys."""

        def attributes_of(obj):
            # Objects json cannot encode natively are dumped as their attributes.
            return obj.__dict__

        return json.dumps(self, default=attributes_of, sort_keys=True, indent=4)

def getArgs(str):
    """Return every element of *str* after the first one as a new list."""
    # The parameter name shadows the builtin but is kept for compatibility.
    return list(str[1:])

# Keywords recognised in the suite file; add a keyword here when extending
# the implementation.
KEYWORDS = {
    "SUITE", "ENDSUITE",
    "THREADS", "JOURNALING", "RECORDS", "OPERATIONS", "LOAD",
    "READ_PROPORTION", "UPDATE_PROPORTION", "INSERT_PROPORTION",
    "YCSB_NUMA", "DROP_BEFORE", "CREATE_AFTER_DROP",
    "PMEMKV_ENGINE", "PMEMKV_DBSIZE", "PMEMKV_DBPATH",
    "WORKLOAD_TYPE",
}

# Parse test_suite.txt into a list of Test objects, one per SUITE line.

# Keywords whose first argument is stored verbatim in the named Test attribute.
_KEYWORD_ATTRIBUTES = {
    "RECORDS": "records",
    "OPERATIONS": "operations",
    "READ_PROPORTION": "read_proportion",
    "UPDATE_PROPORTION": "update_proportion",
    "INSERT_PROPORTION": "insert_proportion",
    "YCSB_NUMA": "ycsb_numa",
    "PMEMKV_ENGINE": "pmemkv_engine",
    "PMEMKV_DBSIZE": "pmemkv_dbsize",
    "PMEMKV_DBPATH": "pmemkv_dbpath",
    "WORKLOAD_TYPE": "workload_type",
}

configurations = []
with open("test_suite.txt", "r") as configfile:
    for line in configfile:
        splittedLine = line.split()
        # Skip blank or whitespace-only lines and comments, including indented ones.
        if not splittedLine or splittedLine[0].startswith('#'):
            continue

        keyword_count = len(set.intersection(KEYWORDS, splittedLine))
        if keyword_count > 1:
            print(splittedLine)
            raise NameError('Too many keywords in single line!')
        # The single keyword must be the first token of the line.
        if keyword_count == 0 or splittedLine[0] not in KEYWORDS:
            print(splittedLine)
            raise NameError('Unrecognized keyword')

        keyword = splittedLine[0]
        # Everything after the keyword is its argument list (possibly empty).
        args = getArgs(splittedLine)

        if keyword == "ENDSUITE":
            continue
        if keyword == "SUITE":
            if not args:
                raise NameError('SUITE requires a name!')
            configurations.append(Test())
            configurations[-1].testName = args[0]
            continue

        # Every other keyword modifies the suite opened by the latest SUITE line.
        if not configurations:
            raise NameError(keyword + ' used before SUITE!')
        current = configurations[-1]

        if keyword == "THREADS":
            current.threads = args
        elif keyword == "LOAD":
            current.is_load = 1
        elif keyword in _KEYWORD_ATTRIBUTES:
            if not args:
                raise NameError('Missing argument for ' + keyword + '!')
            setattr(current, _KEYWORD_ATTRIBUTES[keyword], args[0])
        else:
            # JOURNALING, DROP_BEFORE and CREATE_AFTER_DROP are listed in
            # KEYWORDS but have no matching Test attribute, so they are
            # accepted and skipped instead of aborting the whole run.
            print('Ignoring unsupported keyword: ' + keyword)

# Echo every parsed suite so the user can verify what was read.
print('Script read those tests:')
for i, conf in enumerate(configurations, 1):
    summary = (
        ('Test#: ', str(i)),
        ("Name: ", conf.testName),
        ("Threads: ", str(conf.threads)),
        ("Records: ", conf.records),
        ("Operation: ", conf.operations),
        ("Read proportion: ", str(conf.read_proportion)),
        ("Update proportion: ", str(conf.update_proportion)),
        ("Insert proportion: ", str(conf.insert_proportion)),
        ("Is load: ", str(conf.is_load)),
        ("NUMA for YCSB: ", conf.ycsb_numa),
        ("Workload type: ", conf.workload_type),
        ("Pmemkv engine: ", conf.pmemkv_engine),
        ("Pmemkv size: ", conf.pmemkv_dbsize),
        ("Pmemkv path: ", conf.pmemkv_dbpath),
    )
    # Labels are right-aligned to 20 columns, values left-aligned to 12.
    for label, value in summary:
        print('{:>20} {:<12}'.format(label, value))
    print("")

# Store the parsed configuration: all suites as JSON in results/ and a
# human-readable description inside each suite's own results directory.
results_directory = "results/"
if not os.path.exists(results_directory):
    os.makedirs(results_directory)

with open(results_directory + '/configurations.json', 'w') as jsonconfig:
    for i, conf in enumerate(configurations, 1):
        jsonconfig.write(conf.toJSON() + '\n')

        suite_directory = results_directory + conf.testName + '/'
        if not os.path.exists(suite_directory):
            os.makedirs(suite_directory)

        description = (
            ('Test#: ', str(i)),
            ("Name: ", conf.testName),
            ("Threads: ", str(conf.threads)),
            ("Records: ", conf.records),
            ("Operation: ", conf.operations),
            ("Read proportion: ", str(conf.read_proportion)),
            ("Update proportion: ", str(conf.update_proportion)),
            ("Insert proportion: ", str(conf.insert_proportion)),
            ("NUMA for YCSB: ", conf.ycsb_numa),
            ("Workload type: ", conf.workload_type),
            ("Pmemkv engine: ", conf.pmemkv_engine),
            ("Pmemkv size: ", conf.pmemkv_dbsize),
            ("Pmemkv path: ", conf.pmemkv_dbpath),
        )
        # Opened in append mode: an existing description file is extended.
        with open(suite_directory + 'test_description.txt', 'a') as test_description:
            for label, value in description:
                test_description.write('{:>20} {:<12}'.format(label, value) + '\n')
            test_description.write('\n')

# Build one run_workload.sh invocation per (suite, thread count) pair and
# write them all to testplan.sh.
generated_commands = []
for test in configurations:
    # YCSB phase: 'load' when the suite contained LOAD, otherwise 'run'.
    phase = 'load' if test.is_load == 1 else 'run'
    command_prefix = './run_workload.sh ' + test.testName + ' ' + phase + ' '

    if test.ycsb_numa == -1:
        print('NUMA node is not set for test: ' + test.testName + '.')

    # Positional arguments 4-14 of run_workload.sh, in the order it reads them.
    # The proportions go read, insert, update because the script maps
    # $7/$8/$9 to readproportion/insertproportion/updateproportion.
    suffix_fields = [
        PATH_TO_YCSB,
        test.records,
        test.operations,
        test.read_proportion,
        test.insert_proportion,
        test.update_proportion,
        test.ycsb_numa,
        test.workload_type,
        test.pmemkv_engine,
        test.pmemkv_dbsize,
        test.pmemkv_dbpath,
    ]
    # Options missing from the suite file still hold numeric defaults
    # (0, -1, -1.0); str() lets them reach the script as "-1"/"-1.0"
    # instead of raising TypeError on concatenation.
    command_suffix = ' '.join(str(field) for field in suffix_fields)

    for thread_no in test.threads:
        generated_commands.append(command_prefix + str(thread_no) + ' ' + command_suffix)

# Generate script
with open('testplan.sh', 'w') as testplan:
    testplan.write('#!/bin/bash\n')
    for x in generated_commands:
        testplan.write(x + '\n')
print(generated_commands)
78 changes: 78 additions & 0 deletions utils/run_workload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/bash

# SPDX-License-Identifier: BSD-3-Clause
# Copyright 2017-2022, Intel Corporation

# Run a single YCSB phase (load or run) against pmemkv from the command line.
#
# e.g. ./run_workload.sh run_cmap run 12 PATH_TO_YCSB 1000000 1000000 \
#          -1.0 -1.0 -1.0 1 workloadb csmap 80000000 DBPATH
#
# Positional arguments (all 14 are required):
#  1 - suite name
#  2 - ycsb phase: load/run
#  3 - thread count
#  4 - path to YCSB
#  5 - record count
#  6 - operation count
#  7 - read proportion   ("-1.0" = keep the value from the workload file)
#  8 - insert proportion ("-1.0" = keep the value from the workload file)
#  9 - update proportion ("-1.0" = keep the value from the workload file)
# 10 - NUMA node for YCSB ("-1" = do not use numactl)
# 11 - workload scenario (workload[a-f])
####### Engine related args
# 12 - pmemkv: engine name
# 13 - pmemkv: pool size
# 14 - pmemkv: path to pool
#
# Output goes to results/<suite name>/<phase>_<thread count>.log, relative to
# the directory the script was started from. The exit status is the one
# returned by YCSB.

echo "$@"
echo "Passed $# argumets to script"

if [ "$#" -ne 14 ];
then
	echo "Illegal number of parameters, should be 14. Check script documentation." >&2
	exit 1
fi

YCSB_PATH="$4"
echo "$YCSB_PATH"
OLD_PATH=$(pwd)

# Create results directory: results/{test_suite_name}/
RESULTS_DIR="$OLD_PATH/results/$1"
mkdir -p "$RESULTS_DIR"
LOG_FILE="$RESULTS_DIR/${2}_${3}.log"

# Optional YCSB properties: a proportion is passed only when it was set.
RATIO_ARGS=()
if [ "$7" != "-1.0" ];
then
	RATIO_ARGS+=(-p "readproportion=$7")
fi
if [ "$8" != "-1.0" ];
then
	RATIO_ARGS+=(-p "insertproportion=$8")
fi
if [ "$9" != "-1.0" ];
then
	RATIO_ARGS+=(-p "updateproportion=$9")
fi

# Optional numactl prefix binding YCSB to the requested NUMA node.
NUMA_CMD=()
if [ "${10}" != "-1" ];
then
	NUMA_CMD=(numactl -N "${10}")
fi

# TODO(kfilipek): Implement splitting threads into processes
cd "$YCSB_PATH" || { echo "Cannot enter YCSB directory: $YCSB_PATH" >&2; exit 1; }

if [ "$2" == "load" ] && [ -n "${14}" ];
then
	# Remove old DB before new load phase
	rm -rf "${14}"
fi

# Build the command once so the logged and the executed command cannot diverge.
YCSB_CMD=("${NUMA_CMD[@]}" bin/ycsb.sh "$2" pmemkv -P "workloads/${11}"
	-threads "$3"
	-p hdrhistogram.percentiles=95,99,99.9,99.99
	-p "recordcount=$5" -p "operationcount=$6"
	"${RATIO_ARGS[@]}"
	-p "pmemkv.engine=${12}" -p "pmemkv.dbsize=${13}" -p "pmemkv.dbpath=${14}")

echo "PMEM_IS_PMEM_FORCE=1 ${YCSB_CMD[*]} > $LOG_FILE" >> "$RESULTS_DIR/cmds_executed.log"
PMEM_IS_PMEM_FORCE=1 "${YCSB_CMD[@]}" > "$LOG_FILE"
STATUS=$?

cd "$OLD_PATH"
exit $STATUS