run_DeepRecSys.sh (executable, 68 lines, 3.56 KB; forked from harvard-acc/DeepRecSys)
#!/bin/bash
# Example script to run DeepRecSys.
# This allows you to run the neural recommendation models found in models/
# along with the recommendation load generator and query scheduler to measure
# latency-bounded throughput (inference QPS).
###############################################################################
########## Epoch args ##############
# The total number of inference queries run is the product of the number of
# epochs and the number of batches:
#   inference queries = nepochs * num_batches
# The number of batches is the number of unique data inputs generated by the
# data generators; the number of epochs determines how many times to iterate
# over them.
# Configuring these parameters is important for getting accurate caching
# behavior based on the use case being modeled
nepochs=64
num_batches=32
epoch_args="--nepochs $nepochs --num_batches $num_batches"
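# Worked example (for illustration): with the values above, the load
# generator issues nepochs * num_batches = 64 * 32 = 2048 inference
# queries in total.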
########## Inference engine args ##############
# The number of inference engines determines the number of parallel Caffe2
# CPU processes that queries are distributed over
inference_engines=32
caffe2_net_type="async_dag"
engine_args="--inference_engines $inference_engines --caffe2_net_type $caffe2_net_type"
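# Note (assumption based on Caffe2's net-executor naming): "async_dag" is
# Caffe2's asynchronous DAG executor, which lets independent operators in a
# model's net run concurrently within each engine process.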
########## Query size args ##############
# Configuration for query sizes.
batch_size_distribution="normal" # number of candidate items per query follows a normal distribution
max_mini_batch_size=1024 # maximum number of candidate items per query
avg_mini_batch_size=165 # mean number of candidate items per query
var_mini_batch_size=16 # variance of the number of candidate items per query
sub_task_batch_size=32 # per-core query size (number of items processed per core)
batch_args="--batch_size_distribution $batch_size_distribution --max_mini_batch_size $max_mini_batch_size --avg_mini_batch_size $avg_mini_batch_size --var_mini_batch_size $var_mini_batch_size --sub_task_batch_size $sub_task_batch_size"
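# Worked example (for illustration): a query at the mean size of 165
# candidate items with sub_task_batch_size=32 is split into
# ceil(165 / 32) = 6 per-core sub-tasks (five full 32-item tasks plus a
# 5-item remainder).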
########## Scheduling args ##############
# Configuration for the hill-climbing-based scheduler
target_latency=25 # target p95 tail latency in ms
# Input query arrival rates for the hill-climbing scheduler to iterate over
min_arr_range=1 # minimum input query arrival rate (Poisson distribution), in ms
max_arr_range=20 # maximum input query arrival rate (Poisson distribution), in ms
arr_steps=50 # number of arrival rates to try, spaced logarithmically between the min. and max.
# Candidate batching configurations to try (task- vs. request- or data-level parallelism).
# These are the per-core query or batch sizes for the hill-climbing scheduler to try.
batch_configs="512-256-128"
# Parameters for the hill-climbing scheduler to find a steady state
req_granularity=64 # number of queries after which the scheduler updates the arrival rate
sched_timeout=128 # number of updates after which the scheduler times out searching for the optimal point
scheduler_args="--target_latency $target_latency --min_arr_range $min_arr_range --max_arr_range $max_arr_range --arr_steps $arr_steps --batch_configs $batch_configs --req_granularity $req_granularity --sched_timeout $sched_timeout"
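# Worked example (for illustration): arr_steps=50 log-spaced arrival rates
# between min_arr_range=1 and max_arr_range=20 are candidates; the scheduler
# re-evaluates every req_granularity=64 queries, so a search that never
# converges is cut off after sched_timeout=128 updates, i.e. about
# 128 * 64 = 8192 queries.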
###############################################################################
# CPU-only run (no accelerator modeling); uncomment to use instead of the
# accelerator-enabled run below.
#python DeepRecSys.py $epoch_args $engine_args $batch_args $scheduler_args --queue --config_file "models/configs/wide_and_deep.json" --tune_batch_qps
########## Accelerator arguments ##############
# Candidate accelerator batch-size configurations for the scheduler to sweep
# over when tuning accelerator QPS
accel_configs="96-128-192-256-384-512"
# --model_accel enables modeling an accelerator (e.g., a GPU) alongside the
# CPU inference engines
accel_args="--accel_configs $accel_configs --model_accel"
python DeepRecSys.py $epoch_args $engine_args $batch_args $scheduler_args $accel_args --queue --config_file "models/configs/wide_and_deep.json" --tune_batch_qps --tune_accel_qps