Commit

add scripts to run the demos

jiazhihao committed Jan 26, 2019
1 parent c00bb03 commit 861ffa3
Showing 9 changed files with 59 additions and 8 deletions.
1 change: 1 addition & 0 deletions alexnet.cc
@@ -2,6 +2,7 @@

void CnnModel::add_layers()
{
+  printf("Create Alexnet:\n");
  Tensor t = add_conv_layer(input_image, 64, 11, 11, 4, 4, 2, 2);
  t = add_pool_layer(t, 3, 3, 2, 2, 0, 0);
  t = add_conv_layer(t, 192, 5, 5, 1, 1, 2, 2);
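For readers skimming the demos: the add_conv_layer arguments appear to be (input, out_channels, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w); this is inferred from the AlexNet shapes, not a documented signature. With the usual output-size formula floor((H + 2*pad - kernel)/stride) + 1, a hypothetical 224x224 input gives floor((224 + 4 - 11)/4) + 1 = 55, the classic 55x55 AlexNet conv1 map.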
4 changes: 3 additions & 1 deletion cnn.cc
@@ -32,6 +32,7 @@ void parse_input_args(char **argv, int argc,
void top_level_task(const Task *task, const std::vector<PhysicalRegion> &regions,
                    Context ctx, Runtime *runtime)
{
+  printf("\n-------- Start FlexFlow Runtime --------\n");
  // Set up config parameters
  int num_par_h = 1;
  int num_par_w = 1;
@@ -160,7 +161,8 @@ void top_level_task(const Task *task, const std::vector<PhysicalRegion> &regions
  future.get_void_result();
  double ts_end = Realm::Clock::current_time_in_microseconds();
  double run_time = 1e-6 * (ts_end - ts_start);
-  printf("time = %.4fs, tp = %.2f images/s\n", run_time, batchSize * numIterations / run_time);
+  printf("End-to-end execution time = %.4fs\n", run_time);
+  printf("Training throughput = %.2f images/s\n", batchSize * numIterations / run_time);
}

int main(int argc, char **argv)
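Note that only the reporting was split; the measurement is unchanged. Throughput is batchSize * numIterations / run_time, so a hypothetical run with batch size 64 and 100 iterations finishing in 8.0 s would report 64 * 100 / 8.0 = 800.00 images/s.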
13 changes: 10 additions & 3 deletions conv_2d.cu
@@ -140,7 +140,7 @@ Conv2D::Conv2D(CnnConfig config, Tensor input, IndexSpaceT<3> part_is,
  output.partition = output_lp;
  output.region_grad = output_grad_lr;
  output.partition_grad = output_grad_lp;
-  printf("Create conv layer: output(n=%d c=%d h=%d w=%d)\n",
+  printf("  Create conv layer: output(n=%d c=%d h=%d w=%d)\n",
         output.adim[3], output.adim[2], output.adim[1], output.adim[0]);

// Compute partition bound for input
@@ -245,10 +245,12 @@ OpMeta* Conv2D::init_task(const Task *task,
  int input_h = rect_input.hi[1] - rect_input.lo[1] + 1;
  int output_w = rect_output.hi[0] - rect_output.lo[0] + 1;
  int output_h = rect_output.hi[1] - rect_output.lo[1] + 1;
+#ifdef VERBOSE_PRINT
  printf("init conv (input): n(%d) c(%d) h(%d) w(%d)\n", conv->inputs[0].pdim[3],
         conv->inputs[0].pdim[2], input_h, input_w);
  printf("init conv (output): n(%d) c_out(%d) h(%d) w(%d)\n", conv->output.pdim[3],
         conv->output.pdim[2], output_h, output_w);
+#endif
  checkCUDNN(cudnnSetTensor4dDescriptor(m->inputTensor,
                                        CUDNN_TENSOR_NCHW,
                                        CUDNN_DATA_FLOAT,
@@ -265,7 +267,6 @@ OpMeta* Conv2D::init_task(const Task *task,
                                        1,
                                        1));

-  printf("filterDim: kernel(%d %d) c_out(%d)\n", conv->kernel_h, conv->kernel_w, conv->output.pdim[2]);
  checkCUDNN(cudnnSetFilter4dDescriptor(m->filterDesc,
                                        CUDNN_DATA_FLOAT,
                                        CUDNN_TENSOR_NCHW,
@@ -368,7 +369,6 @@ void Conv2D::init_para_task(const Task *task,
  coord_t filter_elements = conv->inputs[0].adim[2] * conv->output.adim[2]
                            * conv->kernel_h * conv->kernel_w;
  float factor = 1.0f / sqrt(filter_elements / conv->output.adim[2]);
-  printf("factor = %.4f elements = %d\n", factor, filter_elements / conv->output.adim[2]);
  assert(filter_elements == (coord_t) rect_filter.volume());
  curandGenerateUniform(genGPU, filter_ptr, filter_elements);
  scale_kernel<<<GET_BLOCKS(filter_elements), CUDA_NUM_THREADS>>>(
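A note on the diagnostic removed above: factor is a LeCun-style 1/sqrt(fan_in) scale, since filter_elements / conv->output.adim[2] equals in_channels * kernel_h * kernel_w, the fan-in of one output channel. curandGenerateUniform fills the filter with uniform draws on (0, 1], and scale_kernel then rescales them (presumably into the [-factor, factor] range; its arguments are folded out of this excerpt).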
@@ -792,7 +792,10 @@ selectConvolutionForwardAlgorithm(cudnnHandle_t handle,
                                             reqAlgCnt, &cnt, perfResults, workSpace, workSpaceSize));
  assert(cnt > 0);
  checkCUDNN(perfResults[0].status);
+#ifdef VERBOSE_PRINT
+  printf("factor = %.4f elements = %d\n", factor, filter_elements / conv->output.adim[2]);
  printf("forwardAlgo(%d) time(%.2lf)\n", perfResults[0].algo, perfResults[0].time);
+#endif
return perfResults[0].algo;
}

@@ -812,7 +815,9 @@ selectConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
                                             reqAlgCnt, &cnt, perfResults, workSpace, workSpaceSize));
  assert(cnt > 0);
  checkCUDNN(perfResults[0].status);
+#ifdef VERBOSE_PRINT
  printf("bwdFilterAlgo(%d) time(%.2lf)\n", perfResults[0].algo, perfResults[0].time);
+#endif
return perfResults[0].algo;
}

@@ -832,7 +837,9 @@ selectConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
                                             reqAlgCnt, &cnt, perfResults, workSpace, workSpaceSize));
  assert(cnt > 0);
  checkCUDNN(perfResults[0].status);
+#ifdef VERBOSE_PRINT
  printf("bwdDataAlgo(%d) time(%.2lf)\n", perfResults[0].algo, perfResults[0].time);
+#endif
return perfResults[0].algo;
}
#endif
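For context on the three select*Algorithm helpers guarded above: cudnnFindConvolutionForwardAlgorithmEx and its backward-filter and backward-data counterparts time every candidate algorithm on the real device buffers and return perfResults sorted fastest-first, so perfResults[0].algo is the empirically best choice for the current shapes. A minimal sketch of the idiom; the descriptor and buffer names (inputTensor, inputPtr, filterDesc, filterPtr, convDesc, outputTensor, outputPtr) are placeholders, not the file's actual members:

  // cuDNN autotuning idiom: benchmark candidates, keep the fastest.
  const int reqAlgCnt = 8;                  // number of candidates to request
  int cnt = 0;                              // number actually benchmarked
  cudnnConvolutionFwdAlgoPerf_t perfResults[8];
  checkCUDNN(cudnnFindConvolutionForwardAlgorithmEx(
      handle, inputTensor, inputPtr, filterDesc, filterPtr,
      convDesc, outputTensor, outputPtr,
      reqAlgCnt, &cnt, perfResults, workSpace, workSpaceSize));
  assert(cnt > 0);                          // at least one algorithm ran
  checkCUDNN(perfResults[0].status);        // entries are sorted by measured time
  cudnnConvolutionFwdAlgo_t algo = perfResults[0].algo;

With the new VERBOSE_PRINT guards, the per-algorithm timing printout only compiles when the code is built with -DVERBOSE_PRINT.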
8 changes: 8 additions & 0 deletions ffcompile.sh
@@ -0,0 +1,8 @@
#!/bin/bash

APP="$1"

if [ -z "$APP" ]; then echo "Usage: ./ffcompile app"; exit 1; fi

make -j 8 APP="${APP}"

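The compile helper is a thin wrapper over make: it checks that an app name was supplied and then runs a parallel build with APP set. Presumably the Makefile names the output binary after APP, which is how run_experiments.sh below can invoke ./alexnet and ./resnet immediately after compiling.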
10 changes: 9 additions & 1 deletion linear.cu
@@ -36,7 +36,9 @@ Linear::Linear(CnnConfig config, Tensor input, IndexSpaceT<2> part_is,
  int fc_num_par_n = part_rect.hi[1] - part_rect.lo[1] + 1;
  num_replica = fc_num_par_n;

+#ifdef VERBOSE_PRINT
  printf("Linear fc_num_par_c(%d) fc_num_par_n(%d)\n", fc_num_par_c, fc_num_par_n);
+#endif
  FieldSpace fs = config.field_space;

  Rect<2, coord_t> output_rect(Point<2>(0, 0), Point<2>(out_channels-1, input.adim[1]-1));
@@ -96,7 +98,7 @@ Linear::Linear(CnnConfig config, Tensor input, IndexSpaceT<2> part_is,
  transform[0][0] = extent_c * in_channels;
  transform[1][1] = 1;
  Rect<2, coord_t> extent_k_grad(Point<2>(0, 0), Point<2>(extent_c*in_channels-1, 0));
-  printf("extent_k(%dx%d %d)\n", extent_c, in_channels, 1);
+  //printf("extent_k(%dx%d %d)\n", extent_c, in_channels, 1);
  IndexPartition kernel_grad_ip =
    runtime->create_partition_by_restriction(ctx, kernel_grad_is, part_is,
                                             transform, extent_k_grad);
@@ -157,6 +159,8 @@ Linear::Linear(CnnConfig config, Tensor input, IndexSpaceT<2> part_is,
  output.partition = output_lp;
  output.region_grad = output_grad_lr;
  output.partition_grad = output_grad_lp;
+  printf("  Create linear layer: output(n=%d c=%d)\n",
+         output.adim[1], output.adim[0]);

// Every partition reads all in_channels
transform[0][0] = 0;
@@ -202,7 +206,9 @@ OpMeta* Linear::init_task(const Task *task,
  int input_channels = rect_input.hi[0] - rect_input.lo[0] + 1;
  int output_channels = rect_output.hi[0] - rect_output.lo[0] + 1;
  int batch_size = linear->output.pdim[1];
+#ifdef VERBOSE_PRINT
  printf("init linear (input): in_c(%d) out_c(%d) batch_size(%d)\n", input_channels, output_channels, batch_size);
+#endif
LinearMeta* m = new LinearMeta(handle);
#ifndef DISABLE_COMPUTATION
m->relu = linear->relu;
@@ -706,7 +712,9 @@ void Linear::update_task(const Task *task,
  size_t bias_size = rect_bias.volume();
  assert(filter_size == linear->in_channels * linear->out_channels);
  assert(bias_size == linear->out_channels);
+#ifdef VERBOSE_PRINT
  printf("filter_size(%d) linear->num_replica(%d) rect_filter_grad(%d)\n", filter_size, linear->num_replica, rect_filter_grad.volume());
+#endif
assert(filter_size * linear->num_replica == rect_filter_grad.volume());
assert(bias_size * linear->num_replica == rect_bias_grad.volume());
assert(acc_filter.accessor.is_dense_arbitrary(rect_filter));
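The assertions around the newly guarded printf encode the replication scheme: each of the num_replica data-parallel shards keeps a private copy of the filter and bias gradients, so the gradient regions must be exactly num_replica times the parameter volumes (which update_task then presumably reduces back into the single weight copy).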
4 changes: 2 additions & 2 deletions ops.cu
@@ -24,7 +24,7 @@ CnnHandle init_cudnn(const Task *task,
  size_t workSpaceSize = *(const size_t*) task->args;
  CnnHandle handle;
  handle.workSpaceSize = workSpaceSize;
-  printf("workSpaceSize = %zu\n", workSpaceSize);
+  //printf("workSpaceSize = %zu\n", workSpaceSize);
#ifndef DISABLE_COMPUTATION
checkCUDA(cublasCreate(&handle.blas));
checkCUDNN(cudnnCreate(&handle.dnn));
@@ -580,7 +580,7 @@ Flat::Flat(CnnConfig config, Tensor input,
  output.region_grad = output_grad_lr;
  output.partition = output_lp;
  output.partition_grad = output_grad_lp;
-  printf("Create flat layer: input(N=%d C=%d H=%d W=%d) -> output(N=%d C=%d)\n",
+  printf("  Create flat layer: input(N=%d C=%d H=%d W=%d) -> output(N=%d C=%d)\n",
input.adim[3], input.adim[2], input.adim[1], input.adim[0], output.adim[1], output.adim[0]);

FieldSpace proj_fs = runtime->create_field_space(ctx);
4 changes: 3 additions & 1 deletion pool_2d.cu
@@ -84,7 +84,7 @@ Pooling2D::Pooling2D(CnnConfig config, Tensor input, IndexSpaceT<3> part_is,
  output.partition = output_lp;
  output.region_grad = output_grad_lr;
  output.partition_grad = output_grad_lp;
-  printf("Create pool2d layer: output(n=%d c=%d h=%d w=%d)\n",
+  printf("  Create pool2d layer: output(n=%d c=%d h=%d w=%d)\n",
output.adim[3], output.adim[2], output.adim[1], output.adim[0]);

// Compute partition bound for input
@@ -143,10 +143,12 @@ OpMeta* Pooling2D::init_task(const Task *task,
  int input_h = rect_input.hi[1] - rect_input.lo[1] + 1;
  int output_w = rect_output.hi[0] - rect_output.lo[0] + 1;
  int output_h = rect_output.hi[1] - rect_output.lo[1] + 1;
+#ifdef VERBOSE_PRINT
  printf("init pool (input): n(%d) c(%d) h(%d) w(%d)\n", pool->inputs[0].pdim[3],
         pool->inputs[0].pdim[2], input_h, input_w);
  printf("init pool (output): n(%d) c(%d) h(%d) w(%d)\n", pool->output.pdim[3],
         pool->output.pdim[2], output_h, output_w);
+#endif
checkCUDNN(cudnnSetTensor4dDescriptor(m->inputTensor,
CUDNN_TENSOR_NCHW,
CUDNN_DATA_FLOAT,
1 change: 1 addition & 0 deletions resnet.cc
@@ -3,6 +3,7 @@

void CnnModel::add_layers()
{
+  printf("Create Resnet-121:\n");
Tensor t = add_conv_layer(input_image, 64, 7, 7, 2, 2, 3, 3);
t = add_pool_layer(t, 3, 3, 2, 2, 1, 1);
for (int i = 0; i < 3; i++)
22 changes: 22 additions & 0 deletions run_experiments.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# AlexNet experiments
./ffcompile.sh alexnet

./alexnet -b 64 -ll:gpu 1 -ll:fsize 5000 -ll:zsize 5000 --strategy dp
./alexnet -b 64 -ll:gpu 1 -ll:fsize 5000 -ll:zsize 5000 --strategy opt
./alexnet -b 128 -ll:gpu 2 -ll:fsize 5000 -ll:zsize 5000 --strategy dp
./alexnet -b 128 -ll:gpu 2 -ll:fsize 5000 -ll:zsize 5000 --strategy opt
./alexnet -b 256 -ll:gpu 4 -ll:fsize 5000 -ll:zsize 5000 --strategy dp
./alexnet -b 256 -ll:gpu 4 -ll:fsize 5000 -ll:zsize 5000 --strategy opt

# ResNet experiments
./ffcompile.sh resnet
./resnet -b 64 -ll:gpu 1 -ll:fsize 9000 -ll:zsize 5000 --strategy dp
./resnet -b 64 -ll:gpu 1 -ll:fsize 9000 -ll:zsize 5000 --strategy opt
./resnet -b 128 -ll:gpu 2 -ll:fsize 9000 -ll:zsize 5000 --strategy dp
./resnet -b 128 -ll:gpu 2 -ll:fsize 9000 -ll:zsize 5000 --strategy opt
./resnet -b 256 -ll:gpu 4 -ll:fsize 9000 -ll:zsize 5000 --strategy dp
./resnet -b 256 -ll:gpu 4 -ll:fsize 9000 -ll:zsize 5000 --strategy opt


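A quick read of the flags for anyone rerunning these experiments: -b sets the batch size; -ll:gpu, -ll:fsize, and -ll:zsize are standard Legion runtime options for the number of GPUs per node and the framebuffer and zero-copy memory pools in MB; --strategy selects between plain data parallelism (dp) and what is presumably FlexFlow's optimized, search-found strategy (opt). Note that every configuration scales the batch size with the GPU count, keeping 64 images per GPU.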