Cocktailer Artifact #518

Merged
199 commits merged on Apr 24, 2023
Changes from all commits
a488ac2
some util functions for onnx frontend
xysmlx Jul 15, 2021
ae261c7
more datatype support in constant op
xysmlx Jul 15, 2021
73d3e6f
draft: onnx frontend support for if and loop op
xysmlx Jul 15, 2021
6cff7e7
draft: if and loop op_define
xysmlx Jul 15, 2021
ed0221a
refactor GraphConvert of the ONNX frontend, support if and loop convert
xysmlx Jul 15, 2021
1a30651
draft: cuda kernel emitter for if and loop op (placeholder)
xysmlx Jul 15, 2021
436d7d3
update onnx frontend convert and shape inference for loop op
xysmlx Jul 15, 2021
a2863d7
fix output size bug in ONNX Loop op convert
xysmlx Jul 20, 2021
b7b66b2
Generic_op_define and ONNX opset_11 frontend converter for ScatterND op
xysmlx Jul 20, 2021
ffae3f7
Comment m_expression construction of generic_op to bypass translate f…
xysmlx Jul 20, 2021
f771004
Merge branch 'master' into control_flow
xysmlx Aug 10, 2021
811a126
Merge master branch into control_flow branch
xysmlx Nov 17, 2021
a77ee7b
disable ORT optimizations
xysmlx Nov 18, 2021
d42be30
fix bug for disabling ORT optimizations
xysmlx Nov 18, 2021
0a29cc1
fix bug for disabling ORT optimizations
xysmlx Nov 18, 2021
2f29e0d
temp
nox-410 Nov 18, 2021
b0d4c56
Support -ftuning_list in kernel tuning pass
xysmlx Nov 28, 2021
faedc9b
Merge remote-tracking branch 'origin/control_flow' into control_flow_1
nox-410 Nov 28, 2021
6812438
Implement If and Loop code gen
nox-410 Dec 2, 2021
324f2cf
add mod operator
xysmlx Dec 2, 2021
e45ce1e
Add recursion
nox-410 Dec 2, 2021
5a2955a
Merge remote-tracking branch 'origin/control_flow' into control_flow_1
nox-410 Dec 2, 2021
df4ea2e
add -fcodegen_pybind
xiayuqing0622 Dec 3, 2021
2d2a9c5
fix bug
xiayuqing0622 Dec 3, 2021
35212e6
Fix code for cudaEmitter
nox-410 Dec 4, 2021
ef037e1
Fix recursion kernel name
nox-410 Dec 4, 2021
ee06cb9
Remove unused param in control flow
nox-410 Dec 4, 2021
a72c89f
Create base class for controlflow emitter
nox-410 Dec 6, 2021
d67fa83
Recursion Op workspace allocation
nox-410 Dec 6, 2021
14536eb
python patch
heheda12345 Dec 7, 2021
51f5de2
add kernel_entry
heheda12345 Dec 7, 2021
3ff6e40
remove half
heheda12345 Dec 7, 2021
5eb9ef4
allocate tensor in c
heheda12345 Dec 7, 2021
3b62fc8
torch::tensor for one output
heheda12345 Dec 7, 2021
912c126
tmp fix
xiayuqing0622 Dec 7, 2021
beafc0b
fix
nox-410 Dec 7, 2021
dbde8dd
list of tensor
heheda12345 Dec 7, 2021
8016924
Merge branch 'master' of github.com:heheda12345/nnfusion
Dec 7, 2021
7b1bdb0
fix bias dim
heheda12345 Dec 7, 2021
7648008
apply shared memory
nox-410 Dec 8, 2021
0023532
stderr
heheda12345 Dec 8, 2021
b6e052a
pybind int64
heheda12345 Dec 8, 2021
15bdba5
Fix recursion
nox-410 Dec 9, 2021
07a7f59
some parameter changes
nox-410 Dec 9, 2021
80726df
Merge branch 'control_flow_2' of https://github.com/nox-410/nnfusion
heheda12345 Dec 9, 2021
1c6f23a
bypass reshape and broadcast
nox-410 Dec 9, 2021
e3cfae0
fix duplicate node
nox-410 Dec 9, 2021
6176f8a
bugfix
nox-410 Dec 10, 2021
3c22fce
Bypass GatherV2 & merge code
nox-410 Dec 10, 2021
5dd292e
Merge branch 'control_flow_2' of https://github.com/nox-410/nnfusion
heheda12345 Dec 10, 2021
e9e9bd2
Adjust parameter
nox-410 Dec 10, 2021
7a58506
Merge remote-tracking branch 'zc/master' into control_flow_finetune
nox-410 Dec 11, 2021
b694f34
Merge branch 'control_flow_2' of https://github.com/nox-410/nnfusion
heheda12345 Dec 11, 2021
a23d8aa
fix a bug related with gatherV2
nox-410 Dec 11, 2021
38cfaaa
Merge branch 'control_flow_2' into control_flow_finetune
nox-410 Dec 11, 2021
bec08a9
fix reshape & broadcast bypass
nox-410 Dec 12, 2021
5071d8d
fix reshape & broadcast bypass
nox-410 Dec 12, 2021
8c57871
add threadfence
nox-410 Dec 13, 2021
bb4e4a0
Merge branch 'control_flow_2' into control_flow_finetune
nox-410 Dec 13, 2021
0598667
Support multiple outputs
nox-410 Dec 23, 2021
4888a31
Merge branch 'control_flow_2' into control_flow_finetune
nox-410 Dec 23, 2021
8d992b2
Add loop initialization
nox-410 Dec 23, 2021
bb54c8a
Fix extern result memory for Loop
nox-410 Dec 23, 2021
6b57878
Support broadcast Matmul
nox-410 Dec 25, 2021
287b0be
Adjust parameters
nox-410 Dec 25, 2021
6242182
Allow scalar float in torch codegen
nox-410 Dec 25, 2021
6c3a48d
Skip inplace analysis for subgraphs
nox-410 Dec 25, 2021
2016e0d
Fix Reshape error in Control flow
nox-410 Dec 27, 2021
2e43315
Use injected SumOp
nox-410 Dec 27, 2021
3233154
update Dot kernel cache
nox-410 Dec 28, 2021
74059ad
Fix controlflow inplace
nox-410 Dec 29, 2021
ebf0679
Set memory reuse to false
nox-410 Dec 29, 2021
28f3b07
Merge branch 'control_flow_finetune' of https://github.com/nox-410/nn…
heheda12345 Dec 29, 2021
11f9caf
grid.sync() & elementwise
heheda12345 Jan 1, 2022
5fd70b2
add scatternd op
heheda12345 Jan 1, 2022
cad0688
-fcheck_result
heheda12345 Jan 7, 2022
a87cfb5
add control edge to loop graph & fix kernel fusion
heheda12345 Jan 8, 2022
d68b2e7
concat with fewer resource
heheda12345 Jan 8, 2022
94e78d4
concat: no implace
heheda12345 Jan 9, 2022
4429553
elementwise: general blockdim
heheda12345 Jan 9, 2022
2d8229b
hack: add more dependency
heheda12345 Jan 9, 2022
98ea31b
forward control edge
heheda12345 Jan 9, 2022
8cf153c
remove useless barrier
heheda12345 Jan 9, 2022
7616e7c
add return
heheda12345 Jun 16, 2022
2d435fa
fix bug in conv-bn
heheda12345 Jul 22, 2022
903c774
add roller
heheda12345 Jul 25, 2022
fd0dc0d
debug tensor
heheda12345 Jul 25, 2022
847be21
add roller as submodule
heheda12345 Jul 25, 2022
db67a1a
upate gitignore
heheda12345 Jul 25, 2022
55706ff
change weight of inner graph to Constant op
heheda12345 Jul 25, 2022
0b71dba
gnode cout
heheda12345 Jul 25, 2022
626c66e
identity op
heheda12345 Jul 25, 2022
e2b7995
blockCudaEmitter: emit parameters from function sig
heheda12345 Jul 25, 2022
ba24cbb
draft version of seperate kernel launch, conflict wtih postprocessing…
heheda12345 Jul 26, 2022
ad3802d
fix typo
heheda12345 Jul 26, 2022
b3d0f8c
two branch call finish
heheda12345 Jul 27, 2022
0e9e76a
fix concat op
heheda12345 Aug 11, 2022
7ed651b
update op frontend
heheda12345 Aug 11, 2022
9f72880
conv to CNHW initial support
heheda12345 Aug 11, 2022
e75617e
add concat and reshape op
heheda12345 Aug 11, 2022
6a1e0c5
add if op to conv_layout_pass and fix related bugs
heheda12345 Aug 15, 2022
a0bc620
fix bug in share memory allocation of if op
heheda12345 Aug 15, 2022
01ac748
fuse small kernels (not finish yet)
heheda12345 Aug 18, 2022
d1f988b
fuse small kernels
heheda12345 Aug 18, 2022
2f189d0
reorder the kernels
heheda12345 Aug 23, 2022
ac90748
impl d2h
heheda12345 Aug 25, 2022
ac6e50f
fix bug in elementwise kernel
heheda12345 Aug 29, 2022
7a7d75e
main_test 100+100, print ref
heheda12345 Aug 29, 2022
e6bd194
fuse then else
heheda12345 Aug 30, 2022
6675125
move subtract out of if
heheda12345 Aug 31, 2022
bb1210e
loop in c
heheda12345 Sep 5, 2022
b5c0a4e
fix small bugs for skipnet
heheda12345 Sep 7, 2022
f658b61
CPU-GPU hybrid: assign stage
heheda12345 Sep 14, 2022
da1aaeb
CPU-GPU hybrid: add d2h and h2d gnode
heheda12345 Sep 14, 2022
41609fc
CPU-GPU hybrid: dumplicate memory pool
heheda12345 Sep 14, 2022
92445f4
CPU-GPU hybrid: call by tensor with _cpu
heheda12345 Sep 14, 2022
645e78f
CPU-GPU hybrid: forward stage info in element-wise fusion pass
heheda12345 Sep 15, 2022
ad057ed
remove debug code
heheda12345 Sep 15, 2022
78614ca
CPU-GPU hybrid: copy cpu emitter from cuda emitter
heheda12345 Sep 15, 2022
84dbfa2
CPU-GPU hybrid: for (int tid=0; tid <...)
heheda12345 Sep 15, 2022
56bfcbf
CPU-GPU hybrid: put result op on CPU
heheda12345 Sep 15, 2022
35adaba
CPU-GPU hybrid: bmm & conv codegen, can run
heheda12345 Sep 16, 2022
9d2b6f1
CPU-GPU hybrid: avoid run to_cpu_pass in inner graph
heheda12345 Sep 16, 2022
e914e1c
add cpu op
heheda12345 Sep 27, 2022
ccd9270
fix bugs in recursion
heheda12345 Sep 27, 2022
ef8d23b
inline recursion call
heheda12345 Sep 27, 2022
0d9e94c
recursive with stack
heheda12345 Sep 27, 2022
9d270ec
add be_state_buffer and state_base
heheda12345 Sep 28, 2022
fe943c7
add be_state_buffer and state_base to more place
heheda12345 Sep 28, 2022
9cac765
fast barrier codegen
heheda12345 Sep 28, 2022
294fa47
check control edge in operator << (gnode)
heheda12345 Oct 4, 2022
e358bc0
optimize elementwise perf
heheda12345 Oct 5, 2022
f9e1e80
add pipeline fail to compile commands
heheda12345 Oct 5, 2022
b7e558d
eliminate copy back of cond
heheda12345 Oct 6, 2022
88ee7bc
add bool to dtypes
heheda12345 Oct 7, 2022
4c0f3d2
add translate_v2 for identity op
heheda12345 Oct 9, 2022
68e3643
avoid inplace opt when gnode i/o contains result
heheda12345 Oct 9, 2022
b4b4e1a
fix bug in conv layout pass
heheda12345 Oct 9, 2022
dc2320b
cast_pytorch_tensor: use data_ptr instead of storage.data_ptr
heheda12345 Oct 9, 2022
99d2529
add fused_max_grid to loop
heheda12345 Oct 10, 2022
452d049
add more cpu op
heheda12345 Oct 11, 2022
c50e66c
add naive impls for breakdown exp
heheda12345 Oct 11, 2022
47f392e
skip scalar op: reshape
heheda12345 Oct 11, 2022
e798c76
is_outmost_graph for blockfusion
heheda12345 Oct 18, 2022
deccf1c
hacked parallel recursion: assume all calls can be executed in parallel
heheda12345 Oct 19, 2022
95ee0d4
tune recursion
heheda12345 Oct 20, 2022
3691741
add reduce-memcpy blockop
heheda12345 Nov 11, 2022
001c376
argmax kernel (not tested)
heheda12345 Nov 11, 2022
5e213d0
support while op, can compile but loop cannot stop
heheda12345 Nov 11, 2022
e343fe7
alloc cond tensor, fix bug in parameter mapping, can run bs=1
heheda12345 Nov 11, 2022
8668be8
hardcode num_local_thread_sync in reduce.hpp because emit_function_bo…
heheda12345 Nov 12, 2022
62fcf06
while in c
heheda12345 Nov 12, 2022
4b7373d
fast barrier for single block
heheda12345 Nov 12, 2022
18e60a3
fix bug in fast barrier of single block
heheda12345 Nov 12, 2022
3dfae17
fix bug in argmax and element_fused, pass while_op
heheda12345 Nov 14, 2022
823fc2a
enalbe & disable result d2d inplace
heheda12345 Nov 17, 2022
9215c13
support different schedule of if inside while op
heheda12345 Nov 22, 2022
865c613
extend elementwise to support simple broadcast
heheda12345 Nov 28, 2022
08f8504
extend scatternd to support index array
heheda12345 Nov 28, 2022
80cc929
reshape memcpy block kernel
heheda12345 Nov 28, 2022
652677f
softmax block kernel
heheda12345 Nov 28, 2022
117cb7f
batchmatmul with broadcast
heheda12345 Nov 28, 2022
e4acd37
small fix
heheda12345 Nov 28, 2022
27bdc7d
manually set max_block_dim for bcast and elementwise
heheda12345 Nov 28, 2022
428edb6
sync for rocm
heheda12345 Dec 2, 2022
f235896
merge rocm code
heheda12345 Mar 27, 2023
ecf47cc
add IfSingle operator
heheda12345 Mar 28, 2023
547136d
reorganize parameters
heheda12345 Mar 30, 2023
c8cce31
remove cudadevicereset
heheda12345 Apr 3, 2023
60a287a
fix depunit bug in loop
heheda12345 Apr 3, 2023
f5e6739
dump kerneldb requests
heheda12345 Apr 3, 2023
65c6f4f
search unroll width
heheda12345 Apr 3, 2023
f0c358d
fix blockfusion sync problem
heheda12345 Apr 10, 2023
f138341
wrap python part with ifdef
heheda12345 Apr 14, 2023
845c52c
add __syncthreads() to cf kernels
heheda12345 Apr 17, 2023
ac6bd1f
copy file from ControlFlow repo
heheda12345 Apr 18, 2023
77fbf5b
change path
heheda12345 Apr 18, 2023
8266a71
fix bug in scripts
heheda12345 Apr 18, 2023
f2d7df3
add more guides
heheda12345 Apr 19, 2023
246385a
add more guides
heheda12345 Apr 19, 2023
e4e02fe
remove cudnn
heheda12345 Apr 19, 2023
16713b3
install_grinder script
heheda12345 Apr 19, 2023
9e3c80f
remove cudnn in manual
heheda12345 Apr 19, 2023
ec00f1e
autotvm kernel
heheda12345 Apr 20, 2023
2ba5cf4
remove training code
heheda12345 Apr 20, 2023
e568ef6
change permission
heheda12345 Apr 20, 2023
a6514e7
update kernels in manual impls
heheda12345 Apr 20, 2023
afdae02
add rocm kerneldb script
heheda12345 Apr 21, 2023
06a0ff2
copy roller rocm code
heheda12345 Apr 21, 2023
186fe5b
first try of rocm kerneldb
heheda12345 Apr 22, 2023
48d0651
rocm reproduced
heheda12345 Apr 22, 2023
767c6f8
remove grinder from filename
heheda12345 Apr 22, 2023
2632ad8
kerneldb scripts
heheda12345 Apr 23, 2023
dd1796a
finish rocm?
heheda12345 Apr 23, 2023
ec0594f
remove name 'grinder' from scripts
heheda12345 Apr 23, 2023
063996b
update gitignore
heheda12345 Apr 23, 2023
b1a889b
small fix
heheda12345 Apr 23, 2023
071ded7
rename project and remove some script
heheda12345 Apr 24, 2023
7d0c87a
update links
heheda12345 Apr 24, 2023
12 changes: 12 additions & 0 deletions .gitignore
@@ -52,3 +52,15 @@ doc/doxygen/html/*

nnfusion_rt/
models/frozenmodels/

artifacts/data
artifacts/reproduce_results
*.onnx
*.tfgraph

artifacts/baseline/attention/onnx
artifacts/baseline/seq2seq/onnx
artifacts/baseline/blockdrop/onnx
artifacts/baseline/skipnet/onnx
artifacts/baseline/resnet18/resnet18.b1.onnx
artifacts/baseline/resnet101/resnet101.b1.onnx
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "/home/heheda/nnfusion/src/nnfusion/engine/cache/roller"]
path = /home/heheda/nnfusion/src/nnfusion/engine/cache/roller
url = git@github.com:heheda12345/roller.git
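
A minimal sketch of initializing this submodule after cloning, using only standard git commands (nothing here is specific to the path recorded above):

```bash
# run from the root of an nnfusion checkout
git submodule sync --recursive
git submodule update --init --recursive
```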
20 changes: 16 additions & 4 deletions CMakeLists.txt
@@ -20,6 +20,7 @@ option(CODE_COVERAGE_ENABLE "Enable code coverage." FALSE)
option(ONNX_FRONTEND "Enable ONNX frontend." TRUE)
option(TENSORFLOW_FRONTEND "Enable Tensorflow frontend." TRUE)
option(TORCHSCRIPT_FRONTEND "Enable TorchScript frontend." FALSE)
option(PYTHON_INTERPRETER "Enable Python interpreter" FALSE)

#-----------------------------------------------------------------------------------------------
# STEP.1 Customnized targets
@@ -73,7 +74,7 @@ endif()
if(${DEBUG_ENABLE})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -g")
endif()

if (CODE_COVERAGE_ENABLE)
@@ -104,6 +105,10 @@ if(TORCHSCRIPT_FRONTEND)
add_definitions(-DTORCHSCRIPT_FRONTEND)
endif()

if(PYTHON_INTERPRETER)
add_definitions(-DPYTHON_INTERPRETER)
endif()

add_definitions(-DPROJECT_ROOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}")

#-----------------------------------------------------------------------------------------------
@@ -115,10 +120,17 @@ find_package(CURL)
###Static###
set(Protobuf_USE_STATIC_LIBS ON)
find_package(Threads REQUIRED) # This is for test usage
find_package(GTest)
# find_package(GTest)
find_package(Protobuf 3.5.0)
find_library(gflags NAMES libgflags.a)
find_library(sqlite3 NAMES libsqlite3.a)
if(PYTHON_INTERPRETER)
find_package(Python COMPONENTS Interpreter Development)
message("python include dir ${Python_INCLUDE_DIRS}")
include_directories(
${Python_INCLUDE_DIRS}
)
endif()

#-----------------------------------------------------------------------------------------------
# STEP.6 Check envs
@@ -136,8 +148,8 @@ message(STATUS "thirdparty enabled")
add_subdirectory(src)
message(STATUS "nnfusion enabled")

add_subdirectory(test)
message(STATUS "unit tests enabled")
# add_subdirectory(test)
# message(STATUS "unit tests enabled")

# add_subdirectory(doc)
# message(STATUS "nnfusion documents enabled")
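
As a reference for the new `PYTHON_INTERPRETER` option introduced above, a minimal configure-and-build sketch; the out-of-source `build` directory is an assumption, not part of this change:

```bash
# configure with the Python interpreter support added in this CMakeLists.txt
mkdir -p build && cd build
cmake -DPYTHON_INTERPRETER=TRUE ..   # also enables the -DPYTHON_INTERPRETER compile definition
make -j$(nproc)
```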
4 changes: 4 additions & 0 deletions artifacts/.dockerignore
@@ -0,0 +1,4 @@
data
kernel_db
third-party
models
20 changes: 20 additions & 0 deletions artifacts/Figure14/run.sh
@@ -0,0 +1,20 @@
#!/bin/bash

source ~/miniconda3/etc/profile.d/conda.sh

conda activate controlflow
./run_pytorch.sh
conda deactivate

conda activate baseline_tf1
./run_tf.sh
conda deactivate

conda activate baseline_jax
./run_jax.sh
conda deactivate

conda activate controlflow
./run_base.sh
./run_sys.sh
conda deactivate
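
The Figure scripts below assume that the conda environments from `INSTALL.md` exist and that `ARTIFACT_ROOT` points at the `artifacts` directory; a minimal invocation sketch (the checkout path is an assumption):

```bash
# adjust the path to your own nnfusion checkout
export ARTIFACT_ROOT=$HOME/nnfusion/artifacts
cd ${ARTIFACT_ROOT}/Figure14 && ./run.sh
```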
42 changes: 42 additions & 0 deletions artifacts/Figure14/run_base.sh
@@ -0,0 +1,42 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure14/base
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models

# build modified models
DIR_MAP=(
"manual_seq2seq/bs1/seq2seq_bs1_0-forward/nnfusion_rt/cuda_codegen:seq2seq/Constant.bs1.0"
"manual_seq2seq/bs64/seq2seq_bs64_0-forward/nnfusion_rt/cuda_codegen:seq2seq/Constant.bs64.0"
"manual_attention/bs1/attention_bs1_0-forward/nnfusion_rt/cuda_codegen:attention/Constant.bs1.0"
"manual_attention/bs1/attention_bs1_1-forward/nnfusion_rt/cuda_codegen:attention/Constant.bs1.1"
"manual_attention/bs64/attention_bs64_0-forward/nnfusion_rt/cuda_codegen:attention/Constant.bs64.0"
"manual_attention/bs64/attention_bs64_1-forward/nnfusion_rt/cuda_codegen:attention/Constant.bs64.1"
)

for pair in ${DIR_MAP[@]}; do
IFS=':' read -ra ADDR <<< "$pair"
workdir=${ADDR[0]}
datadir=${ADDR[1]}
if [ ! -r $workdir/main_test ]; then
echo "preparing $workdir"
cd $workdir
cp -r ${ARTIFACT_ROOT}/data/$datadir Constant
cmake . && make -j
cd ${ARTIFACT_ROOT}/models
fi
done

# run tests
python3 lstm.py --platform V100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/lstm.b1.log
python3 lstm.py --platform V100 --bs 64 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/lstm.b64.log
python3 nasrnn.py --platform V100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/nasrnn.b1.log
python3 nasrnn.py --platform V100 --bs 64 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/nasrnn.b64.log
cd manual_attention/bs1 && python3 run.py 2>&1 | tee $LOG_DIR/attention.b1.log && cd ../..
cd manual_attention/bs64 && python3 run.py 2>&1 | tee $LOG_DIR/attention.b64.log && cd ../..
cd manual_seq2seq/bs1 && python3 run.py 2>&1 | tee $LOG_DIR/seq2seq.b1.log && cd ../..
cd manual_seq2seq/bs64 && python3 run.py 2>&1 | tee $LOG_DIR/seq2seq.b64.log && cd ../..
python3 blockdrop.py --platform V100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/blockdrop.b1.log
python3 blockdrop.py --platform V100 --bs 64 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/blockdrop.b64.log
python3 skipnet.py --platform V100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/skipnet.b1.log
python3 rae.py --platform V100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/rae.b1.log
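
Each `DIR_MAP` entry above packs a codegen directory and a constant-data directory into one `workdir:datadir` string; a small sketch of the split the preparation loop performs:

```bash
# same parsing as the loop above, shown for a single entry
pair="manual_seq2seq/bs1/seq2seq_bs1_0-forward/nnfusion_rt/cuda_codegen:seq2seq/Constant.bs1.0"
IFS=':' read -ra ADDR <<< "$pair"
echo "workdir: ${ADDR[0]}"   # where cmake/make and main_test live
echo "datadir: ${ADDR[1]}"   # constants copied from ${ARTIFACT_ROOT}/data
```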
31 changes: 31 additions & 0 deletions artifacts/Figure14/run_jax.sh
@@ -0,0 +1,31 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure14/jax
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/lstm.b1.log
python3 lstm_jax.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/lstm.b64.log
cd ..
cd nasrnn
python3 nas_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/nasrnn.b1.log
python3 nas_jax.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/nasrnn.b64.log
cd ..
cd attention
python3 attention_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/attention.b1.log
python3 attention_jax.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/attention.b64.log
cd ..
cd seq2seq
python3 seq2seq_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/seq2seq.b1.log
python3 seq2seq_jax.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/seq2seq.b64.log
cd ..
cd blockdrop
python3 blockdrop_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/blockdrop.b1.log
python3 blockdrop_jax.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/blockdrop.b64.log
cd ..
cd skipnet
python3 skipnet_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/skipnet.b1.log
cd ..
cd rae
python3 rae_jax.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/rae.b1.log
cd ..
31 changes: 31 additions & 0 deletions artifacts/Figure14/run_pytorch.sh
@@ -0,0 +1,31 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure14/pytorch
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/lstm.b1.log
python3 lstm_pytorch.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/lstm.b64.log
cd ..
cd nasrnn
python3 nas_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/nasrnn.b1.log
python3 nas_pytorch.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/nasrnn.b64.log
cd ..
cd attention
python3 attention_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/attention.b1.log
python3 attention_pytorch.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/attention.b64.log
cd ..
cd seq2seq
python3 seq2seq_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/seq2seq.b1.log
python3 seq2seq_pytorch.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/seq2seq.b64.log
cd ..
cd blockdrop
python3 blockdrop_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/blockdrop.b1.log
python3 blockdrop_pytorch.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/blockdrop.b64.log
cd ..
cd skipnet
python3 skipnet_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/skipnet.b1.log
cd ..
cd rae
python3 rae_pytorch.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/rae.b1.log
cd ..
17 changes: 17 additions & 0 deletions artifacts/Figure14/run_sys.sh
@@ -0,0 +1,17 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure14/sys
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models
python3 lstm.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/lstm.b1.log
python3 lstm.py --platform V100 --bs 64 --no-torch --measure 2>&1 | tee $LOG_DIR/lstm.b64.log
python3 nasrnn.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/nasrnn.b1.log
python3 nasrnn.py --platform V100 --bs 64 --no-torch --measure 2>&1 | tee $LOG_DIR/nasrnn.b64.log
python3 seq2seq.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/seq2seq.b1.log
python3 seq2seq.py --platform V100 --bs 64 --no-torch --measure 2>&1 | tee $LOG_DIR/seq2seq.b64.log
python3 attention.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/attention.b1.log
python3 attention.py --platform V100 --bs 64 --no-torch --measure 2>&1 | tee $LOG_DIR/attention.b64.log
python3 blockdrop.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/blockdrop.b1.log
python3 blockdrop.py --platform V100 --bs 64 --no-torch --measure 2>&1 | tee $LOG_DIR/blockdrop.b64.log
python3 skipnet.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/skipnet.b1.log
python3 rae.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/rae.b1.log
28 changes: 28 additions & 0 deletions artifacts/Figure14/run_tf.sh
@@ -0,0 +1,28 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure14/tf
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_tf.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/lstm.b1.log
python3 lstm_tf.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/lstm.b64.log
cd ..
cd nasrnn
python3 nas_tf.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/nasrnn.b1.log
python3 nas_tf.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/nasrnn.b64.log
cd ..
cd attention
python3 attention_tf.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/attention.b1.log
python3 attention_tf.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/attention.b64.log
cd ..
cd seq2seq
python3 seq2seq_tf.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/seq2seq.b1.log
python3 seq2seq_tf.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/seq2seq.b64.log
cd ..
cd blockdrop
python3 blockdrop_tf.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/blockdrop.b1.log
python3 blockdrop_tf.py --platform V100 --bs 64 2>&1 | tee ${LOG_DIR}/blockdrop.b64.log
cd ..
cd skipnet
python3 skipnet_tf.py --platform V100 --bs 1 2>&1 | tee ${LOG_DIR}/skipnet.b1.log
cd ..
19 changes: 19 additions & 0 deletions artifacts/Figure15/run.sh
@@ -0,0 +1,19 @@
#!/bin/bash

source ~/miniconda3/etc/profile.d/conda.sh

conda activate controlflow
./run_pytorch.sh
conda deactivate

conda activate baseline_tf1
./run_tf.sh
conda deactivate

conda activate baseline_jax
./run_jax.sh
conda deactivate

conda activate controlflow
./run_sys.sh
conda deactivate
31 changes: 31 additions & 0 deletions artifacts/Figure15/run_jax.sh
@@ -0,0 +1,31 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure15/jax
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

cd lstm
python3 lstm_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/lstm.unroll.log
nvprof --profile-from-start off python3 lstm_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/lstm.unroll.nvprof.log
python3 lstm_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/lstm.fix.log
nvprof --profile-from-start off python3 lstm_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/lstm.fix.nvprof.log
cd ..
cd nasrnn
python3 nas_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/nasrnn.unroll.log
nvprof --profile-from-start off python3 nas_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/nasrnn.unroll.nvprof.log
python3 nas_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/nasrnn.fix.log
nvprof --profile-from-start off python3 nas_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/nasrnn.fix.nvprof.log
cd ..
cd attention
python3 attention_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/attention.unroll.log
nvprof --profile-from-start off python3 attention_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/attention.unroll.nvprof.log
python3 attention_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/attention.fix.log
nvprof --profile-from-start off python3 attention_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/attention.fix.nvprof.log
cd ..
cd seq2seq
python3 seq2seq_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.log
nvprof --profile-from-start off python3 seq2seq_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.nvprof.log
python3 seq2seq_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/seq2seq.fix.log
nvprof --profile-from-start off python3 seq2seq_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/seq2seq.fix.nvprof.log
cd ..
cd ..
30 changes: 30 additions & 0 deletions artifacts/Figure15/run_pytorch.sh
@@ -0,0 +1,30 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure15/pytorch
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/lstm.unroll.log
nvprof --profile-from-start off python3 lstm_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/lstm.unroll.nvprof.log
python3 lstm_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/lstm.fix.log
nvprof --profile-from-start off python3 lstm_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/lstm.fix.nvprof.log
cd ..
cd nasrnn
python3 nas_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/nasrnn.unroll.log
nvprof --profile-from-start off python3 nas_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/nasrnn.unroll.nvprof.log
python3 nas_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/nasrnn.fix.log
nvprof --profile-from-start off python3 nas_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/nasrnn.fix.nvprof.log
cd ..
cd attention
python3 attention_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/attention.unroll.log
nvprof --profile-from-start off python3 attention_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/attention.unroll.nvprof.log
python3 attention_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/attention.fix.log
nvprof --profile-from-start off python3 attention_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/attention.fix.nvprof.log
cd ..
cd seq2seq
python3 seq2seq_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.log
nvprof --profile-from-start off python3 seq2seq_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.nvprof.log
python3 seq2seq_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/seq2seq.fix.log
nvprof --profile-from-start off python3 seq2seq_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/seq2seq.fix.nvprof.log
cd ..
cd ..
26 changes: 26 additions & 0 deletions artifacts/Figure15/run_sys.sh
@@ -0,0 +1,26 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure15/sys
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models

python3 lstm-unroll.py --measure 2>&1 | tee $LOG_DIR/lstm.unroll.log
nvprof --profile-from-start off python3 lstm-unroll.py --measure 2>&1 | tee $LOG_DIR/lstm.unroll.nvprof.log
python3 lstm.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/lstm.fix.log
nvprof --profile-from-start off python3 lstm.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/lstm.fix.nvprof.log

python3 nasrnn-unroll.py --measure 2>&1 | tee $LOG_DIR/nasrnn.unroll.log
nvprof --profile-from-start off python3 nasrnn-unroll.py --measure 2>&1 | tee $LOG_DIR/nasrnn.unroll.nvprof.log
python3 nasrnn.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/nasrnn.fix.log
nvprof --profile-from-start off python3 nasrnn.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/nasrnn.fix.nvprof.log


python3 attention-unroll.py --measure 2>&1 | tee $LOG_DIR/attention.unroll.log
nvprof --profile-from-start off python3 attention-unroll.py --measure 2>&1 | tee $LOG_DIR/attention.unroll.nvprof.log
python3 attention.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/attention.fix.log
nvprof --profile-from-start off python3 attention.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/attention.fix.nvprof.log

python3 seq2seq.py --platform V100 --bs 1 --no-torch --measure --overhead_test --unroll 2>&1 | tee $LOG_DIR/seq2seq.unroll.log
nvprof --profile-from-start off python3 seq2seq.py --platform V100 --bs 1 --no-torch --measure --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.nvprof.log
python3 seq2seq.py --platform V100 --bs 1 --no-torch --measure --overhead_test --fix 2>&1 | tee $LOG_DIR/seq2seq.fix.log
nvprof --profile-from-start off python3 seq2seq.py --platform V100 --bs 1 --no-torch --measure --overhead_test --fix 2>&1 | tee $LOG_DIR/seq2seq.fix.nvprof.log
33 changes: 33 additions & 0 deletions artifacts/Figure15/run_tf.sh
@@ -0,0 +1,33 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure15/tf
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

cd lstm
python3 lstm_tf.py --bs 1 --platform V100 --overhead_test --unroll=true 2>&1 | tee ${LOG_DIR}/lstm.unroll.log
nvprof --profile-from-start off python3 lstm_tf.py --bs 1 --platform V100 --overhead_test --unroll=true 2>&1 | tee ${LOG_DIR}/lstm.unroll.nvprof.log
python3 lstm_tf.py --bs 1 --platform V100 --overhead_test --unroll=false 2>&1 | tee ${LOG_DIR}/lstm.fix.log
nvprof --profile-from-start off python3 lstm_tf.py --bs 1 --platform V100 --overhead_test --unroll=false 2>&1 | tee ${LOG_DIR}/lstm.fix.nvprof.log
cd ..

cd nasrnn
python3 nas_tf.py --bs 1 --platform V100 --overhead_test --unroll=true 2>&1 | tee ${LOG_DIR}/nasrnn.unroll.log
nvprof --profile-from-start off python3 nas_tf.py --bs 1 --platform V100 --overhead_test --unroll=true 2>&1 | tee ${LOG_DIR}/nasrnn.unroll.nvprof.log
python3 nas_tf.py --bs 1 --platform V100 --overhead_test --unroll=false 2>&1 | tee ${LOG_DIR}/nasrnn.fix.log
nvprof --profile-from-start off python3 nas_tf.py --bs 1 --platform V100 --overhead_test --unroll=false 2>&1 | tee ${LOG_DIR}/nasrnn.fix.nvprof.log
cd ..

cd attention
python3 attention_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/attention.unroll.log
nvprof --profile-from-start off python3 attention_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/attention.unroll.nvprof.log
python3 attention_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/attention.fix.log
nvprof --profile-from-start off python3 attention_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/attention.fix.nvprof.log
cd ..

cd seq2seq
python3 seq2seq_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.log
nvprof --profile-from-start off python3 seq2seq_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/seq2seq.unroll.nvprof.log
python3 seq2seq_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/seq2seq.fix.log
nvprof --profile-from-start off python3 seq2seq_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/seq2seq.fix.nvprof.log
cd ..
19 changes: 19 additions & 0 deletions artifacts/Figure16/run.sh
@@ -0,0 +1,19 @@
#!/bin/bash

source ~/miniconda3/etc/profile.d/conda.sh

conda activate controlflow
./run_pytorch.sh
conda deactivate

conda activate baseline_tf1
./run_tf.sh
conda deactivate

conda activate baseline_jax
./run_jax.sh
conda deactivate

conda activate controlflow
./run_sys.sh
conda deactivate
29 changes: 29 additions & 0 deletions artifacts/Figure16/run_jax.sh
@@ -0,0 +1,29 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure16/jax
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

cd resnet18
python3 resnet18_jax.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
nvprof --profile-from-start off python3 resnet18_jax.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.nvprof.log
cd ..

cd blockdrop
python3 blockdrop_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/blockdrop.unroll.log
nvprof --profile-from-start off python3 blockdrop_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/blockdrop.unroll.nvprof.log
python3 blockdrop_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/blockdrop.fix.log
nvprof --profile-from-start off python3 blockdrop_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/blockdrop.fix.nvprof.log
cd ..

cd resnet101
python3 resnet101_jax.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
nvprof --profile-from-start off python3 resnet101_jax.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.nvprof.log
cd ..

cd skipnet
python3 skipnet_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/skipnet.unroll.log
nvprof --profile-from-start off python3 skipnet_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/skipnet.unroll.nvprof.log
python3 skipnet_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/skipnet.fix.log
nvprof --profile-from-start off python3 skipnet_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/skipnet.fix.nvprof.log
cd ..
29 changes: 29 additions & 0 deletions artifacts/Figure16/run_pytorch.sh
@@ -0,0 +1,29 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure16/pytorch
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

cd resnet18
python3 resnet18_pytorch.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
nvprof --profile-from-start off python3 resnet18_pytorch.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.nvprof.log
cd ..

cd blockdrop
python3 blockdrop_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/blockdrop.unroll.log
nvprof --profile-from-start off python3 blockdrop_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/blockdrop.unroll.nvprof.log
python3 blockdrop_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/blockdrop.fix.log
nvprof --profile-from-start off python3 blockdrop_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/blockdrop.fix.nvprof.log
cd ..

cd resnet101
python3 resnet101_pytorch.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
nvprof --profile-from-start off python3 resnet101_pytorch.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.nvprof.log
cd ..

cd skipnet
python3 skipnet_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/skipnet.unroll.log
nvprof --profile-from-start off python3 skipnet_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/skipnet.unroll.nvprof.log
python3 skipnet_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/skipnet.fix.log
nvprof --profile-from-start off python3 skipnet_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/skipnet.fix.nvprof.log
cd ..
19 changes: 19 additions & 0 deletions artifacts/Figure16/run_sys.sh
@@ -0,0 +1,19 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure16/sys
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models

python3 resnet18.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
nvprof --profile-from-start off python3 resnet18.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.nvprof.log
python3 blockdrop.py --bs 1 --no-torch --overhead_test --unroll --measure 2>&1 | tee $LOG_DIR/blockdrop.unroll.log
nvprof --profile-from-start off python3 blockdrop.py --bs 1 --no-torch --overhead_test --unroll --measure 2>&1 | tee $LOG_DIR/blockdrop.unroll.nvprof.log
python3 blockdrop.py --bs 1 --no-torch --overhead_test --fix --measure 2>&1 | tee $LOG_DIR/blockdrop.fix.log
nvprof --profile-from-start off python3 blockdrop.py --bs 1 --no-torch --overhead_test --fix --measure 2>&1 | tee $LOG_DIR/blockdrop.fix.nvprof.log

python3 resnet101.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
nvprof --profile-from-start off python3 resnet101.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee ${LOG_DIR}/skipnet.noskip.nvprof.log
python3 skipnet.py --bs 1 --no-torch --overhead_test --unroll --measure 2>&1 | tee $LOG_DIR/skipnet.unroll.log
nvprof --profile-from-start off python3 skipnet.py --bs 1 --no-torch --overhead_test --unroll --measure 2>&1 | tee $LOG_DIR/skipnet.unroll.nvprof.log
python3 skipnet.py --bs 1 --no-torch --overhead_test --fix --measure 2>&1 | tee $LOG_DIR/skipnet.fix.log
nvprof --profile-from-start off python3 skipnet.py --bs 1 --no-torch --overhead_test --fix --measure 2>&1 | tee $LOG_DIR/skipnet.fix.nvprof.log
29 changes: 29 additions & 0 deletions artifacts/Figure16/run_tf.sh
@@ -0,0 +1,29 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure16/tf
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

cd resnet18
python3 resnet18_tf.py --bs=1 --platform=V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
nvprof --profile-from-start off python3 resnet18_tf.py --bs=1 --platform=V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.nvprof.log
cd ..

cd blockdrop
python3 blockdrop_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/blockdrop.unroll.log
nvprof --profile-from-start off python3 blockdrop_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/blockdrop.unroll.nvprof.log
python3 blockdrop_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/blockdrop.fix.log
nvprof --profile-from-start off python3 blockdrop_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/blockdrop.fix.nvprof.log
cd ..

cd resnet101
python3 resnet101_tf.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
nvprof --profile-from-start off python3 resnet101_tf.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.nvprof.log
cd ..

cd skipnet
python3 skipnet_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/skipnet.unroll.log
nvprof --profile-from-start off python3 skipnet_tf.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/skipnet.unroll.nvprof.log
python3 skipnet_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/skipnet.fix.log
nvprof --profile-from-start off python3 skipnet_tf.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/skipnet.fix.nvprof.log
cd ..
19 changes: 19 additions & 0 deletions artifacts/Figure17/run.sh
@@ -0,0 +1,19 @@
#!/bin/bash

source ~/miniconda3/etc/profile.d/conda.sh

# conda activate controlflow
# ./run_pytorch.sh
# conda deactivate

# conda activate baseline_tf1
# ./run_tf.sh
# conda deactivate

conda activate baseline_jax
./run_jax.sh
conda deactivate

conda activate controlflow
./run_sys.sh
conda deactivate
29 changes: 29 additions & 0 deletions artifacts/Figure17/run_jax.sh
@@ -0,0 +1,29 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure17/jax
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

rates=(0 25 50 75 100)

cd resnet18
python3 resnet18_jax.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
cd ..

cd blockdrop
for rate in ${rates[*]}; do
python3 blockdrop_jax.py --bs 1 --platform V100 --overhead_test --unroll --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.unroll.log
python3 blockdrop_jax.py --bs 1 --platform V100 --overhead_test --fix --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.fix.log
done
cd ..

cd resnet101
python3 resnet101_jax.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
cd ..

cd skipnet
for rate in ${rates[*]}; do
python3 skipnet_jax.py --bs 1 --platform V100 --overhead_test --unroll --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.unroll.log
python3 skipnet_jax.py --bs 1 --platform V100 --overhead_test --fix --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.fix.log
done
cd ..
29 changes: 29 additions & 0 deletions artifacts/Figure17/run_pytorch.sh
@@ -0,0 +1,29 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure17/pytorch
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

rates=(0 25 50 75 100)

cd resnet18
python3 resnet18_pytorch.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
cd ..

cd blockdrop
for rate in ${rates[*]}; do
python3 blockdrop_pytorch.py --bs 1 --platform V100 --overhead_test --unroll --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.unroll.log
python3 blockdrop_pytorch.py --bs 1 --platform V100 --overhead_test --fix --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.fix.log
done
cd ..

cd resnet101
python3 resnet101_pytorch.py --bs 1 --platform V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
cd ..

cd skipnet
for rate in ${rates[*]}; do
python3 skipnet_pytorch.py --bs 1 --platform V100 --overhead_test --unroll --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.unroll.log
python3 skipnet_pytorch.py --bs 1 --platform V100 --overhead_test --fix --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.fix.log
done
cd ..
22 changes: 22 additions & 0 deletions artifacts/Figure17/run_sys.sh
@@ -0,0 +1,22 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure17/sys
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models

rates=(0 25 50 75 100)

python3 resnet18.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log

for rate in ${rates[*]}; do
python3 blockdrop.py --bs 1 --no-torch --overhead_test --unroll --measure --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.unroll.log
python3 blockdrop.py --bs 1 --no-torch --overhead_test --fix --measure --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.fix.log
done

python3 resnet101.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log

for rate in ${rates[*]}; do
python3 skipnet.py --bs 1 --no-torch --overhead_test --unroll --measure --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.unroll.log
python3 skipnet.py --bs 1 --no-torch --overhead_test --fix --measure --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.fix.log
done
cd ..
29 changes: 29 additions & 0 deletions artifacts/Figure17/run_tf.sh
@@ -0,0 +1,29 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure17/tf
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

rates=(0 25 50 75 100)

cd resnet18
python3 resnet18_tf.py --bs=1 --platform=V100 2>&1 | tee ${LOG_DIR}/blockdrop.noskip.log
cd ..

cd blockdrop
for rate in ${rates[*]}; do
python3 blockdrop_tf.py --bs 1 --platform V100 --overhead_test --unroll --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.unroll.log
python3 blockdrop_tf.py --bs 1 --platform V100 --overhead_test --fix --rate $rate 2>&1 | tee ${LOG_DIR}/blockdrop.${rate}.fix.log
done
cd ..

cd resnet101
python3 resnet101_tf.py --bs=1 --platform=V100 2>&1 | tee ${LOG_DIR}/skipnet.noskip.log
cd ..

cd skipnet
for rate in ${rates[*]}; do
python3 skipnet_tf.py --bs 1 --platform V100 --overhead_test --unroll --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.unroll.log
python3 skipnet_tf.py --bs 1 --platform V100 --overhead_test --fix --rate $rate 2>&1 | tee ${LOG_DIR}/skipnet.${rate}.fix.log
done
cd ..
15 changes: 15 additions & 0 deletions artifacts/Figure18/run.sh
@@ -0,0 +1,15 @@
#!/bin/bash

source ~/miniconda3/etc/profile.d/conda.sh

conda activate controlflow
./run_pytorch.sh
conda deactivate

conda activate baseline_jax
./run_jax.sh
conda deactivate

conda activate controlflow
./run_sys.sh
conda deactivate
13 changes: 13 additions & 0 deletions artifacts/Figure18/run_jax.sh
@@ -0,0 +1,13 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure18/jax
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline

cd rae
python3 rae_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/rae.unroll.log
nvprof --profile-from-start off python3 rae_jax.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/rae.unroll.nvprof.log
python3 rae_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/rae.fix.log
nvprof --profile-from-start off python3 rae_jax.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/rae.fix.nvprof.log
cd ..
cd ..
12 changes: 12 additions & 0 deletions artifacts/Figure18/run_pytorch.sh
@@ -0,0 +1,12 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure18/pytorch
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd rae
python3 rae_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/rae.unroll.log
nvprof --profile-from-start off python3 rae_pytorch.py --bs 1 --platform V100 --overhead_test --unroll 2>&1 | tee ${LOG_DIR}/rae.unroll.nvprof.log
python3 rae_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/rae.fix.log
nvprof --profile-from-start off python3 rae_pytorch.py --bs 1 --platform V100 --overhead_test --fix 2>&1 | tee ${LOG_DIR}/rae.fix.nvprof.log
cd ..
cd ..
10 changes: 10 additions & 0 deletions artifacts/Figure18/run_sys.sh
@@ -0,0 +1,10 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure18/sys
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models

python3 rae-unroll.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/rae.unroll.log
nvprof --profile-from-start off python3 rae-unroll.py --measure 2>&1 | tee $LOG_DIR/rae.unroll.nvprof.log
python3 rae.py --platform V100 --bs 1 --no-torch --overhead_test --measure 2>&1 | tee $LOG_DIR/rae.fix.log
nvprof --profile-from-start off python3 rae.py --platform V100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/rae.fix.nvprof.log
14 changes: 14 additions & 0 deletions artifacts/Figure19/README.md
@@ -0,0 +1,14 @@
Please run the following commands (NOTE: do not run them simultaneously):
```bash
# in tf-ae docker
ssh root@impreza0 -p 31702
cd Figure19 && ./run_tf.sh # about 10 min
logout
# in jax-ae docker
ssh root@impreza0 -p 31703
cd Figure19 && ./run_jax.sh # about 10 min
logout
# in cocktailer-ae docker
ssh root@impreza0 -p 31705
cd Figure19 && ./run_in_sys_docker.sh # about 1 hour
```
34 changes: 34 additions & 0 deletions artifacts/Figure19/run_base.sh
@@ -0,0 +1,34 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure19/base
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models

# build modified models
DIR_MAP=(
"manual_seq2seq/bs1.rocm/seq2seq_bs1_0-forward/nnfusion_rt/rocm_codegen:seq2seq/Constant.bs1.rocm.0"
"manual_attention/bs1.rocm/attention_bs1_0-forward/nnfusion_rt/rocm_codegen:attention/Constant.bs1.rocm.0"
"manual_attention/bs1.rocm/attention_bs1_1-forward/nnfusion_rt/rocm_codegen:attention/Constant.bs1.rocm.1"
)

for pair in ${DIR_MAP[@]}; do
IFS=':' read -ra ADDR <<< "$pair"
workdir=${ADDR[0]}
datadir=${ADDR[1]}
if [ ! -r $workdir/main_test ]; then
echo "preparing $workdir"
cd $workdir
cp -r ${ARTIFACT_ROOT}/data/$datadir Constant
cmake . && make -j
cd ${ARTIFACT_ROOT}/models
fi
done

# run tests
python3 lstm.py --platform MI100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/lstm.b1.log
python3 nasrnn.py --platform MI100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/nasrnn.b1.log
cd manual_attention/bs1.rocm && python3 run.py 2>&1 | tee $LOG_DIR/attention.b1.log && cd ../..
cd manual_seq2seq/bs1.rocm && python3 run.py 2>&1 | tee $LOG_DIR/seq2seq.b1.log && cd ../..
python3 blockdrop.py --platform MI100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/blockdrop.b1.log
python3 skipnet.py --platform MI100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/skipnet.b1.log
python3 rae.py --platform MI100 --bs 1 --no-torch --disable-cf --measure 2>&1 | tee $LOG_DIR/rae.b1.log
5 changes: 5 additions & 0 deletions artifacts/Figure19/run_in_sys_docker.sh
@@ -0,0 +1,5 @@
#!/bin/bash

./run_pytorch.sh
./run_base.sh
./run_sys.sh
26 changes: 26 additions & 0 deletions artifacts/Figure19/run_jax.sh
@@ -0,0 +1,26 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure19/jax
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_jax.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/lstm.b1.log
cd ..
cd nasrnn
python3 nas_jax.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/nasrnn.b1.log
cd ..
cd attention
python3 attention_jax.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/attention.b1.log
cd ..
cd seq2seq
python3 seq2seq_jax.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/seq2seq.b1.log
cd ..
cd blockdrop
python3 blockdrop_jax.py --platform MI100 --bs 1 --rand_weight 2>&1 | tee ${LOG_DIR}/blockdrop.b1.log
cd ..
cd skipnet
python3 skipnet_jax.py --platform MI100 --bs 1 --rand_weight 2>&1 | tee ${LOG_DIR}/skipnet.b1.log
cd ..
cd rae
python3 rae_jax.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/rae.b1.log
cd ..
26 changes: 26 additions & 0 deletions artifacts/Figure19/run_pytorch.sh
@@ -0,0 +1,26 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure19/pytorch
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/lstm.b1.log
cd ..
cd nasrnn
python3 nas_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/nasrnn.b1.log
cd ..
cd attention
python3 attention_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/attention.b1.log
cd ..
cd seq2seq
python3 seq2seq_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/seq2seq.b1.log
cd ..
cd blockdrop
python3 blockdrop_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/blockdrop.b1.log
cd ..
cd skipnet
python3 skipnet_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/skipnet.b1.log
cd ..
cd rae
python3 rae_pytorch.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/rae.b1.log
cd ..
12 changes: 12 additions & 0 deletions artifacts/Figure19/run_sys.sh
@@ -0,0 +1,12 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure19/sys
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/models
python3 lstm.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/lstm.b1.log
python3 nasrnn.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/nasrnn.b1.log
python3 attention.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/attention.b1.log
python3 seq2seq.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/seq2seq.b1.log
python3 blockdrop.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/blockdrop.b1.log
python3 skipnet.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/skipnet.b1.log
python3 rae.py --platform MI100 --bs 1 --no-torch --measure 2>&1 | tee $LOG_DIR/rae.b1.log
28 changes: 28 additions & 0 deletions artifacts/Figure19/run_tf.sh
@@ -0,0 +1,28 @@
#!/bin/bash

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure19/tf
mkdir -p ${LOG_DIR}
cd ${ARTIFACT_ROOT}/baseline
cd lstm
python3 lstm_tf.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/lstm.b1.log
cd ..
cd nasrnn
python3 nas_tf.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/nasrnn.b1.log
cd ..
cd attention
cp ${ARTIFACT_ROOT}/data/attention/attention.b1.tfgraph attention.b1.tfgraph
python3 attention_tf.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/attention.b1.log
cd ..
cd seq2seq
cp ${ARTIFACT_ROOT}/data/seq2seq/seq2seq.b1.tfgraph seq2seq.b1.tfgraph
python3 seq2seq_tf.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/seq2seq.b1.log
cd ..
cd blockdrop
mkdir -p onnx
cp ${ARTIFACT_ROOT}/data/blockdrop/blockdrop.b1.tfgraph blockdrop.b1.tfgraph
python3 blockdrop_tf.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/blockdrop.b1.log
cd ..
cd skipnet
cp ${ARTIFACT_ROOT}/data/skipnet/skipnet.b1.tfgraph skipnet.b1.tfgraph
python3 skipnet_tf.py --platform MI100 --bs 1 2>&1 | tee ${LOG_DIR}/skipnet.b1.log
cd ..
25 changes: 25 additions & 0 deletions artifacts/Figure20/run.sh
@@ -0,0 +1,25 @@
#!/bin/bash

source ~/miniconda3/etc/profile.d/conda.sh
conda activate controlflow

cd ${ARTIFACT_ROOT}/models

mkdir -p ${ARTIFACT_ROOT}/reproduce_results/Figure20
cp -r ${ARTIFACT_ROOT}/reproduce_results/Figure14/base ${ARTIFACT_ROOT}/reproduce_results/Figure20
cp -r ${ARTIFACT_ROOT}/reproduce_results/Figure14/sys ${ARTIFACT_ROOT}/reproduce_results/Figure20

LOG_DIR=${ARTIFACT_ROOT}/reproduce_results/Figure20/schedule
mkdir -p ${LOG_DIR}
python3 lstm.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown 2>&1 | tee $LOG_DIR/lstm.b1.log
cp ${ARTIFACT_ROOT}/reproduce_results/Figure14/sys/nasrnn.b1.log ${LOG_DIR}
python3 attention.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown 2>&1 | tee $LOG_DIR/attention.b1.log
cp ${ARTIFACT_ROOT}/reproduce_results/Figure14/sys/seq2seq.b1.log ${LOG_DIR}
python3 blockdrop.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown 2>&1 | tee $LOG_DIR/blockdrop.b1.log
python3 skipnet.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown 2>&1 | tee $LOG_DIR/skipnet.b1.log
python3 rae.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown --opt=1 2>&1 | tee $LOG_DIR/rae.opt1.b1.log
python3 rae.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown --opt=2 2>&1 | tee $LOG_DIR/rae.opt2.b1.log
python3 rae.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown --opt=3 2>&1 | tee $LOG_DIR/rae.opt3.b1.log
python3 rae.py --platform V100 --bs 1 --no-torch --measure --enable-breakdown --opt=4 2>&1 | tee $LOG_DIR/rae.opt4.b1.log

conda deactivate
70 changes: 70 additions & 0 deletions artifacts/INSTALL.md
@@ -0,0 +1,70 @@
# Installation Tutorial
This document describes how to install the software used in the artifact on a node with an NVIDIA GPU. All scripts are assumed to be run from the `nnfusion/artifacts` directory.

## Prerequisites
We assume that you have a node with an NVIDIA GPU and CUDA installed, and that conda and nvcc are available. If you have not installed conda, you can install it by following the instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/linux.html) (Miniconda is enough, and this artifact assumes that Miniconda is installed at the default path `~/miniconda3`). If you have not installed nvcc, you can install it by following the instructions [here](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html).

## TensorFlow
The onnx-tf package for TF 1.15 needs to be built from source because the pre-compiled version depends on TF 2. We also fix some bugs in that commit to properly support control flow operations. The following commands will prepare the conda env for TF 1.15.

```bash
conda create python=3.8 --name baseline_tf1 -y
conda activate baseline_tf1
pip install nvidia-pyindex
pip install -r env/requirements_tf.txt
mkdir -p third-party && cd third-party
git clone https://github.com/onnx/onnx-tensorflow.git
cd onnx-tensorflow
git checkout 0e4f4836 # v1.7.0-tf-1.15m
git apply ../../env/onnx_tf.patch
pip install -e .
conda deactivate
```
## JAX
The following commands will prepare the conda env for JAX.
```bash
conda create python=3.8 --name baseline_jax -y
conda activate baseline_jax
pip install nvidia-pyindex
pip install -r env/requirements_jax.txt -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html -f https://download.pytorch.org/whl/torch_stable.html
conda deactivate
```

## TVM
The following commands will prepare the conda env for TVM.
```bash
conda create python==3.8 --name kerneldb -y
conda activate kerneldb
pip install ply==3.11
mkdir -p third-party && cd third-party
git clone https://github.com/apache/tvm.git
cd tvm
git checkout 22ba6523c
git submodule init && git submodule update
git apply ../../env/tvm.patch # from roller
mkdir build
cd build
cp ../cmake/config.cmake config.cmake
sed -i "s/USE_CUDA OFF/USE_CUDA ON/g" config.cmake && sed -i "s/USE_LLVM OFF/USE_LLVM ON/g" config.cmake
cmake .. && make -j
cd ../python
pip install -e .
```
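A quick sanity check of the build (a minimal sketch; run inside the `kerneldb` env, and note that the exact version string depends on the checked-out commit):

```python
# Verify that the freshly built TVM is importable and CUDA-enabled.
import tvm

print(tvm.__version__)              # version of the checked-out commit
print(tvm.runtime.enabled("cuda"))  # True if built with USE_CUDA ON
```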

## NNFusion
The following commands will build nnfusion. Please use the [script](../maint/script/install_dependency.sh) (needs sudo) to prepare the environment for nnfusion before running the following commands.

```bash
cd .. # to $YOUR_DIR_FOR_NNFUSION/nnfusion
mkdir build && cd build && cmake .. && make -j
```

## PyTorch & Cocktailer
```bash
conda create python=3.7 --name controlflow -y
conda activate controlflow
pip install nvidia-pyindex
pip install -r env/requirements_pytorch.txt -f https://download.pytorch.org/whl/torch_stable.html
pip install -e .
conda deactivate
```
134 changes: 131 additions & 3 deletions artifacts/README.md
@@ -1,5 +1,133 @@
# OSDI'20 Artifacts Evaluation
# OSDI'23 Cocktailer Artifacts Evaluation

OSDI'20 Artifact Evaluation of paper #292, titled "[Rammer: Enabling Holistic Deep Learning Compiler Optimizations with rTasks](https://www.usenix.org/conference/osdi20/presentation/ma)".
## 0. Overview
This code branch is used for OSDI'23 Artifact Evaluation of paper #628, titled "Cocktailer: Analysis and Optimization for Dynamic Control Flow in Deep Learning".

**Please refer to the [osdi20_artifact branch](https://github.com/microsoft/nnfusion/tree/osdi20_artifact/artifacts)**
### Evaluation Setup
* Artifacts Available:
* All Cocktailer-related code is available under the NNFusion open-source project at: [https://github.com/microsoft/nnfusion/tree/cocktailer_artifact](https://github.com/microsoft/nnfusion/tree/cocktailer_artifact)
* Artifacts Functional:
* *Documentation*: the following documents include detailed guidelines on how to build, install, and test Cocktailer, and on how to run the experiments that compare it with other baselines.
* *Completeness*: the [C++ part](..) of Cocktailer has been merged into NNFusion in this branch, and the [Python part](ast_analyzer) is available in this artifact.
* *Exercisability*: under the *artifacts* folder, we prepare all the scripts and data needed to reproduce the experiments, organized in individual folders named after the corresponding figure in the paper.
* Results Reproduced:
* To reproduce the main results presented in our paper, we provide Docker images containing all the environments and baseline software, and machines with the same configurations as used in the paper's evaluation. We also provide a detailed guideline to help reproduce the results step by step.

## 1. Environment Preparation

**For AE Reviewers**:
Please follow the instructions in "Comments for AEC" on HotCRP and skip this section if you want to use the provided environment. The following steps require Docker permissions, which are not granted on the provided machines due to security concerns.

### NVIDIA GPU
Please follow the instructions in [INSTALL.md](INSTALL.md) or use the following docker-based script to build and install Cocktailer.
```bash
cd $YOUR_DIR_FOR_NNFUSION
git clone https://github.com/microsoft/nnfusion.git --branch cocktailer_artifact --single-branch
cd nnfusion/artifacts
docker build -t cocktailer -f env/Dockerfile.nv .
chmod 777 $YOUR_DIR_FOR_NNFUSION/nnfusion
docker run -it --gpus all --name cocktailer-ae -v $YOUR_DIR_FOR_NNFUSION/nnfusion:/root/nnfusion --shm-size="32g" -w /root/nnfusion/artifacts cocktailer:latest /bin/bash
# run inside docker
bash ./env/install_in_docker.sh
```

### AMD GPU
Please prepare four Docker containers, one each for JAX, TensorFlow, TVM, and PyTorch \& Cocktailer.
* download code
```bash
cd $YOUR_DIR_FOR_NNFUSION
git clone https://github.com/microsoft/nnfusion.git --branch cocktailer_artifact --single-branch
```
* Build and run the JAX docker (the resulting image is `jax-rocm:latest`)
```bash
cd $YOUR_DIR_FOR_NNFUSION/nnfusion/artifacts
mkdir third-party && cd third-party
git clone https://github.com/google/jax.git
cd jax
git checkout 0282b4bfad
git apply ../../env/jax.rocm.patch
./build/rocm/ci_build.sh --keep_image bash -c "./build/rocm/build_rocm.sh"
docker run -it --device=/dev/kfd --device=/dev/dri --name jax-ae -v $YOUR_DIR_FOR_NNFUSION/nnfusion:/root/nnfusion -w /root/nnfusion/artifacts -e ARTIFACT_ROOT=/root/nnfusion/artifacts jax-rocm:latest /bin/bash
```
* Pull and run the TensorFlow docker
```bash
docker pull rocm/tensorflow:rocm4.3.1-tf1.15-dev
docker run -it --device=/dev/kfd --device=/dev/dri --name tf-ae -v $YOUR_DIR_FOR_NNFUSION/nnfusion:/root/nnfusion -w /root/nnfusion/artifacts -e ARTIFACT_ROOT=/root/nnfusion/artifacts rocm/tensorflow:rocm4.3.1-tf1.15-dev /bin/bash
```
* Build and run the TVM docker
```bash
mkdir $YOUR_DIR_FOR_NNFUSION/kernel_db
cd $YOUR_DIR_FOR_NNFUSION/nnfusion/artifacts
docker build -t tvm_rocm_cuda:latest -f env/Dockerfile.tvm.rocm --network=host .
docker run -it --device=/dev/kfd --device=/dev/dri --name tvm-ae -v $YOUR_DIR_FOR_NNFUSION/kernel_db:/root/.cache/nnfusion -v $YOUR_DIR_FOR_NNFUSION/nnfusion:/root/nnfusion -w /root/nnfusion/artifacts -e ARTIFACT_ROOT=/root/nnfusion/artifacts tvm_rocm_cuda /bin/bash
```
* Build and run the Cocktailer docker
```bash
cd $YOUR_DIR_FOR_NNFUSION/nnfusion/artifacts
docker build -t cocktailer:latest -f env/Dockerfile.rocm --network=host .
docker run -it --device=/dev/kfd --device=/dev/dri --name cocktailer-ae -v $YOUR_DIR_FOR_NNFUSION/kernel_db:/root/.cache/nnfusion -v $YOUR_DIR_FOR_NNFUSION/nnfusion:/root/nnfusion -w /root/nnfusion/artifacts -e ARTIFACT_ROOT=/root/nnfusion/artifacts cocktailer /bin/bash
# run inside docker
bash ./env/install_in_rocm_docker.sh
```

## 2. Getting Started with a Simple Example

* Go to the *get_started_tutorial/* folder and follow [README_GET_STARTED.md](get_started_tutorial/README_GET_STARTED.md).


## 3. Data and Kernel Preparation
* Download the input data and model weights from [Zenodo](https://doi.org/10.5281/zenodo.7856472), unzip them, and put them under the `nnfusion/artifacts` directory. The tree structure should look like:
**For AE Reviewers**: This step has already been done on the provided nodes.
```
nnfusion
├── artifacts
│   ├── data
│   │   ├── attention
│   │   ├── blockdrop
│   │   ├── lstm
│   │   ├── seq2seq
│   │   ├── skipnet
│   │   ├── sst
│   │   └── tatoeba-eng-fra
```

* Generate all kernels for Cocktailer. More details can be found in [README_KERNEL_DB.md](kernel_db/README_KERNEL_DB.md).
**NOTE**: this process takes about 20 minutes per architecture when using the tuning results shipped with the artifact, or longer if you want to re-tune the kernels.
* NVIDIA GPU
```bash
# assume running at nnfusion/artifacts directory
cd kernel_db
srun -p AE -w nico1 --pty --exclusive ./reproduce_kernel_db.sh
srun -p AE -w nico1 --pty bash -c "mkdir -p /tmp/`whoami` && rsync -avz nico0:~/.cache/nnfusion/* /tmp/`whoami`/"
```
* AMD GPU
```bash
# assume running at nnfusion/artifacts directory of tvm docker
cd kernel_db
./reproduce_rocm_kernel_db.sh
```

## 4. Reproducing Individual Experiment Results
**NOTE**: we provide a script named `run_nv_gpu.sh` to run all experiments except Figure 19. Go to the `nnfusion/artifacts` directory and run `./run_nv_gpu.sh`. For Figure 19, please follow the README.md in the `Figure19` directory.

**For AE Reviewers**: Please use `srun -p AE -w nico1 --pty --exclusive ./run_nv_gpu.sh ` to submit the jobs to the compute node of the NVIDIA GPU cluster and follow the README.md in the `Figure19` directory to reproduce Figure19.

| Experiments | Figure # in Paper | Script Location |
| ----------- | ----------- | ----------- |
| #1. Control flow overhead in JAX | Figure 2 | N/A (use the results in Figures 15, 16, and 18) |
| #2. End-to-end DNN inference on NVIDIA V100 GPU | Figure 14 | [run.sh](Figure14/run.sh) |
| #3. Control flow overhead of models with loops | Figure 15 | [run.sh](Figure15/run.sh) |
| #4. Control flow overhead of models with branches | Figure 16 | [run.sh](Figure16/run.sh) |
| #5. Different ratio of executed layers | Figure 17 | [run.sh](Figure17/run.sh) |
| #6. Control flow overhead of RAE with recursion | Figure 18 | [run.sh](Figure18/run.sh) |
| #7. End-to-end DNN inference on ROCm MI100 GPU with BS=1 | Figure 19 | [README.md](Figure19/README.md) |
| #8. Breakdown of models with BS=1 | Figure 20 | [run.sh](Figure20/run.sh)|

## 5. Reproducing the Figures in the Paper
Copy the ROCm results to the NVIDIA GPU node and draw the figures there:

```bash
cd $YOUR_DIR_FOR_NNFUSION/nnfusion/artifacts
scp -P 31705 -r root@impreza0:~/nnfusion/artifacts/reproduce_results/Figure19 reproduce_results
cd plot && ./plot_all.sh && cd -
```
1 change: 1 addition & 0 deletions artifacts/ast_analyzer/__init__.py
@@ -0,0 +1 @@
from .workflow import test_torch_eval, workflow_fix_flag, workflow_search_flag
Empty file.
322 changes: 322 additions & 0 deletions artifacts/ast_analyzer/grad/annotate.py
@@ -0,0 +1,322 @@
from ast_analyzer.grad import ast_utils
import astunparse
from . import annotations as anno
from . import cfg
import gast
from . import transformers
from ast import AST, iter_fields
import json

class ResolveCalls(gast.NodeVisitor):
"""Annotate Call nodes with the function being called."""

def __init__(self, func):
self.func = func

def visit_FunctionDef(self, node):
for field, value in iter_fields(node):
if field == 'returns':
continue
if isinstance(value, list):
for item in value:
if isinstance(item, AST):
self.visit(item)
elif isinstance(value, AST):
self.visit(value)
anno.setanno(node, 'func', self.func)

def visit_Call(self, node):
self.generic_visit(node)
anno.setanno(node, 'func', node._func_inst)

def visit_arguments(self, node):
# ignore type annotation
return


def mark_shape(node, type_dict):
for n, t in type_dict.items():
if anno.hasanno(n, 'type'):
anno.delanno(n, 'type')
anno.setanno(n, 'type', t)


def resolve_calls(node, func):
"""Put the function instance into annotation.
Different from tangent because the function instance has been inferred in "shape_inference".
Args:
node: An AST node
func: The function whose calls are being resolved.
"""
ResolveCalls(func).visit(node)


def get_anno_line(node):
if not isinstance(node, gast.Expr) or not isinstance(node.value, gast.Constant):
return None
value = node.value.value
if not isinstance(value, str):
return None
if not value.startswith('ANNO'):
return None
return value


class ResolveAnno(gast.NodeTransformer):
def anno_FunctionDef(self, node):
hint = get_anno_line(node.body[0])
if hint is not None and hint.startswith("ANNO_FUNC:"):
anno.setanno(node, 'hint', json.loads(hint[11:]))
node.body = node.body[1:]

def visit(self, node):
if isinstance(node, gast.FunctionDef):
self.anno_FunctionDef(node)
for field, value in iter_fields(node):
if isinstance(value, list):
last_hint = None
if field in ['body', 'orelse', 'finalbody']:
for item in value:
if isinstance(item, AST):
if last_hint is not None:
anno.setanno(item, 'hint', last_hint)
last_hint = None
last_hint = get_anno_line(item)
is_hint_stmt = False
if last_hint is not None:
if not last_hint.startswith('ANNO:'):
last_hint = None
else:
last_hint = json.loads(last_hint[6:])
is_hint_stmt = True
new_body = []
for item in value:
if isinstance(item, AST):
hint = get_anno_line(item)
if hint is None or not hint.startswith('ANNO:'):
new_body.append(item)
else:
new_body.append(item)
setattr(node, field, new_body)
for item in value:
if isinstance(item, AST):
self.visit(item)
elif isinstance(value, AST):
self.visit(value)
return node


def resolve_anno(node):
ResolveAnno().visit(node)


def unused(node, include_arg=False):
"""Find unused definitions that can be remove.
This runs reaching definitions analysis followed by a walk over the AST to
find all variable definitions that are not used later on.
Args:
node: The AST of e.g. a function body to find unused variable definitions.
Returns:
unused: After visiting all the nodes, this attribute contains a set of
definitions in the form of `(variable_name, node)` pairs which are
unused in this AST.
"""
cfg.backward(node, cfg.BackwardActive())
unused = set()
for sub in gast.walk(node):
if isinstance(sub, gast.Assign):
defs = ast_utils.get_updated(sub)
active_in = anno.getanno(sub, 'bwd_active_in')
used = False
for d in defs:
if d in active_in:
used = True
if not used:
unused.add(sub)
if isinstance(sub, gast.arguments) and include_arg:
active_in = anno.getanno(sub, 'bwd_active_in')
for arg in sub.args:
if arg.id not in active_in:
unused.add(arg)
return unused


class ZeroFolding(transformers.TreeTransformer):
def __init__(self):
super(ZeroFolding, self).__init__()

def visit_Assign(self, node):
if isinstance(node.value, gast.BinOp):
left = node.value.left
right = node.value.right
if isinstance(left, gast.Name) and left.id in anno.getanno(node, 'zero_tensor_in'):
node.value = right
elif isinstance(right, gast.Name) and right.id in anno.getanno(node, 'zero_tensor_in'):
node.value = left

return node


def zero_fold(node):
cfg.forward(node, cfg.ZeroTensor())
ZeroFolding().visit(node)


class GatherDefUse(gast.NodeVisitor):
def __init__(self) -> None:
super(GatherDefUse, self).__init__()
self.result_def = {}
self.result_use = {}

def visit(self, node):
self.generic_visit(node)
result_def, result_use = cfg.get_def_use(node)
for _, value in iter_fields(node):
if isinstance(value, list):
for item in value:
if isinstance(item, AST):
result_def.update(self.result_def[item])
result_use.update(self.result_use[item])
elif isinstance(value, AST):
result_def.update(self.result_def[value])
result_use.update(self.result_use[value])
self.result_def[node] = result_def
self.result_use[node] = result_use


def block_live(node):
cfg.backward(node, cfg.BackwardActive())
gather_def_use = GatherDefUse()
gather_def_use.visit(node)

ret = {}
for sub in gast.walk(node):
if isinstance(sub, (gast.For, gast.If)):
active_in = anno.getanno(sub, 'bwd_active_in')
active_out = anno.getanno(sub, 'bwd_active_out')
defs = gather_def_use.result_def[sub]
blockLives = defs.intersection(active_out)
ret[sub] = blockLives

return ret


# class GenASTInfo(TwoNodeVisitor):
# def __init__(self, gethered_defs):
# self.ast_info = {}
# self.gathered_defs = gethered_defs

# def register_node(self, node1, node2):
# active_out = anno.getanno(node2, 'bwd_active_out')
# defs = self.gathered_defs[node2]
# print("[gen_info]", node1, active_out, defs)
# blockLives = defs.intersection(active_out)
# self.ast_info[node1] = blockLives

# def visit_If(self, node1, node2):
# self.generic_visit(node1, node2)
# self.register_node(node1, node2)

# def visit_For(self, node1, node2):
# self.generic_visit(node1, node2)
# self.register_node(node1, node2)

def get_last_nodes(node):
if isinstance(node, gast.While):
return [node.test]
if not isinstance(node, gast.If):
return [node]
if len(node.body) == 0:
return get_last_nodes(node.orelse[-1])
elif len(node.orelse) == 0:
return get_last_nodes(node.body[-1])
else:
return get_last_nodes(node.body[-1]) + get_last_nodes(node.orelse[-1])


def get_arg_ret(stmts, cfg_nodes, white_list): # white_list is a set
if len(stmts) == 0:
return set(), set()
assert not isinstance(stmts[-1], gast.Return)
if isinstance(stmts[0], gast.FunctionDef):
print("[warning] get_ret_arg for functiondef is not implemented")
return set(), set()
inner_cfg_nodes = set()
for stmt in stmts:
for sub_stmt in gast.walk(stmt):
if sub_stmt in cfg_nodes:
inner_cfg_nodes.add(cfg_nodes[sub_stmt])
if isinstance(stmts[0], (gast.If, gast.While)):
args = anno.getanno(stmts[0].test, 'bwd_active_out')
else:
args = anno.getanno(stmts[0], 'bwd_active_out')

last_nodes = get_last_nodes(stmts[-1])

rets = set()

for last_node in last_nodes:
cfg_node = cfg_nodes[last_node]
for prev in cfg_node.next:
if prev not in inner_cfg_nodes:
if prev.value is not None:
live_out = anno.getanno(prev.value, 'bwd_active_out')
rets.update(live_out)
else:
print("[skip]", astunparse.unparse(prev.value))

defuse = GatherDefUse()
defs = set()
uses = set()
for stmt in stmts:
defuse.visit(stmt)
defs.update(defuse.result_def[stmt])
uses.update(defuse.result_use[stmt])
# print("[args]", args)
# print("[rets]", rets)
# print("[defs]", defs)
# print("[uses]", uses)
args = args.intersection(uses) - white_list
rets = rets.intersection(defs) - white_list
# print("[args-real]", args)
# print("[rets-real]", rets)
return args, rets


# def get_live_large_node(stmts, args, rets):
# raise NotImplementedError()
# copied_stmts = copy.deepcopy(stmts)
# cfg.backward_block(copied_stmts, cfg.BackwardActive(), args, rets)
# gathered_defs = {}
# for c_stmt in copied_stmts:
# g = GatherDefUse()
# g.visit(c_stmt)
# gathered_defs.update(g.result_def)
# gen = GenASTInfo(gathered_defs)
# for stmt, c_stmt in zip(stmts, copied_stmts):
# gen.visit(stmt, c_stmt)
# print("[ast_info]", gen.ast_info)
# return gen.ast_info

def get_live_large_node(stmts, cfg_nodes, white_list):
ast_info = {}
for stmt in stmts:
for sub_stmt in gast.walk(stmt):
if isinstance(sub_stmt, gast.For):
args, rets = get_arg_ret(sub_stmt.body, cfg_nodes, white_list)
ast_info[sub_stmt] = rets - white_list
# print("[ast_info]", ast_info[sub_stmt], args, rets)
elif isinstance(sub_stmt, gast.While):
args, rets = get_arg_ret(sub_stmt.body, cfg_nodes, white_list)
ast_info[sub_stmt] = rets - white_list
elif isinstance(sub_stmt, gast.If):
args_body, rets_body = get_arg_ret(sub_stmt.body, cfg_nodes, white_list)
args_orelse, rets_orelse = get_arg_ret(sub_stmt.orelse, cfg_nodes, white_list)
rets = rets_body.union(rets_orelse) - white_list
ast_info[sub_stmt] = rets
# print("[ast_info]", ast_info[sub_stmt], args_body, args_orelse, rets)

return ast_info
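A minimal sketch of how the `unused` helper above can be driven on its own (the function `f` is made up, and this assumes `cfg.BackwardActive` provides the liveness annotations the way Tangent's analysis does):

```python
import gast
from ast_analyzer.grad import annotate

src = """
def f(x):
    y = x + 1   # live: flows into the return value
    z = x * 2   # dead: never read afterwards
    return y
"""
fn = gast.parse(src).body[0]       # the FunctionDef node

# `unused` runs the backward liveness pass and returns the Assign nodes
# whose targets are never read later on.
for stmt in annotate.unused(fn):
    print(gast.dump(stmt))         # expected: the `z = x * 2` assignment
```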
79 changes: 79 additions & 0 deletions artifacts/ast_analyzer/grad/annotations.py
@@ -0,0 +1,79 @@
"""Handling annotations on AST nodes."""
from __future__ import absolute_import

import gast
import copy

ANNOTATION_FIELD = '_grad_anno'
# These annotations won't be cleared between passes
FIXED_ANNOTATIONS = set(['pop', 'push', 'add_grad', 'init_grad', 'pri', 'adj',
'push_func', 'pop_func', 'adjoint_var',
'temp_adjoint_var', 'temp_var', 'pri_call',
'adj_call', 'comment', 'pre_anf',
'store_stmt', 'restore_stmt', 'cache_ret', 'cache_arg',
'cache_target_ret', 'cache_target_arg', 'related_nodes', 'save_var', 'type', 'origin_ret', 'attr_name', 'manual', 'can_push', 'may_push'])


class Annotation:
def __init__(self):
self.annos = {}

def __deepcopy__(self, memo):
dpcpy = self.__class__()
memo[id(self)] = dpcpy
new_anno = Annotation()
for s in self.annos:
new_anno.annos[s] = self.annos[s]
return new_anno

def __str__(self):
return "[Annotation]" + str(self.annos)

def __repr__(self):
return "[Annotation]" + str(self.annos)


def setanno(node, key, value, safe=True):
annotations = getattr(node, ANNOTATION_FIELD, Annotation())
setattr(node, ANNOTATION_FIELD, annotations)
if safe and hasanno(node, key):
raise ValueError('annotation already present:', key, gast.dump(node))
annotations.annos[key] = value

# So that the annotations survive gast_to_ast() and ast_to_gast()
if ANNOTATION_FIELD not in node._fields:
node._fields += (ANNOTATION_FIELD,)


def hasanno(node, key):
annotations = getattr(node, ANNOTATION_FIELD, Annotation())
return key in annotations.annos


def setdefaultanno(node, key, value=None):
if not hasanno(node, key):
setanno(node, key, value)
return getanno(node, key)


def clearanno(node):
for succ in gast.walk(node):
if hasattr(succ, ANNOTATION_FIELD):
new = Annotation()
for anno in FIXED_ANNOTATIONS:
if hasanno(succ, anno):
new.annos[anno] = getanno(succ, anno)
setattr(succ, ANNOTATION_FIELD, new)
return node


def getanno(node, key, default=None):
annotations = getattr(node, ANNOTATION_FIELD, Annotation())
if key not in annotations.annos and default is None:
raise KeyError('Node "%s" has no annotation "%s"' % (gast.dump(node), key))
return annotations.annos.get(key, default)


def delanno(node, key):
annotations = getattr(node, ANNOTATION_FIELD, Annotation())
del annotations.annos[key]
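A short usage sketch of the annotation helpers above (the `comment` annotation and its payload are just illustrative):

```python
import gast
from ast_analyzer.grad import annotations as anno

node = gast.parse("x = a + b").body[0]          # an Assign node

# Attach, query, and remove an annotation.  'comment' is listed in
# FIXED_ANNOTATIONS, so clearanno() would keep it between passes.
anno.setanno(node, 'comment', {'text': 'forward pass', 'location': 'right'})
print(anno.hasanno(node, 'comment'))            # True
print(anno.getanno(node, 'comment')['text'])    # forward pass

anno.delanno(node, 'comment')
print(anno.hasanno(node, 'comment'))            # False
```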
255 changes: 255 additions & 0 deletions artifacts/ast_analyzer/grad/ast_utils.py
@@ -0,0 +1,255 @@
"""Utilities to manipulate the AST and its annotations."""
from __future__ import absolute_import
import copy

import gast

from . import annotations as anno
from . import quoting
from . import template
from . import utils  # used by is_insert_grad_of_statement below

from ast_analyzer.shape_inference.shape_elem import unwrap_shape
from ast_analyzer.shape_inference.types import *


def get_name(node):
"""Get the name of a variable.
Args:
node: A `Name`, `Subscript` or `Attribute` node.
Returns:
The name of the variable e.g. `'x'` for `x`, `x.i` and `x[i]`.
"""
if isinstance(node, gast.Name):
return node.id
elif isinstance(node, (gast.Subscript, gast.Attribute)):
return get_name(node.value)
else:
raise TypeError


def _get_target(node):
if isinstance(node, (gast.Name, gast.Subscript, gast.Attribute)):
return set([get_name(node)])
elif isinstance(node, (gast.Tuple, gast.List)):
return set.union(*(_get_target(target)
for target in node.elts))
elif isinstance(node, (gast.Constant,)):
return set()
else:
print(gast.dump(node))
raise ValueError


def get_updated(node):
"""Return the variable names created or mutated by this statement.
This function considers assign statements, augmented assign statements, and
the targets of for loops, as well as function arguments.
For example, `x[0] = 2` will return `x`, `x, y = 3, 4` will return `x` and
`y`, `for i in range(x)` will return `i`, etc.
Args:
node: An AST node
Returns:
A set of variable names (strings) of all the variables created or mutated.
"""
if isinstance(node, gast.Assign):
return set.union(*(_get_target(target)
for target in node.targets))
elif isinstance(node, (gast.For, gast.AugAssign)):
return _get_target(node.target)
elif isinstance(node, gast.arguments):
targets = set(arg.id for arg in node.args + node.kwonlyargs)
if node.vararg:
targets.add(node.vararg.id)
if node.kwarg:
targets.add(node.kwarg.id)
return targets
else:
return set()


def copy_node(node):
"""Copy a node but keep its annotations intact."""
if not isinstance(node, gast.AST):
return [copy_node(n) for n in node]

# The shape inference result cannot be deepcopied

grad_anno = getattr(node, anno.ANNOTATION_FIELD, anno.Annotation())
if hasattr(node, anno.ANNOTATION_FIELD):
delattr(node, anno.ANNOTATION_FIELD)

new_node = copy.deepcopy(node)

setattr(node, anno.ANNOTATION_FIELD, grad_anno)
setattr(new_node, anno.ANNOTATION_FIELD, copy.copy(grad_anno))
return new_node


class ArgAppend(gast.NodeTransformer):
"""Append arguments to a function definition."""

def __init__(self, node_list):
self.visited = False
self.node_list = node_list

def visit_FunctionDef(self, node):
if not self.visited:
node.args.args.extend(self.node_list)
self.visited = True
return node


def append_args(node, node_list):
if not isinstance(node_list, list):
raise TypeError('Please pass in a list')
if all([isinstance(n, str) for n in node_list]):
node_list = [quoting.quote(n) for n in node_list]
return ArgAppend(node_list).visit(node)


def is_insert_grad_of_statement(node):
"""Check whether a context manager calls `insert_grad_of`.
Args:
node: The context manager node.
Returns:
Whether or not this node contains `insert_grad_of` calls.
Raises:
ValueError: If the `insert_grad_of` calls are mixed with other calls.
"""
tangent_calls = [anno.getanno(item.context_expr, 'func', None)
is utils.insert_grad_of for item in node.items]
if all(tangent_calls):
return True
elif any(tangent_calls):
raise ValueError
else:
return False


def is_attr_of(node, inst):
return isinstance(node, gast.Attribute) and isinstance(node.value, gast.Name) and node.value.id == inst


class LoopLevel:
def __init__(self):
self.levels_fwd = []
self.levels_bwd = []
self.bounds = []
self.is_fixed = True

def depth(self):
return len(self.bounds)

def get_forward(self, d):
return self.levels_fwd[d]

def get_backward(self, d):
return self.levels_bwd[d]

def add_level(self, target_fwd_node, target_bwd_node, bound_node):
self.levels_fwd.append(target_fwd_node)
self.levels_bwd.append(target_bwd_node)
self.bounds.append(bound_node)

def del_level(self):
self.levels_fwd.pop()
self.levels_bwd.pop()
self.bounds.pop()

def tensor_of_type(self, ty, init="empty", device=None):
if isinstance(ty, TyNum):
shape = self.bounds
if ty.is_int():
ty_torch = quoting.quote('torch.int64')
else:
raise NotImplementedError
elif isinstance(ty, TyTensor):
if not ty.is_fixed_shape():
raise NotImplementedError
else:
ts_shape = list(unwrap_shape(ty.shape))
ts_shape = [quoting.quote(str(s)) for s in ts_shape]
shape = self.bounds + ts_shape
ty_torch = quoting.quote(np_dtype_to_torch_string(ty.dtype))
else:
print(ty)
raise NotImplementedError

if len(shape) == 0:
return template.replace(
# use 'repr' to format cuda to "cuda" (with quotes)
"torch.{}((), dtype=ty, device={})".format(init, repr(device)),
ty=ty_torch
)
elif len(shape) == 1:
return template.replace(
"torch.{}(d1, dtype=ty, device={})".format(init, repr(device)),
d1=shape[0],
ty=ty_torch
)
elif len(shape) == 2:
return template.replace(
"torch.{}(d1, d2, dtype=ty, device={})".format(
init, repr(device)),
d1=shape[0],
d2=shape[1],
ty=ty_torch
)
elif len(shape) == 3:
return template.replace(
"torch.{}(d1, d2, d3, dtype=ty, device={})".format(
init, repr(device)),
d1=shape[0],
d2=shape[1],
d3=shape[2],
ty=ty_torch
)
elif len(shape) == 4:
return template.replace(
"torch.{}(d1, d2, d3, d4, dtype=ty, device={})".format(
init, repr(device)),
d1=shape[0],
d2=shape[1],
d3=shape[2],
d4=shape[3],
ty=ty_torch
)
else:
raise NotImplementedError


def tensor_of_type(ty, init="empty", device=None):
return LoopLevel().tensor_of_type(ty, init, device=device)


def generate_zero_ast(var, ty, device):
ty = ty.deref()

if isinstance(ty, TyNum):
if ty.is_int():
return gast.Constant(value=0, kind=None)
elif ty.is_float():
return gast.Constant(value=0.0, kind=None)

if isinstance(ty, TyTensor):
if ty.is_fixed_shape():
return tensor_of_type(ty, "zeros", device=device).value
if var is not None:
return template.replace("torch.zeros_like(param, device=dev)", param=var, dev=device).value

if isinstance(ty, TyTuple):
if ty.is_fixed_len:
elts = [generate_zero_ast(None, t, device) for t in ty.get_tys()]
return gast.List(elts=elts, ctx=gast.Load()) # TODO: use tuple

raise ValueError("generate_zero: type not understood: " +
str(ty) + "(" + str(type(ty)) + ")")
542 changes: 542 additions & 0 deletions artifacts/ast_analyzer/grad/cfg.py

Large diffs are not rendered by default.

128 changes: 128 additions & 0 deletions artifacts/ast_analyzer/grad/create.py
@@ -0,0 +1,128 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions to create gradient nodes from other nodes."""
from __future__ import absolute_import
from __future__ import division

import gast

from . import annotations as anno
from . import utils


def create_grad(node, namer, tangent=False):
"""Given a variable, create a variable for the gradient.
Args:
node: A node to create a gradient for, can be a normal variable (`x`) or a
subscript (`x[i]`).
namer: The namer object which will determine the name to use for the
gradient.
tangent: Whether a tangent (instead of adjoint) is created.
Returns:
node: A node representing the gradient with the correct name e.g. the
gradient of `x[i]` is `dx[i]`.
Note that this returns an invalid node, with the `ctx` attribute
missing. It is assumed that this attribute is filled in later.
Node has an `adjoint_var` annotation referring to the node it is an
adjoint of.
"""
if not (isinstance(node, (gast.Subscript, gast.Name, gast.Tuple)) or utils.is_constant_str(node)):
raise TypeError

if anno.hasanno(node, 'temp_var'):
return create_grad(anno.getanno(node, 'temp_var'), namer, tangent)

def _name_grad(node):
if not isinstance(node, gast.Name):
raise TypeError
varname = node.id
name = namer.grad(varname, tangent)
grad_node = gast.Name(
id=name, ctx=None, annotation=None, type_comment=None)
anno.setanno(grad_node, 'adjoint_var', node)
return grad_node
if isinstance(node, gast.Subscript):
grad_node = create_grad(node.value, namer, tangent=tangent)
grad_node.ctx = gast.Load()
return gast.Subscript(value=grad_node, slice=node.slice, ctx=None)
elif utils.is_constant_str(node):
grad_node = create_grad(
gast.Name(id=node.s, ctx=None, annotation=None, type_comment=None), namer, tangent=tangent)
return gast.Constant(value=grad_node.id, kind=None)
elif isinstance(node, gast.Tuple):
elts = [create_grad(nd, namer, tangent) for nd in node.elts]
return gast.Tuple(elts=elts, ctx=None)
else:
return _name_grad(node)


def create_temp_grad(node, namer, tangent=False):
"""Create a variable to store partial gradients.
Args:
node: See `create_grad`.
namer: See `create_grad`.
tangent: See `create_grad`.
Returns:
node: See `create_grad`. Returns a node representing the partial gradient.
Note that this is always a simple variable e.g. the temporary partial
of `x[i]` can be something like `_dxi`.
Nodes are given an annotation `temp_adjoint_var`.
"""
if not isinstance(node, (gast.Subscript, gast.Name)):
raise TypeError

def _name_temp_grad(node):
name = namer.temp_grad(node.id, tangent)
temp_node = gast.Name(id=name, annotation=None, ctx=None, type_comment=None)
return temp_node
if isinstance(node, gast.Subscript):
temp_node = _name_temp_grad(node.value)
else:
temp_node = _name_temp_grad(node)
anno.setanno(temp_node, 'temp_adjoint_var', node)
return temp_node


def create_temp(node, namer):
"""Create a temporary variable.
Args:
node: Create a temporary variable to store this variable in.
namer: A naming object that guarantees the names are unique.
Returns:
node: See `create_grad`. Returns a temporary variable, which is always a
simple variable annotated with `temp_var`.
"""
if isinstance(node, gast.Name):
name = node.id
elif isinstance(node, (gast.Attribute, gast.Subscript)):
name = node.value.id
elif isinstance(node, gast.Tuple) and all([isinstance(nd, gast.Name) for nd in node.elts]):
name = ""
for nd in node.elts:
name += f"{nd.id}_"
name = name[:-1]
else:
raise TypeError
temp_node = gast.Name(id=namer.temp(name), annotation=None, ctx=None, type_comment=None)
anno.setanno(temp_node, 'temp_var', node)
return temp_node
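A sketch of the naming behaviour of `create_grad` and `create_temp` in isolation (not the full differentiation pipeline; the variable `x` is made up):

```python
import gast
from ast_analyzer.grad import create, naming

namer = naming.Namer()
x = gast.Name(id='x', ctx=gast.Load(), annotation=None, type_comment=None)

adj = create.create_grad(x, namer)               # adjoint variable of `x`
print(adj.id)                                    # bx  (reverse-mode prefix)

tan = create.create_grad(x, namer, tangent=True)
print(tan.id)                                    # dx  (forward-mode prefix)

tmp = create.create_temp(x, namer)               # temporary caching `x`
print(tmp.id)                                    # _x
```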
54 changes: 54 additions & 0 deletions artifacts/ast_analyzer/grad/grammar.py
@@ -0,0 +1,54 @@
"""Classifications of AST nodes."""
from __future__ import absolute_import
import gast

# LITERALS are represented by gast.Constant

CONTROL_FLOW = (gast.For, gast.AsyncFor, gast.While, gast.If, gast.Try,
gast.Break, gast.Continue)

COMPOUND_STATEMENTS = (
gast.FunctionDef,
gast.ClassDef,
gast.For,
gast.While,
gast.If,
gast.With,
gast.Try,
gast.AsyncFunctionDef,
gast.AsyncFor,
gast.AsyncWith
)

SIMPLE_STATEMENTS = (
gast.Return,
gast.Delete,
gast.Assign,
gast.AugAssign,
gast.Raise,
gast.Assert,
gast.Import,
gast.ImportFrom,
gast.Global,
gast.Nonlocal,
gast.Expr,
gast.Pass,
gast.Break,
gast.Continue
)

STATEMENTS = COMPOUND_STATEMENTS + SIMPLE_STATEMENTS

BLOCKS = (
(gast.Module, 'body'),
(gast.FunctionDef, 'body'),
(gast.AsyncFunctionDef, 'body'),
(gast.For, 'body'),
(gast.For, 'orelse'),
(gast.AsyncFor, 'body'),
(gast.AsyncFor, 'orelse'),
(gast.While, 'body'),
(gast.While, 'orelse'),
(gast.If, 'body'),
(gast.If, 'orelse'),
)
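The tuples above are meant to be used directly with `isinstance`; for example:

```python
import gast
from ast_analyzer.grad import grammar

tree = gast.parse("x = 1\nwhile x < 10:\n    x += 1")
for node in gast.walk(tree):
    if isinstance(node, grammar.CONTROL_FLOW):
        print(type(node).__name__, "-> control flow")      # While
    elif isinstance(node, grammar.SIMPLE_STATEMENTS):
        print(type(node).__name__, "-> simple statement")  # Assign, AugAssign
```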
291 changes: 291 additions & 0 deletions artifacts/ast_analyzer/grad/naming.py
@@ -0,0 +1,291 @@
"""Tools for naming conventions."""
from __future__ import absolute_import
import random
import re
import types

import gast
import six

PRIMAL_NAME = 'pri_{}{}'
ADJOINT_NAME = '_d{}d{}'
FORWARD_NAME = '_f{}f{}'
JOINT_NAME = 'd{}d{}'
STACK_NAME = '_stack'
SUBSTACK_NAME = '_substack'


def primal_name(func, wrt):
"""Name for the primal of a function."""
if not isinstance(func, (types.FunctionType, types.MethodType)):
raise TypeError(func)
varnames = six.get_function_code(func).co_varnames
return PRIMAL_NAME.format(func.__name__, ''.join(varnames[i] for i in wrt))


def _adjoint_name(func, wrt, template):
if not isinstance(func, (types.FunctionType, types.MethodType)):
raise TypeError(func)
varnames = six.get_function_code(func).co_varnames
return template.format(func.__name__, ''.join(varnames[i] for i in wrt))


def joint_name(func, wrt):
"""Name for a function in joint mode."""
return _adjoint_name(func, wrt, JOINT_NAME)


def adjoint_name(func, wrt):
"""Name for the adjoint of a function."""
return _adjoint_name(func, wrt, ADJOINT_NAME)


def forward_name(func, wrt):
"""Name for a function in forward mode."""
return _adjoint_name(func, wrt, FORWARD_NAME)


class Names(gast.NodeVisitor):

def __init__(self):
self.names = set()

def visit_Name(self, node):
if isinstance(node.ctx, (gast.Store, gast.Param)):
self.names.add(node.id)


def get_names(node):
"""Find the arguments and variables assigned to in a certain node."""
names = Names()
names.visit(node)
return names.names


def uniqify(func):
"""Make sure that a method returns a unique name."""
@six.wraps(func)
def unique(self, *args, **kwargs):
return self.unique(func(self, *args, **kwargs))
return unique


def uniqify_once(func):
"""Make sure that a method returns a unique name."""
@six.wraps(func)
def unique_once(self, *args, **kwargs):
return self.unique_once(func(self, *args, **kwargs))
return unique_once


class Namer(object):
"""Generate human-readable names for AST nodes.
Given an AST node, this class tries to produce a sensible variable name
that it could be substituted with.
In principle, it will try to construct sensible names from the operands and
operator e.g. `x + y` becomes `x_plus_y`. However, the length of these
variable names can quickly explode. In that case, we try to back off to using
the left hand side of the statement if possible e.g. in `z = f(x + y)` the
expression `x + y` could be named `_z`.
In case the LHS is not available (because it wasn't given by the calling
code) or if the LHS name is too long, we fall back to assigning random
variable names.
Some methods (such as `grad`) will return the same name when called with the
same inputs.
Attributes:
names: A set of variable names that cannot be used. Allowed to be changed.
target: The node that is on the LHS of the current statement. Is `None` by
default. Should be set by the calling code.
"""
# Naming convention from 'Evaluating Derivatives', b is rev, d is fwd
ADJOINT_VAR = 'b{}'
MIDDLE_VAR = 'd{}'
TEMP_VAR = '_{}'
TEMP_ADJOINT_VAR = '_b{}'
TEMP_MIDDLE_VAR = '_d{}'
TEMP_CACHE_VAR = '_c{}'

MAX_LENGTH = 15

def __init__(self):
self.names = set()
self.name_mappings = dict()
# The targets field of the LHS whenever a node inside an assign statement
# is being named
self.target = None

@classmethod
def build(cls, node):
"""Construct a namer object for a given function scope."""
if not isinstance(node, gast.FunctionDef):
raise ValueError
namer = cls()
namer.names.update(get_names(node))
return namer

def valid(self, name):
"""Ensure a variable name is valid.
Note: Assumes variable names are ASCII, which isn't necessarily true in
Python 3.
Args:
name: A proposed variable name.
Returns:
A valid version of the name.
"""
name = re.sub('[^0-9a-zA-Z_]', '', name)
if re.match('[0-9]', name):
name = '_' + name
return name

def trim(self, name):
"""When the name is too long, use the LHS or a random string instead."""
if len(name) > self.MAX_LENGTH and self.target:
name = self.TEMP_VAR.format(self._name(self.target))
if len(name) > self.MAX_LENGTH:
while True:
name = '_{:04x}'.format(random.randint(0, 16 ** 4 - 1))
if name not in self.names:
break
return name

def unique(self, name):
"""Make a variable name unique by appending a number if needed."""
# Make sure the name is valid
name = self.valid(name)
# Make sure it's not too long
name = self.trim(name)
# Now make sure it's unique
unique_name = name
i = 2
while unique_name in self.names:
unique_name = name + str(i)
i += 1
self.names.add(unique_name)
return unique_name

def unique_once(self, name):
if name not in self.name_mappings:
unique_name = self.unique(name)
self.name_mappings[name] = unique_name
return self.name_mappings[name]

def __getattr__(self, attr):
"""Access unwrapped versions of methods.
Methods are wrapped with `uniqify` to return a unique version of a
name. Internally the class however might want to use the original
version of these methods. This method makes those accessible by using a
leading underscore.
"""
if attr.startswith('_') and hasattr(self, attr[1:]):
return getattr(self, attr[1:]).__wrapped__.__get__(self, Namer)
raise AttributeError

@uniqify
def name(self, node):
namer = getattr(self, 'name_' + node.__class__.__name__)
return namer(node)

@uniqify
def counter(self):
return 'i'

@uniqify_once
def grad(self, name, mode=False):
if mode:
var_template = self.MIDDLE_VAR
else:
var_template = self.ADJOINT_VAR
return var_template.format(name)

@uniqify
def temp_grad(self, name, mode=False):
if mode:
var_template = self.TEMP_MIDDLE_VAR
else:
var_template = self.TEMP_ADJOINT_VAR
return var_template.format(name)

@uniqify_once
def temp(self, name):
return self.TEMP_VAR.format(name)

@uniqify
def cache_fwd(self, name):
return self.TEMP_CACHE_VAR.format(name)

@uniqify
def cond(self):
return 'cond'

def name_Name(self, node):
return node.id

def name_Return(self, node):
return 'return'

def name_Tuple(self, node):
return 't'

def name_List(self, node):
return 'l'

def name_Call(self, node):
if len(node.args) <= 2:
return (self._name(node.func) + '_' +
'_'.join(self._name(arg) for arg in node.args))
else:
return self._name(node.func)

def name_Attribute(self, node):
return self._name(node.value) + '_' + node.attr

def name_Subscript(self, node):
return self._name(node.value) + '_' + self._name(node.slice)

def name_Index(self, node):
return self._name(node.value)

def name_Slice(self, node):
return ''.join(self._name(i) if i else ''
for i in (node.lower, node.upper, node.step))

def name_ExtSlice(self, node):
return '_'.join(self._name(d) for d in node.dims)

def name_Constant(self, node):
return str(node.value)

BINOP_NAMES = {
gast.Add: 'plus',
gast.Sub: 'minus',
gast.Mult: 'times',
gast.Div: 'over',
gast.FloorDiv: 'intdiv',
gast.Mod: 'modulo',
gast.Pow: 'to_the',
gast.MatMult: 'times'
}

def name_BinOp(self, node):
return '{left}_{op}_{right}'.format(left=self._name(node.left),
right=self._name(node.right),
op=self.BINOP_NAMES[type(node.op)])

UNARYOP_NAMES = {
gast.UAdd: 'plus',
gast.USub: 'minus',
gast.Not: 'not'
}

def name_UnaryOp(self, node):
return '{op}_{operand}'.format(op=self.UNARYOP_NAMES[type(node.op)],
operand=self._name(node.operand))
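A sketch of the `Namer` on its own, showing readable-name construction and the uniquing behaviour (the function `f` is made up; `quoting` is only used to parse it):

```python
import gast
from ast_analyzer.grad import naming, quoting

fn = quoting.parse_string("def f(x, y):\n    z = x + y\n    return z").body[0]
namer = naming.Namer.build(fn)            # reserves the names {'x', 'y', 'z'}

expr = gast.parse("x + y").body[0].value  # a BinOp node
print(namer.name(expr))                   # x_plus_y
print(namer.grad('z'))                    # bz  (adjoint of z)
print(namer.grad('z'))                    # bz  (stable across repeated calls)
print(namer.unique('x'))                  # x2  ('x' is already reserved)
```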
113 changes: 113 additions & 0 deletions artifacts/ast_analyzer/grad/quoting.py
@@ -0,0 +1,113 @@
"""Moving between source code and AST."""
from __future__ import absolute_import

import astor
import gast
import inspect
import textwrap

from . import annotations as anno


class GradParseError(SyntaxError):
pass


class SourceWithCommentGenerator(astor.codegen.SourceGenerator):
"""Source code generator that outputs comments."""

def __init__(self, *args, **kwargs):
super(SourceWithCommentGenerator, self).__init__(*args, **kwargs)
self.new_indentation = True

def body(self, statements):
self.new_indentation = True
super(SourceWithCommentGenerator, self).body(statements)

def visit(self, node, abort=astor.codegen.SourceGenerator.abort_visit):
if anno.hasanno(node, 'comment'):
comment = anno.getanno(node, 'comment')
# Preprocess the comment to fit to maximum line width of 80 characters
linewidth = 78
if comment['location'] in ('above', 'below'):
comment['text'] = comment['text'][:linewidth]
n_newlines = 1 if self.new_indentation else 2
if comment['location'] == 'above':
self.result.append('\n' * n_newlines)
self.result.append(self.indent_with * self.indentation)
self.result.append('# %s' % comment['text'])
super(SourceWithCommentGenerator, self).visit(node)
elif comment['location'] == 'below':
super(SourceWithCommentGenerator, self).visit(node)
self.result.append('\n')
self.result.append(self.indent_with * self.indentation)
self.result.append('# %s' % comment['text'])
self.result.append('\n' * (n_newlines - 1))
elif comment['location'] == 'right':
super(SourceWithCommentGenerator, self).visit(node)
self.result.append(' # %s' % comment['text'])
else:
raise GradParseError('Only valid comment locations are '
'above, below, right')
else:
self.new_indentation = False
super(SourceWithCommentGenerator, self).visit(node)


def to_source(node, indentation=' ' * 4):
"""Return source code of a given AST."""
if isinstance(node, gast.AST):
node = gast.gast_to_ast(node)
generator = SourceWithCommentGenerator(indentation, False,
astor.string_repr.pretty_string)
generator.visit(node)
generator.result.append('\n')
return astor.source_repr.pretty_source(generator.result).lstrip()


def parse_function(fn):
"""Get the source of a function and return its AST."""
try:
return parse_string(inspect.getsource(fn))
except (IOError, OSError) as e:
raise ValueError(
'Cannot differentiate function: %s. System must be able to access the '
'source code of the function. Functions defined in a Python '
'interpreter and functions backed by C extension modules do not '
'have accessible source code.' % e)


def parse_string(src):
"""Parse a string into an AST."""
return gast.parse(textwrap.dedent(src))


def quote(src_string, return_expr=False):
"""Go from source code to AST nodes.
This function returns a tree without enclosing `Module` or `Expr` nodes.
Args:
src_string: The source code to parse.
return_expr: Whether or not to return a containing expression. This can be
set to `True` if the result is to be part of a series of statements.
Returns:
An AST of the given source code.
"""
node = parse_string(src_string)
body = node.body
if len(body) == 1:
if isinstance(body[0], gast.Expr) and not return_expr:
out = body[0].value
else:
out = body[0]
else:
out = node
return out


def unquote(node):
"""Go from an AST to source code."""
return to_source(node).strip()
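A sketch of the source/AST round trip this module provides (the exact formatting of regenerated source may differ slightly):

```python
from ast_analyzer.grad import quoting

node = quoting.quote("y = x + 1")        # a bare Assign node, no Module wrapper
print(type(node).__name__)               # Assign
print(quoting.unquote(node))             # y = x + 1   (modulo formatting)

expr = quoting.quote("x + 1")                     # expression statements lose Expr...
stmt = quoting.quote("x + 1", return_expr=True)   # ...unless return_expr=True
print(type(expr).__name__, type(stmt).__name__)   # BinOp Expr
```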
191 changes: 191 additions & 0 deletions artifacts/ast_analyzer/grad/template.py
@@ -0,0 +1,191 @@
"""Helper functions and classes for filling in templates.
Functions can be used as templates. In this case, all the variables to be
replaced should be function arguments. This allows static analysis to still
work. For simple templates nodes can be passed as well.
"""
from __future__ import absolute_import

import types
import enum

import gast
import six
from . import annotations as anno
from . import ast_utils
from . import create
from . import naming
from . import quoting
from . import transformers
from . import utils


class ReplaceTransformer(gast.NodeTransformer):
"""Replace variables with AST nodes.
The context of the replacements is automatically set to load or store.
"""

def __init__(self, replacements):
self.replacements = replacements
self.seen = set()
self.is_top = True

def visit_Expr(self, node):
if (isinstance(node.value, gast.Name) and
node.value.id in self.replacements):
return self.visit(node.value)
self.generic_visit(node)
return node

def visit_FunctionDef(self, node):
node = self.generic_visit(node)
if node.name in self.replacements:
node.name = self.replacements[node.name].id
return node

def visit_Name(self, node):
if node.id in self.replacements:
# NOTE In principle we don't want to copy, because it might break
# references held in annotations, but we will copy if we have to to
# avoid duplicate nodes
if node.id in self.seen:
new_nodes = ast_utils.copy_node(self.replacements[node.id])
else:
self.seen.add(node.id)
new_nodes = self.replacements[node.id]
if isinstance(new_nodes, gast.AST):
new_nodes = [new_nodes]
for new_node in new_nodes:
anno.setanno(new_node, 'replacement', node, safe=False)
if 'ctx' in new_node._fields:
new_node.ctx = node.ctx
if len(new_nodes) == 1:
new_nodes, = new_nodes
return new_nodes
else:
return node


Replace = enum.Enum('Replace', ['NONE', 'PARTIAL', 'FULL', 'TANGENT'])


class ReplaceGradTransformer(transformers.TreeTransformer):
"""Interpret the gradient operator `d[x]` in templates.
The gradient of a temporary variable is the normal gradient i.e. d[_x] =
dx.
Args:
replace_grad: One of the enumerated `Replace` values. If `PARTIAL` then
`d[x]` will be transformed into the gradient `bx` when read, but
transformed into a temporary variable (e.g. `_bx`) when written to.
This ensures that the gradient `bx` doesn't get overwritten if it
already exists. If the mode is `FULL` then `d[x]` becomes the gradient
`bx` everywhere. `TANGENT` functions as `FULL` but creates the tangent
instead of the adjoint i.e. `dx`.
namer: A `Namer` object which decides on the names to give to the
gradients. This guarantees that temporaries receive unique names.
tangent: Whether to create tangents or adjoints i.e. whether we are in
reverse or forward mode.
"""

def __init__(self, replace_grad, namer=None, tangent=False):
self.replace_grad = replace_grad
if namer is None:
namer = naming.Namer()
self.namer = namer

self.tangent = tangent
super(ReplaceGradTransformer, self).__init__()

def visit_Subscript(self, node):
if (isinstance(node.value, gast.Name) or utils.is_constant_num(node.value)) and node.value.id == 'd':
if (not isinstance(node.slice, gast.Index) or
not (isinstance(node.slice.value,
(gast.Subscript, gast.Name)) or utils.is_constant_str(node.value))):
# This happens when the gradient of a constant is taken
if self.replace_grad == Replace.TANGENT:
new_node = gast.Constant(value=0, kind=None)
else:
new_node = gast.Name(id='_', ctx=None, annotation=None, type_comment=None)
self.remove(new_node)
elif (self.replace_grad in (Replace.FULL, Replace.TANGENT) or
isinstance(node.ctx, gast.Load)):
new_node = create.create_grad(node.slice.value, self.namer,
self.tangent)
elif isinstance(node.ctx, gast.Store):
new_node = create.create_temp_grad(node.slice.value, self.namer,
self.tangent)
else:
raise ValueError
new_node.ctx = node.ctx
if isinstance(new_node, gast.Tuple):
for elt in new_node.elts:
elt.ctx = node.ctx
node = new_node
return node


def replace(template, replace_grad=Replace.PARTIAL,
namer=None, **replacements):
"""Replace placeholders in a Python template (quote).
Args:
template: A function, AST node or string to be used as a template. Note
that if a function is passed, any placeholder is expected to also be a
function argument. If a string is passed, it must represent valid
Python code, and any variable it references is a placeholder.
replace_grad: If Replace.NONE, statements of the form `d[x]` are ignored.
For the other possible values, see `ReplaceGradTransformer`.
namer: See `ReplaceGradTransformer`.
**replacements: A mapping from placeholder names to (lists of) AST nodes
that these placeholders will be replaced by. If a string is passed,
`quote` will be called on it to turn it into a node.
Returns:
body: An AST node or list of AST nodes with the replacements made. If the
template was a function, a list will be returned. If the template was a
node, the same node will be returned. If the template was a string, an
AST node will be returned (a `Module` node in the case of a multi-line
string, an `Expr` node otherwise).
Raises:
ValueError: If a function is used as a template and an incorrect set of
replacements was passed.
"""
# Handle the 3 different types of templates: funcs, nodes, and strings
is_function = isinstance(template, types.FunctionType)
if is_function:
tree = quoting.parse_function(template).body[0]
placeholders = set(arg.id for arg in tree.args.args)
tree.args.args = []
if tree.args.vararg:
placeholders.add(tree.args.vararg)
tree.args.vararg = None
if set(replacements.keys()) != placeholders:
raise ValueError('too many or few replacements')
elif isinstance(template, gast.AST):
tree = template
else:
tree = quoting.quote(template, return_expr=True)
# If the replacements are strings, turn them into nodes
for k, v in replacements.items():
if isinstance(v, six.string_types):
replacements[k] = quoting.quote(v)
# Perform the replacement
ReplaceTransformer(replacements).visit(tree)
# Handle the d[x] operator
if replace_grad is not Replace.NONE:
rgt = ReplaceGradTransformer(
replace_grad=replace_grad,
namer=namer,
tangent=replace_grad is Replace.TANGENT)
rgt.visit(tree)
# Return the AST node with replacements made
if is_function:
return tree.body
else:
return tree
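A sketch of `replace` with a string template, including the `d[x]` gradient operator (with the default `Namer`, the adjoints of `x` and `y` come out as `bx` and `by`):

```python
from ast_analyzer.grad import quoting, template

# Every free variable of a string template acts as a placeholder.
node = template.replace("lhs = rhs * 2", lhs="y", rhs=quoting.quote("x + 1"))
print(quoting.unquote(node))       # y = (x + 1) * 2   (modulo formatting)

# d[...] names gradients; Replace.FULL rewrites reads and writes alike.
stmt = template.replace("d[x] = d[y]", replace_grad=template.Replace.FULL)
print(quoting.unquote(stmt))       # bx = by
```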
277 changes: 277 additions & 0 deletions artifacts/ast_analyzer/grad/transformers.py
@@ -0,0 +1,277 @@
"""AST visiting and transformation patterns."""

from __future__ import absolute_import

from collections import deque
from copy import copy

import gast
from . import annotations as anno
from . import grammar


class TreeTransformer(gast.NodeTransformer):
"""A transformer that allows for non-local changes.
An extension of the standard `NodeTransformer` in Python's `ast` package.
This transformer can insert statements right before or after the current
statement, at the end or beginning of the current block, or at the top of the
function.
This class is meant to be subclassed in the same way as Python's
`NodeTransformer` class. The subclasses can then call the `append`,
`prepend`, etc. methods as appropriate to transform the AST.
Note that nodes that are appended or prepended using the `append` and
`prepend` methods will be visited by the transformer. This means that they
can recursively append or prepend statements of their own. This doesn't hold
for statements that are appended/prepended to the block or function body;
these inserted statements are not visited after being inserted.
To see which nodes classify as statements or which node fields classify as
blocks, please see `grammar.py`.
Attributes:
to_remove: After the initial pass, this contains a set of nodes that will
be removed. A second pass is automatically performed using the `Remove`
transformer to actually remove those nodes.
"""

def __init__(self):
self.to_insert = []
self.to_prepend = []
self.to_append = []
self.to_prepend_block = []
self.to_append_block = []
self.to_insert_top = deque()
self.to_remove = set()
self._top = True

def prepend(self, node):
"""Prepend a statement to the current statement.
Note that with multiple calls to prepend, the most recently prepended
statement ends up at the top.
Args:
node: The statement to prepend.
Raises:
ValueError: If the given node is not a statement.
"""
if not isinstance(node, grammar.STATEMENTS):
raise ValueError
self.to_prepend[-1].appendleft(node)

def append(self, node):
"""Append a statement to the current statement.
Note that with multiple calls to append, the most recently appended
statement ends up at the bottom.
Args:
node: The statement to append.
Raises:
ValueError: If the given node is not a statement.
"""
if not isinstance(node, grammar.STATEMENTS):
raise ValueError
self.to_append[-1].append(node)

def remove(self, node):
"""Remove the given node."""
self.to_remove.add(node)

def insert_top(self, node):
"""Insert statements at the top of the function body.
Note that multiple calls to `insert_top` will result in the statements
being prepended in that order; this is different behavior from `prepend`.
Args:
node: The statement to prepend.
Raises:
ValueError: If the given node is not a statement.
"""
if not isinstance(node, grammar.STATEMENTS):
raise ValueError
self.to_insert_top.append(node)

def insert_top_last(self, node):
if not isinstance(node, grammar.STATEMENTS):
raise ValueError
self.to_insert_top.appendleft(node)

def prepend_block(self, node, reverse=False):
"""Prepend a statement to the current block.
Args:
node: The statement to prepend.
reverse: When called multiple times, this flag determines whether the
statement should be prepended or appended to the already inserted
statements.
Raises:
ValueError: If the given node is not a statement.
"""
if not isinstance(node, grammar.STATEMENTS):
raise ValueError
if reverse:
self.to_prepend_block[-1].appendleft(node)
else:
self.to_prepend_block[-1].append(node)

def append_block(self, node, reverse=False):
"""Append a statement to the current block.
Args:
node: The statement to prepend.
reverse: When called multiple times, this flag determines whether the
statement should be prepended or appended to the already inserted
statements.
Raises:
ValueError: If the given node is not a statement.
"""
if not isinstance(node, grammar.STATEMENTS):
raise ValueError
if reverse:
self.to_append_block[-1].appendleft(node)
else:
self.to_append_block[-1].append(node)

def visit_statements(self, nodes):
"""Visit a series of nodes in a node body.
This function is factored out so that it can be called recursively on
statements that are appended or prepended. This allows e.g. a nested
expression to prepend a statement, and that statement can prepend a
statement again, etc.
Args:
nodes: A list of statements.
Returns:
A list of transformed statements.
"""
for node in nodes:
if isinstance(node, gast.AST):
self.to_prepend.append(deque())
self.to_append.append(deque())
node = self.visit(node)
self.visit_statements(self.to_prepend.pop())
if isinstance(node, gast.AST):
self.to_insert[-1].append(node)
elif node:
self.to_insert[-1].extend(node)
self.visit_statements(self.to_append.pop())
else:
self.to_insert[-1].append(node)
return self.to_insert[-1]

def generic_visit(self, node):
is_top = False
if self._top:
is_top = True
self._top = False
for field, old_value in gast.iter_fields(node):
if isinstance(old_value, list):
if (type(node), field) in grammar.BLOCKS:
self.to_prepend_block.append(deque())
self.to_append_block.append(deque())
self.to_insert.append(deque())
new_values = copy(self.visit_statements(old_value))
self.to_insert.pop()
else:
new_values = []
for value in old_value:
if isinstance(value, gast.AST):
value = self.visit(value)
if value is None:
continue
elif not isinstance(value, gast.AST):
new_values.extend(value)
continue
new_values.append(value)
if isinstance(node, gast.FunctionDef) and field == 'body':
new_values.extendleft(self.to_insert_top)
self.to_insert_top = deque([])
if (type(node), field) in grammar.BLOCKS:
new_values.extendleft(self.to_prepend_block.pop())
return_ = None
if new_values and isinstance(new_values[-1], gast.Return):
return_ = new_values.pop()
new_values.extend(self.to_append_block.pop())
if return_:
new_values.append(return_)
old_value[:] = new_values
elif isinstance(old_value, gast.AST):
new_node = self.visit(old_value)
if new_node is None:
delattr(node, field)
else:
setattr(node, field, new_node)
if is_top and self.to_remove:
Remove(self.to_remove).visit(node)
return node
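To make the ordering rule in the `prepend` docstring concrete, here is a small hypothetical sketch (not part of this diff, and assuming plain assignment nodes are accepted by `grammar.STATEMENTS`): the statement prepended last should come out on top.

import gast

class MarkReturns(TreeTransformer):
    """Hypothetical pass: queue two marker assignments before each return."""

    def visit_Return(self, node):
        # Each call to prepend pushes the new statement above the previous one.
        self.prepend(gast.parse("marker = 'first'").body[0])
        self.prepend(gast.parse("marker = 'second'").body[0])
        return node

# Sketch of expected usage and result:
#   fn = gast.parse("def f(x):\n    return x").body[0]
#   MarkReturns().visit(fn)
# The transformed body should read:
#   marker = 'second'; marker = 'first'; return x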


class Remove(gast.NodeTransformer):
"""Remove statements containing given nodes.
If an entire block was deleted, it will delete the relevant conditional or
loop entirely. Note that deleting an entire function body will result in an
invalid AST.
Calls to user functions that were generated by Tangent will not be removed
because this might result in incorrect writing and reading from the tape.
Args:
to_remove: A set of nodes that need to be removed. Note that the entire
statement containing a marked node is removed; e.g. if `x` is in
`to_remove`, the whole statement `y = f(x)` is removed.
"""

def __init__(self, to_remove):
self.to_remove = to_remove
self.remove = False
self.is_call = False

def visit(self, node):
if node in self.to_remove:
self.remove = True
if anno.hasanno(node, 'pri_call') or anno.hasanno(node, 'adj_call'):
# We don't remove function calls for now; removing them also
# removes the push statements inside of them, but not the
# corresponding pop statements
self.is_call = True
new_node = super(Remove, self).visit(node)
if isinstance(node, gast.Return) and isinstance(node.value, gast.Tuple):
node.value.elts = list(filter(lambda x: x not in self.to_remove, node.value.elts))
self.remove = self.is_call = False
elif isinstance(node, gast.arguments):
node.args = list(filter(lambda x: x not in self.to_remove, node.args))
self.remove = self.is_call = False
elif isinstance(node, grammar.STATEMENTS):
if self.remove and not self.is_call:
new_node = None
self.remove = self.is_call = False
if isinstance(node, gast.If) and not node.body:
# If we optimized away an entire if block, we need to handle that
if not node.orelse:
return
else:
node.test = gast.UnaryOp(op=gast.Not(), operand=node.test)
node.body, node.orelse = node.orelse, node.body
elif isinstance(node, (gast.While, gast.For)) and not node.body:
return node.orelse
return new_node
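For reference, a hypothetical end-to-end sketch of how the two classes above cooperate (not part of this diff; it assumes `assert` nodes are included in `grammar.STATEMENTS`): marking a node via `remove()` during traversal triggers the automatic second pass with `Remove`, which deletes the whole containing statement.

import gast

class StripAsserts(TreeTransformer):
    """Hypothetical pass: drop assert statements from a function."""

    def visit_Assert(self, node):
        # Queue the node for deletion; after traversal, TreeTransformer runs
        # Remove(self.to_remove) over the tree automatically.
        self.remove(node)
        return node

# Sketch of expected usage:
#   fn = gast.parse("def f(x):\n    assert x > 0\n    return x").body[0]
#   StripAsserts().visit(fn)
# Afterwards fn.body should contain only the return statement.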
16 changes: 16 additions & 0 deletions artifacts/ast_analyzer/grad/utils.py
@@ -0,0 +1,16 @@
import gast

from . import quoting
from . import annotations as anno


def is_constant_num(node):
return isinstance(node, gast.Constant) and isinstance(node.value, (int, float, complex))


def is_constant_str(node):
return isinstance(node, gast.Constant) and isinstance(node.value, str)


def is_basic_node(node):
return isinstance(node, (gast.Constant, gast.Name))
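A quick hypothetical illustration of what these predicates match (not part of the diff; the import path is assumed from the file location shown above):

import gast
from ast_analyzer.grad.utils import is_constant_num, is_constant_str, is_basic_node

binop = gast.parse("x + 3.0").body[0].value      # BinOp: Name('x') + Constant(3.0)
print(is_constant_num(binop.right))              # True  - numeric Constant
print(is_constant_num(binop.left))               # False - a Name is not a Constant
print(is_basic_node(binop.left))                 # True  - Names count as basic nodes
print(is_constant_str(gast.parse("'hi'").body[0].value))  # True - string Constant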
2 changes: 2 additions & 0 deletions artifacts/ast_analyzer/python_std/__init__.py
@@ -0,0 +1,2 @@
from ast_analyzer.python_std.run_py_passes import *

14 changes: 14 additions & 0 deletions artifacts/ast_analyzer/python_std/analyses/__init__.py
@@ -0,0 +1,14 @@
"""The analyses submodule contains all the analyses passes offered in Pythran.
This file is just for convenience and turns the import from
import analyses.foo.Foo
into
import analyses.Foo
"""

from .ast_matcher import ASTMatcher, AST_any, AST_or, Placeholder, Check
from .has_return import HasReturn, HasBreak, HasContinue
from .node_count import NodeCount
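As the docstring above describes, these re-exports shorten the import paths; a hypothetical usage sketch:

# Long form, importing from the defining module:
from ast_analyzer.python_std.analyses.has_return import HasReturn
# Short form enabled by this __init__.py:
from ast_analyzer.python_std.analyses import HasReturn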