diff --git a/benchmark/benchmark-7b.sh b/benchmark/benchmark-7b.sh
index ebedeef0a4..16a90971f4 100755
--- a/benchmark/benchmark-7b.sh
+++ b/benchmark/benchmark-7b.sh
@@ -1,4 +1,9 @@
 #!/bin/bash
+if [ -z "$1" ]
+then
+    echo "usage: $0 <output-dir>" && exit 1
+fi
+
 tp=1
 model_name=llama2
 model_path=/workspace/models-140/llama2/huggingface/llama-2-7b-chat/
@@ -16,12 +21,12 @@ apt-get install crudini
 crudini --set ${config_path} llama max_context_token_num 4
 crudini --set ${config_path} llama cache_chunk_size -1
 crudini --set ${config_path} llama cache_max_entry_count 1000
-crudini --set ${config_path} llama max_batch_size 256
+crudini --set ${config_path} llama max_batch_size 128
 # end of update config
 
 benchmark_rpm () {
     output_path=$1
-    mkdir -p ${output_path}
+    mkdir -p "${output_path}"
 
     batches=(64 128)
     for batch in "${batches[@]}"
@@ -40,6 +45,8 @@ benchmark_rpm () {
 
 benchmark_generation () {
     output_path=$1
+    mkdir -p "${output_path}"
+
     python3 benchmark/profile_generation.py \
         ${turbomind_model_path} \
         --concurrency 1 16 32 64 \
diff --git a/benchmark/benchmark_13b.sh b/benchmark/benchmark_13b.sh
index 983da3762a..855e2b1fe5 100755
--- a/benchmark/benchmark_13b.sh
+++ b/benchmark/benchmark_13b.sh
@@ -16,12 +16,12 @@ apt-get install crudini
 crudini --set ${config_path} llama max_context_token_num 4
 crudini --set ${config_path} llama cache_chunk_size -1
 crudini --set ${config_path} llama cache_max_entry_count 500
-crudini --set ${config_path} llama max_batch_size 256
+crudini --set ${config_path} llama max_batch_size 128
 # end of update config
 
 benchmark_rpm () {
     output_path=$1
-    mkdir -p ${output_path}
+    mkdir -p "${output_path}"
 
     batches=(64 128)
     for batch in "${batches[@]}"
@@ -40,6 +40,8 @@ benchmark_rpm () {
 
 benchmark_generation () {
     output_path=$1
+    mkdir -p "${output_path}"
+
     python3 benchmark/profile_generation.py \
         ${turbomind_model_path} \
         --concurrency 1 16 32 64 \
diff --git a/benchmark/benchmark_20b.sh b/benchmark/benchmark_20b.sh
index 1e5346c060..d6fe00d32a 100755
--- a/benchmark/benchmark_20b.sh
+++ b/benchmark/benchmark_20b.sh
@@ -16,12 +16,12 @@ apt-get install crudini
 crudini --set ${config_path} llama max_context_token_num 4
 crudini --set ${config_path} llama cache_chunk_size -1
 crudini --set ${config_path} llama cache_max_entry_count 700
-crudini --set ${config_path} llama max_batch_size 256
+crudini --set ${config_path} llama max_batch_size 128
 # end of update config
 
 benchmark_rpm () {
     output_path=$1
-    mkdir -p ${output_path}
+    mkdir -p "${output_path}"
 
     batches=(64 128)
     for batch in "${batches[@]}"
@@ -40,6 +40,8 @@ benchmark_rpm () {
 
 benchmark_generation () {
     output_path=$1
+    mkdir -p "${output_path}"
+
     python3 benchmark/profile_generation.py \
         ${turbomind_model_path} \
         --concurrency 1 16 32 64 \
diff --git a/benchmark/benchmark_70b.sh b/benchmark/benchmark_70b.sh
index e17a4b78bc..d43bd37c55 100755
--- a/benchmark/benchmark_70b.sh
+++ b/benchmark/benchmark_70b.sh
@@ -21,7 +21,7 @@ crudini --set ${config_path} llama max_batch_size 256
 
 benchmark_rpm () {
     output_path=$1
-    mkdir -p ${output_path}
+    mkdir -p "${output_path}"
 
     batches=(64 128 256)
     for batch in "${batches[@]}"
@@ -40,6 +40,8 @@ benchmark_rpm () {
 
 benchmark_generation () {
     output_path=$1
+    mkdir -p "${output_path}"
+
     python3 benchmark/profile_generation.py \
         ${turbomind_model_path} \
         --concurrency 1 64 128 256 \
diff --git a/docs/en/benchmark/a100_fp16.md b/docs/en/benchmark/a100_fp16.md
new file mode 100644
index 0000000000..4ca463b65f
--- /dev/null
+++ b/docs/en/benchmark/a100_fp16.md
@@ -0,0 +1,130 @@
# Benchmark on A100 (FP16)

All the following results were measured on (x8) A100-80G GPUs with CUDA 11.8.

The tested lmdeploy version is `v0.1.0a1`.

The commands below benchmark both [static inference performance](#static-inference-benchmark) and [request throughput](#request-throughput-benchmark) on an A100-80G(x8) node for models of various sizes.

```shell
bash benchmark/benchmark-7b.sh
bash benchmark/benchmark_13b.sh
bash benchmark/benchmark_20b.sh
bash benchmark/benchmark_70b.sh
```
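Each script rewrites the turbomind config with `crudini` before profiling. To spot-check a value after a run, or to profile a single concurrency level without repeating the full sweep, the underlying tools can also be invoked by hand. Below is a minimal sketch, assuming the `config_path` and `turbomind_model_path` variables defined in the scripts above; only options that actually appear in the scripts are shown, and `profile_generation.py` takes further arguments (elided in the patch) that are assumed to fall back to defaults:

```shell
# read back one of the keys the benchmark script set with `crudini --set`
crudini --get ${config_path} llama max_batch_size

# profile static inference at a single concurrency level
python3 benchmark/profile_generation.py \
    ${turbomind_model_path} \
    --concurrency 32
```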
## Static Inference Benchmark

### llama2-7b

| batch | tp | prompt_tokens | completion_tokens | 1st_token_latency(min)(s) | 1st_token_latency(max)(s) | 1st_token_latency(ave)(s) | percentile50(s) | percentile75(s) | percentile95(s) | percentile99(s) | throughput(token/s) | mem_per_gpu(GB) |
| ----- | --- | ------------- | ----------------- | ------------------------- | ------------------------- | ------------------------- | --------------- | --------------- | --------------- | --------------- | ------------------- | --------------- |
| 1 | 1 | 1 | 128 | 0.01 | 0.011 | 0.011 | 0.009 | 0.009 | 0.01 | 0.011 | 100.02 | 76.55 |
| 1 | 1 | 128 | 128 | 0.022 | 0.022 | 0.022 | 0.01 | 0.01 | 0.01 | 0.01 | 102.21 | 76.59 |
| 1 | 1 | 128 | 2048 | 0.022 | 0.022 | 0.022 | 0.01 | 0.01 | 0.01 | 0.01 | 98.92 | 76.59 |
| 1 | 1 | 2048 | 128 | 0.139 | 0.14 | 0.139 | 0.01 | 0.01 | 0.01 | 0.011 | 86.1 | 76.77 |
| 1 | 1 | 2048 | 2048 | 0.139 | 0.141 | 0.14 | 0.011 | 0.011 | 0.011 | 0.011 | 93.78 | 76.77 |
| 16 | 1 | 1 | 128 | 0.011 | 0.031 | 0.021 | 0.01 | 0.011 | 0.011 | 0.013 | 1504.72 | 76.59 |
| 16 | 1 | 128 | 128 | 0.023 | 0.149 | 0.129 | 0.011 | 0.011 | 0.012 | 0.014 | 1272.47 | 76.77 |
| 16 | 1 | 128 | 2048 | 0.023 | 0.144 | 0.13 | 0.015 | 0.018 | 0.02 | 0.021 | 1010.62 | 76.77 |
| 16 | 1 | 2048 | 128 | 0.143 | 3.576 | 2.897 | 0.02 | 0.021 | 0.022 | 0.025 | 348.87 | 78.3 |
| 16 | 1 | 2048 | 2048 | 0.142 | 3.084 | 2.678 | 0.025 | 0.028 | 0.03 | 0.031 | 601.63 | 78.3 |
| 32 | 1 | 1 | 128 | 0.014 | 0.725 | 0.079 | 0.011 | 0.012 | 0.013 | 0.021 | 2136.73 | 76.62 |
| 32 | 1 | 128 | 128 | 0.022 | 0.359 | 0.214 | 0.012 | 0.013 | 0.014 | 0.035 | 2125.47 | 76.99 |
| 32 | 1 | 128 | 2048 | 0.026 | 0.269 | 0.2 | 0.021 | 0.026 | 0.031 | 0.033 | 1462.12 | 76.99 |
| 32 | 1 | 2048 | 128 | 0.143 | 5.267 | 4.288 | 0.031 | 0.032 | 0.034 | 0.161 | 450.43 | 78.3 |
| 32 | 1 | 2048 | 2048 | 0.19 | 5.429 | 4.118 | 0.04 | 0.045 | 0.05 | 0.053 | 733.34 | 78.34 |
| 64 | 1 | 1 | 128 | 0.013 | 0.21 | 0.042 | 0.012 | 0.018 | 0.028 | 0.041 | 4154.81 | 76.71 |
| 64 | 1 | 128 | 128 | 0.026 | 1.061 | 0.44 | 0.014 | 0.018 | 0.026 | 0.158 | 3024.07 | 77.43 |
| 64 | 1 | 128 | 2048 | 0.027 | 1.231 | 0.535 | 0.03 | 0.041 | 0.048 | 0.053 | 1852.06 | 77.96 |
| 64 | 1 | 2048 | 128 | 0.142 | 16.235 | 6.59 | 0.046 | 0.049 | 0.055 | 0.767 | 493.46 | 78.4 |
| 64 | 1 | 2048 | 2048 | 0.142 | 116.285 | 39.105 | 0.047 | 0.049 | 0.051 | 0.207 | 755.65 | 78.4 |

### llama2-13b

| batch | tp | prompt_tokens | completion_tokens | 1st_token_latency(min)(s) | 1st_token_latency(max)(s) | 1st_token_latency(ave)(s) | percentile50(s) | percentile75(s) | percentile95(s) | percentile99(s) | throughput(token/s) | mem_per_gpu(GB) |
| ----- | --- | ------------- | ----------------- | ------------------------- | ------------------------- | ------------------------- | --------------- | --------------- | --------------- | --------------- | ------------------- | --------------- |
| 1 | 1 | 1 | 128 | 0.018 | 0.019 | 0.018 | 0.017 | 0.017 | 0.017 | 0.017 | 57.49 | 74.84 |
| 
1 | 1 | 128 | 128 | 0.039 | 0.04 | 0.04 | 0.017 | 0.017 | 0.017 | 0.018 | 56.58 | 74.84 | +| 1 | 1 | 128 | 2048 | 0.04 | 0.04 | 0.04 | 0.018 | 0.018 | 0.018 | 0.019 | 55.29 | 74.84 | +| 1 | 1 | 2048 | 128 | 0.242 | 0.243 | 0.242 | 0.019 | 0.019 | 0.019 | 0.019 | 48.99 | 75.09 | +| 1 | 1 | 2048 | 2048 | 0.24 | 0.244 | 0.243 | 0.019 | 0.019 | 0.019 | 0.02 | 52.12 | 75.09 | +| 16 | 1 | 1 | 128 | 0.019 | 0.053 | 0.036 | 0.018 | 0.019 | 0.019 | 0.02 | 869.45 | 74.87 | +| 16 | 1 | 128 | 128 | 0.041 | 0.272 | 0.252 | 0.019 | 0.02 | 0.02 | 0.021 | 757.3 | 75.09 | +| 16 | 1 | 128 | 2048 | 0.041 | 0.275 | 0.253 | 0.026 | 0.03 | 0.033 | 0.034 | 605.88 | 75.09 | +| 16 | 1 | 2048 | 128 | 0.245 | 3.668 | 3.442 | 0.033 | 0.034 | 0.035 | 0.035 | 257.92 | 76.96 | +| 16 | 1 | 2048 | 2048 | 0.249 | 3.671 | 3.122 | 0.04 | 0.044 | 0.047 | 0.047 | 366.67 | 76.99 | +| 32 | 1 | 1 | 128 | 0.021 | 0.057 | 0.034 | 0.019 | 0.02 | 0.021 | 0.023 | 1667.5 | 74.9 | +| 32 | 1 | 128 | 128 | 0.04 | 0.497 | 0.461 | 0.021 | 0.022 | 0.023 | 0.025 | 1301.27 | 75.37 | +| 32 | 1 | 128 | 2048 | 0.041 | 1.151 | 0.833 | 0.034 | 0.042 | 0.047 | 0.048 | 860.14 | 75.84 | +| 32 | 1 | 2048 | 128 | 0.245 | 13.483 | 5.315 | 0.046 | 0.047 | 0.049 | 0.51 | 291.54 | 77.02 | +| 32 | 1 | 2048 | 2048 | 0.245 | 108.104 | 38.725 | 0.047 | 0.047 | 0.049 | 0.05 | 389.64 | 77.02 | +| 64 | 1 | 1 | 128 | 0.025 | 0.073 | 0.044 | 0.02 | 0.022 | 0.026 | 0.029 | 3049.16 | 74.96 | +| 64 | 1 | 128 | 128 | 0.046 | 0.951 | 0.703 | 0.024 | 0.026 | 0.029 | 0.032 | 2033.22 | 75.87 | +| 64 | 1 | 128 | 2048 | 0.042 | 60.1 | 7.805 | 0.045 | 0.047 | 0.05 | 0.063 | 998.86 | 76.9 | +| 64 | 1 | 2048 | 128 | 0.245 | 32.394 | 19.69 | 0.047 | 0.048 | 0.05 | 0.27 | 286.32 | 76.99 | +| 64 | 1 | 2048 | 2048 | 0.245 | 307.331 | 190.453 | 0.047 | 0.048 | 0.049 | 0.05 | 387.86 | 77.09 | + +### internlm-20b + +| batch | tp | prompt_tokens | completion_tokens | 1st_token_latency(min)(s) | 1st_token_latency(max)(s) | 1st_token_latency(ave)(s) | percentile50(s) | percentile75(s) | percentile95(s) | percentile99(s) | throughput(token/s) | mem_per_gpu(GB) | +| ----- | --- | ------------- | ----------------- | ------------------------- | ------------------------- | ------------------------- | --------------- | --------------- | --------------- | --------------- | ------------------- | --------------- | +| 1 | 2 | 1 | 128 | 0.017 | 0.019 | 0.018 | 0.016 | 0.016 | 0.016 | 0.018 | 61.14 | 73.55 | +| 1 | 2 | 128 | 128 | 0.041 | 0.043 | 0.042 | 0.016 | 0.016 | 0.016 | 0.017 | 60.03 | 73.55 | +| 1 | 2 | 128 | 2048 | 0.042 | 0.043 | 0.042 | 0.017 | 0.017 | 0.018 | 0.018 | 58.26 | 73.55 | +| 1 | 2 | 2048 | 128 | 0.216 | 0.217 | 0.217 | 0.018 | 0.018 | 0.018 | 0.018 | 51.93 | 73.68 | +| 1 | 2 | 2048 | 2048 | 0.217 | 0.217 | 0.217 | 0.018 | 0.018 | 0.018 | 0.018 | 56.36 | 73.68 | +| 16 | 2 | 1 | 128 | 0.018 | 0.051 | 0.034 | 0.017 | 0.018 | 0.019 | 0.02 | 903.01 | 73.65 | +| 16 | 2 | 128 | 128 | 0.043 | 0.248 | 0.227 | 0.018 | 0.019 | 0.02 | 0.021 | 794.13 | 73.74 | +| 16 | 2 | 128 | 2048 | 0.043 | 0.25 | 0.227 | 0.024 | 0.027 | 0.029 | 0.03 | 669.87 | 73.74 | +| 16 | 2 | 2048 | 128 | 0.247 | 4.485 | 3.09 | 0.029 | 0.03 | 0.031 | 0.032 | 288.60 | 75.60 | +| 16 | 2 | 2048 | 2048 | 0.219 | 4.442 | 3.172 | 0.035 | 0.037 | 0.04 | 0.041 | 441.46 | 75.61 | +| 32 | 2 | 1 | 128 | 0.02 | 0.066 | 0.037 | 0.019 | 0.02 | 0.021 | 0.023 | 1673.64 | 73.71 | +| 32 | 2 | 128 | 128 | 0.043 | 0.436 | 0.351 | 0.02 | 0.021 | 0.023 | 0.025 | 1347.57 | 73.90 | +| 32 | 2 | 128 | 2048 | 0.042 | 0.441 | 0.391 | 0.031 | 
0.037 | 0.041 | 0.043 | 1025.62 | 73.90 |
| 32 | 2 | 2048 | 128 | 0.218 | 6.3 | 6.062 | 0.042 | 0.043 | 0.045 | 0.046 | 352.45 | 75.74 |
| 32 | 2 | 2048 | 2048 | 0.222 | 70.328 | 10.36 | 0.049 | 0.05 | 0.051 | 0.053 | 514.60 | 75.77 |
| 64 | 2 | 1 | 128 | 0.029 | 0.074 | 0.05 | 0.021 | 0.023 | 0.026 | 0.03 | 2954.34 | 73.82 |
| 64 | 2 | 128 | 128 | 0.047 | 0.808 | 0.591 | 0.024 | 0.026 | 0.029 | 0.032 | 2122.92 | 74.24 |
| 64 | 2 | 128 | 2048 | 0.049 | 41.212 | 2.529 | 0.042 | 0.048 | 0.052 | 0.055 | 1276.61 | 75.18 |
| 64 | 2 | 2048 | 128 | 0.219 | 20.986 | 12.382 | 0.05 | 0.051 | 0.054 | 0.249 | 350.82 | 75.88 |
| 64 | 2 | 2048 | 2048 | 0.221 | 211.531 | 111.149 | 0.05 | 0.051 | 0.052 | 0.055 | 512.37 | 76.26 |

### llama2-70b

| batch | tp | prompt_tokens | completion_tokens | 1st_token_latency(min)(s) | 1st_token_latency(max)(s) | 1st_token_latency(ave)(s) | percentile50(s) | percentile75(s) | percentile95(s) | percentile99(s) | throughput(token/s) | mem_per_gpu(GB) |
| ----- | --- | ------------- | ----------------- | ------------------------- | ------------------------- | ------------------------- | --------------- | --------------- | --------------- | --------------- | ------------------- | --------------- |
| 1 | 4 | 1 | 128 | 0.03 | 0.031 | 0.031 | 0.029 | 0.029 | 0.029 | 0.03 | 33.94 | 73.72 |
| 1 | 4 | 128 | 128 | 0.073 | 0.074 | 0.074 | 0.029 | 0.029 | 0.029 | 0.03 | 33.63 | 73.72 |
| 1 | 4 | 128 | 2048 | 0.074 | 0.075 | 0.074 | 0.031 | 0.031 | 0.031 | 0.031 | 32.38 | 73.72 |
| 1 | 4 | 2048 | 128 | 0.401 | 0.403 | 0.402 | 0.031 | 0.031 | 0.031 | 0.051 | 28.32 | 73.78 |
| 1 | 4 | 2048 | 2048 | 0.402 | 0.407 | 0.405 | 0.031 | 0.031 | 0.031 | 0.031 | 31.9 | 73.78 |
| 16 | 4 | 1 | 128 | 0.034 | 0.939 | 0.071 | 0.03 | 0.031 | 0.032 | 0.251 | 468.52 | 73.72 |
| 16 | 4 | 128 | 128 | 0.08 | 0.687 | 0.437 | 0.03 | 0.031 | 0.032 | 0.207 | 439.77 | 73.81 |
| 16 | 4 | 128 | 2048 | 0.079 | 0.44 | 0.403 | 0.033 | 0.033 | 0.035 | 0.036 | 482.99 | 73.81 |
| 16 | 4 | 2048 | 128 | 0.437 | 7.612 | 5.776 | 0.035 | 0.036 | 0.036 | 0.037 | 189.34 | 73.98 |
| 16 | 4 | 2048 | 2048 | 0.411 | 6.844 | 5.773 | 0.036 | 0.037 | 0.038 | 0.041 | 399.42 | 73.98 |
| 32 | 4 | 1 | 128 | 0.043 | 0.253 | 0.098 | 0.032 | 0.033 | 0.035 | 0.178 | 906.03 | 73.75 |
| 32 | 4 | 128 | 128 | 0.078 | 1.026 | 0.749 | 0.032 | 0.033 | 0.035 | 0.438 | 746.36 | 73.91 |
| 32 | 4 | 128 | 2048 | 0.076 | 1.129 | 0.732 | 0.036 | 0.038 | 0.041 | 0.158 | 853.56 | 73.91 |
| 32 | 4 | 2048 | 128 | 0.408 | 13.321 | 11.834 | 0.04 | 0.041 | 0.043 | 0.248 | 232.6 | 73.99 |
| 32 | 4 | 2048 | 2048 | 0.409 | 12.689 | 11.711 | 0.043 | 0.045 | 0.048 | 0.179 | 636.23 | 73.99 |
| 64 | 4 | 1 | 128 | 0.046 | 1.264 | 0.213 | 0.037 | 0.039 | 0.044 | 0.329 | 1425.79 | 73.81 |
| 64 | 4 | 128 | 128 | 0.107 | 2.676 | 1.292 | 0.037 | 0.04 | 0.045 | 0.378 | 1159.84 | 73.96 |
| 64 | 4 | 128 | 2048 | 0.135 | 1.623 | 1.173 | 0.043 | 0.047 | 0.052 | 0.251 | 1391.8 | 73.95 |
| 64 | 4 | 2048 | 128 | 0.452 | 24.164 | 17.402 | 0.05 | 0.052 | 0.057 | 0.345 | 270.47 | 74.02 |
| 64 | 4 | 2048 | 2048 | 0.423 | 24.498 | 21.29 | 0.055 | 0.059 | 0.065 | 0.299 | 930.46 | 74.01 |

## Request Throughput Benchmark

| model_name | batch | tp | num_prompts | 1st_token_latency(min)(s) | 1st_token_latency(max)(s) | 1st_token_latency(ave)(s) | output_token thr(tokens/s) | total_token thr(tokens/s) | RPS(req/s) | RPM(req/min) |
| ------------ | ----- | --- | ----------- | ------------------------- | ------------------------- | ------------------------- | 
-------------------------- | ------------------------- | ------ | ------- | +| llama2-7b | 64 | 1 | 3000 | 0.036 | 1.145 | 0.092 | 2562.435 | 5283.547 | 10.275 | 616.477 | +| | 128 | 1 | 3000 | 0.056 | 2.241 | 0.205 | 3210.281 | 6619.357 | 12.611 | 756.677 | +| llama2-13b | 64 | 1 | 3000 | 0.051 | 2.048 | 0.159 | 1474.786 | 3039.398 | 6.337 | 380.244 | +| | 128 | 1 | 3000 | 0.085 | 4.445 | 0.412 | 1765.788 | 3639.128 | 7.588 | 455.273 | +| internlm-20b | 64 | 2 | 3000 | 0.059 | 2.461 | 0.166 | 1564.696 | 3311.16 | 7.842 | 470.516 | +| | 128 | 2 | 3000 | 0.079 | 5.808 | 0.34 | 1950.627 | 4127.855 | 9.776 | 586.568 | +| llama2-70b | 64 | 4 | 3000 | 0.083 | 4.689 | 0.301 | 1000.376 | 2062.7 | 4.285 | 257.08 | +| | 128 | 4 | 3000 | 0.107 | 8.431 | 0.633 | 1361.939 | 2808.216 | 5.833 | 349.996 | +| | 256 | 4 | 3000 | 0.171 | 19.52 | 1.49 | 1533.592 | 3162.15 | 6.568 | 394.108 |
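RPM in the table above is simply RPS expressed per minute. For example, llama2-7b at batch 64 serves 10.275 req/s, and 10.275 × 60 ≈ 616.5 req/min, which matches the reported RPM of 616.477.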