diff --git a/README.md b/README.md
index 9098fdc..b6af93c 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,20 @@ export PYTHONPATH=$PWD
 python3 test.py --weights 权重路径
 ```
 
+## Comparison with other methods on GitHub
+GitHub 上也有一些其他作者实现的 mobile-yolo 系列，其中 paddle 和 keras 都是用 MobileNetV1 作为 backbone，无法直接计算 FLOPs。
+darknet 的 FLOPs 采用其作者 README 中给出的 BFLOPs。
+
+|Model|resolution|COCO mAP|VOC mAP|Size|
+|----|----|----|----|----|
+|Ours|320|23.2|71.7|1x|
+|Ours|640|27.5|74.4|4x|
+|[caffe](https://github.com/eric612/MobileNet-YOLO)|352|-|71.5|1.2x|
+|[paddle](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.5/docs/MODEL_ZOO_cn.md)|608|29.3|76.2|-|
+|[darknet](https://github.com/dog-qiuqiu/MobileNet-Yolo)|352|-|70.7|2.2x|
+|[keras](https://github.com/Adamdad/keras-YOLOv3-mobilenet)|320|-|74.56|-|
+|[tensorflow](https://github.com/fsx950223/mobilenetv2-yolov3)|416|-|66.9|1.7x|
+
 ## Inference with TensorRT
 1. python3 model/onnx_export.py --weights weight_path
 2. 参考[https://github.com/Syencil/tensorRT](https://github.com/Syencil/tensorRT)
diff --git a/script/coco2darknet.py b/script/coco2darknet.py
index 87f64bd..d72fffc 100644
--- a/script/coco2darknet.py
+++ b/script/coco2darknet.py
@@ -469,13 +469,15 @@ def convert_ath_json(json_dir):  # dir contains json annotations and images
     print('Done. Output saved to %s' % Path(dir).absolute())
 
 
-def convert_coco_json(json_dir='../coco/annotations/'):
+def convert_coco_json(json_dir='/data/dataset/coco/annotations/'):
     dir = make_folders(path='out/')  # output directory
     jsons = glob.glob(json_dir + '*.json')
     coco80 = coco91_to_coco80_class()
 
     # Import json
     for json_file in sorted(jsons):
+        if 'instances' not in json_file:
+            continue
         fn = 'out/labels/%s/' % Path(json_file).stem.replace('instances_', '')  # folder name
         os.mkdir(fn)
         with open(json_file) as f:
diff --git a/test.py b/test.py
index 22c15e6..1690b65 100644
--- a/test.py
+++ b/test.py
@@ -87,10 +87,8 @@ def test(data,
                                 collate_fn=dataset.collate_fn)
 
     seen = 0
-    names = {0: 'aeroplane', 1: 'bicycle', 2: 'bird', 3: 'boat', 4: 'bottle', 5: 'bus',
-               6: 'car', 7: 'cat', 8: 'chair', 9: 'cow', 10: 'diningtable', 11: 'dog', 12: 'horse',
-               13: 'motorbike', 14: 'person', 15: 'pottedplant', 16: 'sheep', 17: 'sofa',
-               18: 'train', 19: 'tvmonitor'}
+    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
+
     coco91class = coco80_to_coco91_class()
     s = ('%20s' + '%12s' * 6) % ('Class', 'Images',
                                  'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
@@ -365,7 +363,8 @@ def test(data,
              opt.iou_thres,
              opt.save_json,
              opt.single_cls,
-             opt.augment)
+             opt.augment,
+             verbose=opt.verbose)
 
     elif opt.task == 'study':  # run over a range of settings and save/plot
         for weights in ['yolov5s.pt', 'yolov5m.pt',
diff --git a/train.py b/train.py
index fcedf55..4f1ceb5 100644
--- a/train.py
+++ b/train.py
@@ -201,22 +201,22 @@ def get_hooks():
                 c1 = 256
                 c2 = 512
                 c3 = 1024
-            # S_Converter_1 = Converter(32, c1, act=True)
-            # S_Converter_2 = Converter(96, c2, act=True)
-            # S_Converter_3 = Converter(320, c3, act=True)
-            # S_Converter_1.to(device)
-            # S_Converter_2.to(device)
-            # S_Converter_3.to(device)
-            # S_Converter_1.train()
-            # S_Converter_2.train()
-            # S_Converter_3.train()
-
-            # T_Converter_1 = nn.ReLU6()
-            # T_Converter_2 = nn.ReLU6()
-            # T_Converter_3 = nn.ReLU6()
-            T_Converter_1 = Converter(c1, 32, act=True)
-            T_Converter_2 = Converter(c2, 96, act=True)
-            T_Converter_3 = Converter(c3, 320, act=True)
+            S_Converter_1 = Converter(32, c1, act=True)
+            S_Converter_2 = Converter(96, c2, act=True)
+            S_Converter_3 = Converter(320, c3, act=True)
+            S_Converter_1.to(device)
+            S_Converter_2.to(device)
+            S_Converter_3.to(device)
+            S_Converter_1.train()
+            S_Converter_2.train()
+            S_Converter_3.train()
+
+            T_Converter_1 = nn.ReLU6()
+            T_Converter_2 = nn.ReLU6()
+            T_Converter_3 = nn.ReLU6()
+            # T_Converter_1 = Converter(c1, 32, act=True)
+            # T_Converter_2 = Converter(c2, 96, act=True)
+            # T_Converter_3 = Converter(c3, 320, act=True)
             T_Converter_1.to(device)
             T_Converter_2.to(device)
             T_Converter_3.to(device)
@@ -360,14 +360,16 @@ def get_hooks():
             if opt.dist:
                 if opt.d_online:
                     t_pred = t_model(imgs)
+                    for p in t_pred:
+                        p = p.detach()
                 else:
                     with torch.no_grad():
                         t_pred = t_model(imgs)
                 if opt.d_feature:
-                    # s_f1 = S_Converter_1(activation["s_f1"])
-                    # s_f2 = S_Converter_2(activation["s_f2"])
-                    # s_f3 = S_Converter_3(activation["s_f3"])
-                    # s_f = [s_f1, s_f2, s_f3]
+                    s_f1 = S_Converter_1(activation["s_f1"])
+                    s_f2 = S_Converter_2(activation["s_f2"])
+                    s_f3 = S_Converter_3(activation["s_f3"])
+                    s_f = [s_f1, s_f2, s_f3]
                     s_f = (activation["s_f1"], activation["s_f2"], activation["s_f3"])
                     t_f1 = T_Converter_1(activation["t_f1"])
                     t_f2 = T_Converter_2(activation["t_f2"])
@@ -375,7 +377,7 @@ def get_hooks():
                     t_f = [t_f1, t_f2, t_f3]
                     # t_f = (activation["t_f1"], activation["t_f2"], activation["t_f3"])
             # Loss
-            loss, loss_items = compute_loss(pred, targets.to(device), model)
+            loss, loss_items = compute_loss(pred, targets.to(device), model, None)
 
             # Sparse Learning
             if opt.sl > 0:
@@ -384,7 +386,7 @@ def get_hooks():
             # distillation
             if opt.dist:
                 if opt.d_online:
-                    loss, _ = compute_loss(t_pred, targets.to(device), model, loss)
+                    loss, _ = compute_loss(t_pred, targets.to(device), t_model, loss)
                 loss = compute_distillation_output_loss(pred, t_pred, model, loss)
                 if opt.d_feature:
                     loss = compute_distillation_feature_loss(s_f, t_f, model, loss)
@@ -524,12 +526,12 @@ def get_hooks():
     parser.add_argument('--nw', type=int, default=None, help='num of worker')
     # pruning
     parser.add_argument('--sl', default=0, type=float, help='sparse learning')
-    parser.add_argument('--ft', action='store_true', help='fine-tune')
+    parser.add_argument('--ft', action='store_true', default=False, help='fine-tune')
     # distillation
     parser.add_argument('--dist', action='store_true', help='distillation')
-    parser.add_argument('--t_weights', type=str, help='teacher model for distillation')
-    parser.add_argument('--d_feature', action='store_true', help='if true, distill both feature and output layers')
-    parser.add_argument('--d_online', action='store_true', help='if true, using online-distillation')
+    parser.add_argument('--t_weights', type=str, default="", help='teacher model for distillation')
+    parser.add_argument('--d_feature', action='store_true', default=False, help='if true, distill both feature and output layers')
+    parser.add_argument('--d_online', action='store_true', default=False, help='if true, using online-distillation')
     opt = parser.parse_args()
 
     if opt.type == "mcocos":
@@ -546,7 +548,7 @@ def get_hooks():
         opt.data = "data/coco.yaml"
         opt.name = opt.type
         opt.weights = "outputs/dmvocs/weights/best_dmvocs.pt"
-        opt.epochs = 50
+        opt.epochs = 10
         opt.batch_size = 24
         opt.multi_scale = False
         opt.dist = True
@@ -737,7 +739,7 @@ def get_hooks():
         opt.multi_scale = False
         opt.dist = True
         opt.d_online = True
-        opt.t_weights = "outputs/voc/weights/best_voc.pt"
+        opt.t_weights = "/data/checkpoints/yolov5/yolov5s.pt"
         hyp["dist"] = 1
 
     if opt.nw is None: