From cd26ae3e451b5e5f6fbd07b1624d2736752e642b Mon Sep 17 00:00:00 2001 From: Ming-Yang Liu Date: Fri, 21 Sep 2018 14:50:37 +0800 Subject: [PATCH] Add pytorch darknet to caffe code and demo script --- README.md | 50 - demo_darknet_yolov3.sh | 1 + examples/ssd/ssd_detect.cpp | 26 +- models/darknet_yolov3/README.md | 29 + models/darknet_yolov3/darknet2caffe.py | 466 +++ models/darknet_yolov3/tiny-yolov3.prototxt | 607 ++++ models/darknet_yolov3/yolov3-tiny.cfg | 182 ++ models/darknet_yolov3/yolov3.cfg | 788 +++++ models/darknet_yolov3/yolov3.prototxt | 3271 ++++++++++++++++++++ 9 files changed, 5368 insertions(+), 52 deletions(-) create mode 100644 demo_darknet_yolov3.sh create mode 100644 models/darknet_yolov3/README.md create mode 100644 models/darknet_yolov3/darknet2caffe.py create mode 100644 models/darknet_yolov3/tiny-yolov3.prototxt create mode 100644 models/darknet_yolov3/yolov3-tiny.cfg create mode 100644 models/darknet_yolov3/yolov3.cfg create mode 100644 models/darknet_yolov3/yolov3.prototxt diff --git a/README.md b/README.md index 56bb9ac..c9eb85f 100644 --- a/README.md +++ b/README.md @@ -80,56 +80,6 @@ If load success , you can see the image window like this ![alt tag](00002.jpg) -### Vehicle Dection - -[![IMAGE ALT TEXT HERE](https://img.youtube.com/vi/oagXgyQHuNA/0.jpg)](https://www.youtube.com/watch?v=oagXgyQHuNA) - -#### CLASS NAME - -``` -char* CLASSES2[6] = { "__background__","bicycle", "car", "motorbike", "person","cones" }; -``` - -[model](models/vehicle) - -## Maintenance - -I'll appreciate if you can help me to - -1. Miragrate to [modivius neural compute stick](https://github.com/eric612/YoloV2-MobileNet-NCS) -2. Mobilenet upgrade to v2 or model tunning - -## Caffe - -[![Build Status](https://travis-ci.org/BVLC/caffe.svg?branch=master)](https://travis-ci.org/BVLC/caffe) -[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) - -Caffe is a deep learning framework made with expression, speed, and modularity in mind. 
-It is developed by Berkeley AI Research ([BAIR](http://bair.berkeley.edu))/The Berkeley Vision and Learning Center (BVLC) and community contributors. - -Check out the [project site](http://caffe.berkeleyvision.org) for all the details like - -- [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p) -- [Tutorial Documentation](http://caffe.berkeleyvision.org/tutorial/) -- [BAIR reference models](http://caffe.berkeleyvision.org/model_zoo.html) and the [community model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo) -- [Installation instructions](http://caffe.berkeleyvision.org/installation.html) - -and step-by-step examples. - -## Custom distributions - - - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, SKX, Xeon Phi). -- [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. -- [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) - -## Community - -[![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) - -Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models. -Framework development discussions and thorough bug reports are collected on [Issues](https://github.com/BVLC/caffe/issues). - -Happy brewing! 
## License and Citation diff --git a/demo_darknet_yolov3.sh b/demo_darknet_yolov3.sh new file mode 100644 index 0000000..204fcaa --- /dev/null +++ b/demo_darknet_yolov3.sh @@ -0,0 +1 @@ +./examples/ssd/ssd_detect models/darknet_yolov3/yolov3.prototxt models/darknet_yolov3/yolov3.caffemodel -file_type image -wait_time 1500 -mean_value 0.0,0.0,0.0 -normalize_value 0.0039215 -confidence_threshold 0.2 \ No newline at end of file diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp index 1ca7a02..a405fba 100644 --- a/examples/ssd/ssd_detect.cpp +++ b/examples/ssd/ssd_detect.cpp @@ -34,8 +34,30 @@ //char* CLASSES[6] = { "__background__", //"bicyle", "car", "motorbike", "person","cones" //}; -char* CLASSES[5] = { "__background__", -"big car","car", "motorbike","person" +//char* CLASSES[5] = { "__background__", +//"big car","car", "motorbike","person" +//}; +char* CLASSES[81] = { "__background__", +"person", "bicycle", "car", "motorcycle", +"airplane", "bus", "train", "truck", "boat", +"traffic light", "fire hydrant", "stop sign", "parking meter", +"bench", "bird", "cat", +"dog", "horse", "sheep", "cow", +"elephant", "bear", "zebra", "giraffe" , +"backpack", "umbrella", "handbag", "tie" , +"suitcase", "frisbee", "skis", "snowboard" , +"sports ball", "kite", "baseball bat", "baseball glove" , +"skateboard", "surfboard", "tennis racket", "bottle" , +"wine glass", "cup", "fork", "knife" , +"spoon", "bowl", "banana", "apple" , +"sandwich", "orange", "broccoli", "carrot" , +"hot dog", "pizza", "donut", "cake" , +"chair", "sofa", "potted plant", "bed" , +"dining table", "toilet", "tv", "laptop" , +"mouse", "remote", "keyboard", "cell phone" , +"microwave", "oven", "toaster", "sink" , +"refrigerator", "book", "clock", "vase" , +"scissors", "teddy bear", "hair drier", "toothbrush" , }; #else char* CLASSES[21] = { "__background__", diff --git a/models/darknet_yolov3/README.md b/models/darknet_yolov3/README.md new file mode 100644 index 0000000..915e543 --- 
/dev/null +++ b/models/darknet_yolov3/README.md @@ -0,0 +1,29 @@ +# Pytorch darknet to caffe + +Modified from [pytorch-caffe-darknet-convert](https://github.com/marvis/pytorch-caffe-darknet-convert),[object_detetction_tools](https://github.com/BingzheWu/object_detetction_tools) + +## Modified items : + +1. yolov3 output layer +2. when pooling layer stride =1 , size =2 , assign size = 1 +3. upsample layer + +## Usage : + +1. Download weights from original darknet web +2. Uncomment the 81-class CLASSES list in examples\ssd\ssd_detect.cpp +3. Remake project + +``` +> python darknet2caffe.py yolov3.cfg yolov3.weights yolov3.prototxt yolov3.caffemodel +> cd $caffe_root +> sh demo_darknet_yolov3.sh +``` + + +## To do list : + +1. verify accuracy and compare result +2. retrain + +I'm not sure the result is the same as darknet's yet; contributions are welcome \ No newline at end of file diff --git a/models/darknet_yolov3/darknet2caffe.py b/models/darknet_yolov3/darknet2caffe.py new file mode 100644 index 0000000..d026bef --- /dev/null +++ b/models/darknet_yolov3/darknet2caffe.py @@ -0,0 +1,466 @@ +import sys +#sys.path.append('~/MobileNet-YOLO/caffe/python') +import caffe +import numpy as np +from collections import OrderedDict +from cfg import * +from prototxt import * + +def darknet2caffe(cfgfile, weightfile, protofile, caffemodel): + net_info = cfg2prototxt(cfgfile) + save_prototxt(net_info , protofile, region=False) + + net = caffe.Net(protofile, caffe.TEST) + params = net.params + + blocks = parse_cfg(cfgfile) + fp = open(weightfile, "rb") + header = np.fromfile(fp, dtype = np.int32, count = 5) + buf = np.fromfile(fp, dtype = np.float32) + fp.close() + + layers = [] + layer_id = 1 + start = 0 + for block in blocks: + if start >= buf.size: + break + + if block['type'] == 'net': + continue + elif block['type'] == 'convolutional': + batch_normalize = int(block['batch_normalize']) + if 'name' in block.keys(): + conv_layer_name = block['name'] + bn_layer_name = '%s-bn' % block['name'] + 
scale_layer_name = '%s-scale' % block['name'] + else: + conv_layer_name = 'layer%d-conv' % layer_id + bn_layer_name = 'layer%d-bn' % layer_id + scale_layer_name = 'layer%d-scale' % layer_id + + if batch_normalize: + start = load_conv_bn2caffe(buf, start, params[conv_layer_name], params[bn_layer_name], params[scale_layer_name]) + else: + start = load_conv2caffe(buf, start, params[conv_layer_name]) + layer_id = layer_id+1 + elif block['type'] == 'connected': + if 'name' in block.keys(): + fc_layer_name = block['name'] + else: + fc_layer_name = 'layer%d-fc' % layer_id + start = load_fc2caffe(buf, start, params[fc_layer_name]) + layer_id = layer_id+1 + elif block['type'] == 'maxpool': + layer_id = layer_id+1 + elif block['type'] == 'avgpool': + layer_id = layer_id+1 + elif block['type'] == 'region': + layer_id = layer_id + 1 + elif block['type'] == 'route': + layer_id = layer_id + 1 + elif block['type'] == 'shortcut': + layer_id = layer_id + 1 + elif block['type'] == 'softmax': + layer_id = layer_id + 1 + elif block['type'] == 'cost': + layer_id = layer_id + 1 + elif block['type'] == 'upsample': + layer_id = layer_id + 1 + elif block['type'] == 'yolo': + layer_id = layer_id + 1 + else: + print('unknown layer type %s ' % block['type']) + layer_id = layer_id + 1 + print('save prototxt to %s' % protofile) + save_prototxt(net_info , protofile, region=True) + print('save caffemodel to %s' % caffemodel) + net.save(caffemodel) + +def load_conv2caffe(buf, start, conv_param): + weight = conv_param[0].data + bias = conv_param[1].data + conv_param[1].data[...] = np.reshape(buf[start:start+bias.size], bias.shape); start = start + bias.size + conv_param[0].data[...] = np.reshape(buf[start:start+weight.size], weight.shape); start = start + weight.size + return start + +def load_fc2caffe(buf, start, fc_param): + weight = fc_param[0].data + bias = fc_param[1].data + fc_param[1].data[...] 
= np.reshape(buf[start:start+bias.size], bias.shape); start = start + bias.size + fc_param[0].data[...] = np.reshape(buf[start:start+weight.size], weight.shape); start = start + weight.size + return start + + +def load_conv_bn2caffe(buf, start, conv_param, bn_param, scale_param): + conv_weight = conv_param[0].data + running_mean = bn_param[0].data + running_var = bn_param[1].data + scale_weight = scale_param[0].data + scale_bias = scale_param[1].data + + scale_param[1].data[...] = np.reshape(buf[start:start+scale_bias.size], scale_bias.shape); start = start + scale_bias.size + scale_param[0].data[...] = np.reshape(buf[start:start+scale_weight.size], scale_weight.shape); start = start + scale_weight.size + bn_param[0].data[...] = np.reshape(buf[start:start+running_mean.size], running_mean.shape); start = start + running_mean.size + bn_param[1].data[...] = np.reshape(buf[start:start+running_var.size], running_var.shape); start = start + running_var.size + bn_param[2].data[...] = np.array([1.0]) + conv_param[0].data[...] 
= np.reshape(buf[start:start+conv_weight.size], conv_weight.shape); start = start + conv_weight.size + return start + +def cfg2prototxt(cfgfile): + blocks = parse_cfg(cfgfile) + + layers = [] + props = OrderedDict() + bottom = 'data' + layer_id = 1 + topnames = dict() + yolo_count = 0 + mask = [] + bottom_yolo = [] + anchors_scale = [] + scale = 1 + num_out = 0 + for block in blocks: + if block['type'] == 'net': + props['name'] = 'Darkent2Caffe' + props['input'] = 'data' + props['input_dim'] = ['1'] + props['input_dim'].append(block['channels']) + props['input_dim'].append(block['height']) + props['input_dim'].append(block['width']) + continue + elif block['type'] == 'convolutional': + conv_layer = OrderedDict() + conv_layer['bottom'] = bottom + if block.has_key('name'): + conv_layer['top'] = block['name'] + conv_layer['name'] = block['name'] + else: + conv_layer['top'] = 'layer%d-conv' % layer_id + conv_layer['name'] = 'layer%d-conv' % layer_id + conv_layer['type'] = 'Convolution' + convolution_param = OrderedDict() + convolution_param['num_output'] = block['filters'] + convolution_param['kernel_size'] = block['size'] + if block['pad'] == '1': + convolution_param['pad'] = str(int(convolution_param['kernel_size'])/2) + convolution_param['pad'] = str(int(1)) + if block['size'] == '1': + convolution_param['pad'] = 0 + convolution_param['stride'] = block['stride'] + if int(block['stride'])==2: + scale = scale * 2 + if block['batch_normalize'] == '1': + convolution_param['bias_term'] = 'false' + else: + convolution_param['bias_term'] = 'true' + conv_layer['convolution_param'] = convolution_param + layers.append(conv_layer) + bottom = conv_layer['top'] + num_out = int(block['filters']) + if block['batch_normalize'] == '1': + bn_layer = OrderedDict() + bn_layer['bottom'] = bottom + bn_layer['top'] = bottom + if block.has_key('name'): + bn_layer['name'] = '%s-bn' % block['name'] + else: + bn_layer['name'] = 'layer%d-bn' % layer_id + bn_layer['type'] = 'BatchNorm' + 
batch_norm_param = OrderedDict() + batch_norm_param['use_global_stats'] = 'true' + bn_layer['batch_norm_param'] = batch_norm_param + layers.append(bn_layer) + + scale_layer = OrderedDict() + scale_layer['bottom'] = bottom + scale_layer['top'] = bottom + if block.has_key('name'): + scale_layer['name'] = '%s-scale' % block['name'] + else: + scale_layer['name'] = 'layer%d-scale' % layer_id + scale_layer['type'] = 'Scale' + scale_param = OrderedDict() + scale_param['bias_term'] = 'true' + scale_layer['scale_param'] = scale_param + layers.append(scale_layer) + + if block['activation'] != 'linear': + relu_layer = OrderedDict() + relu_layer['bottom'] = bottom + relu_layer['top'] = bottom + if block.has_key('name'): + relu_layer['name'] = '%s-act' % block['name'] + else: + relu_layer['name'] = 'layer%d-act' % layer_id + relu_layer['type'] = 'ReLU' + if block['activation'] == 'leaky': + relu_param = OrderedDict() + relu_param['negative_slope'] = '0.1' + relu_layer['relu_param'] = relu_param + layers.append(relu_layer) + topnames[layer_id] = bottom + layer_id = layer_id+1 + elif block['type'] == 'maxpool': + max_layer = OrderedDict() + max_layer['bottom'] = bottom + if block.has_key('name'): + max_layer['top'] = block['name'] + max_layer['name'] = block['name'] + else: + max_layer['top'] = 'layer%d-maxpool' % layer_id + max_layer['name'] = 'layer%d-maxpool' % layer_id + max_layer['type'] = 'Pooling' + pooling_param = OrderedDict() + pooling_param['kernel_size'] = block['size'] + pooling_param['stride'] = block['stride'] + + pooling_param['pool'] = 'MAX' + if block.has_key('pad') and int(block['pad']) == 1: + pooling_param['pad'] = str((int(block['size'])-1)/2) + #if int(block['stride']) == 1 : + # pooling_param['pad'] = 0 + max_layer['pooling_param'] = pooling_param + layers.append(max_layer) + bottom = max_layer['top'] + topnames[layer_id] = bottom + if int(block['stride']) == 2 : + scale = scale * 2 + layer_id = layer_id+1 + elif block['type'] == 'avgpool': + avg_layer = 
OrderedDict() + avg_layer['bottom'] = bottom + if block.has_key('name'): + avg_layer['top'] = block['name'] + avg_layer['name'] = block['name'] + else: + avg_layer['top'] = 'layer%d-avgpool' % layer_id + avg_layer['name'] = 'layer%d-avgpool' % layer_id + avg_layer['type'] = 'Pooling' + pooling_param = OrderedDict() + pooling_param['kernel_size'] = 7 + pooling_param['stride'] = 1 + pooling_param['pool'] = 'AVE' + avg_layer['pooling_param'] = pooling_param + layers.append(avg_layer) + bottom = avg_layer['top'] + topnames[layer_id] = bottom + layer_id = layer_id+1 + elif block['type'] == 'region': + if True: + region_layer = OrderedDict() + region_layer['bottom'] = bottom + if block.has_key('name'): + region_layer['top'] = block['name'] + region_layer['name'] = block['name'] + else: + region_layer['top'] = 'layer%d-region' % layer_id + region_layer['name'] = 'layer%d-region' % layer_id + region_layer['type'] = 'Region' + region_param = OrderedDict() + region_param['anchors'] = block['anchors'].strip() + region_param['classes'] = block['classes'] + region_param['num'] = block['num'] + region_layer['region_param'] = region_param + layers.append(region_layer) + bottom = region_layer['top'] + topnames[layer_id] = bottom + layer_id = layer_id + 1 + elif block['type'] == 'route': + route_layer = OrderedDict() + layer_name = str(block['layers']).split(',') + bottom_layer_dim = len(layer_name) + if (bottom_layer_dim == 1): + prev_layer_id = layer_id + int(block['layers']) + bottom = topnames[prev_layer_id] + #topnames[layer_id] = bottom + route_layer['bottom'] = bottom + if (bottom_layer_dim == 2): + layer_name = [layer_id + int(idx) if int(idx) < 0 else int(idx) + 1 for idx in layer_name ] + prev_layer_id1 = int(layer_name[0]) + prev_layer_id2 = int(layer_name[1]) + bottom1 = topnames[prev_layer_id1] + bottom2 = topnames[prev_layer_id2] + route_layer['bottom'] = [bottom1, bottom2] + if 'name' in block.keys(): + route_layer['top'] = block['name'] + route_layer['name'] = 
block['name'] + else: + route_layer['top'] = 'layer%d-route' % layer_id + route_layer['name'] = 'layer%d-route' % layer_id + route_layer['type'] = 'Concat' + layers.append(route_layer) + bottom = route_layer['top'] + topnames[layer_id] = bottom + layer_id = layer_id + 1 + elif block['type'] == 'upsample': + upsample_layer = OrderedDict() + upsample_layer['bottom'] = bottom + if 'name' in block.keys(): + upsample_layer['top'] = block['name'] + upsample_layer['name'] = block['name'] + else: + upsample_layer['top'] = 'layer%d-upsample' % layer_id + upsample_layer['name'] = 'layer%d-upsample' % layer_id + upsample_layer['type'] = 'Deconvolution' + convolution_param = OrderedDict() + convolution_param['stride'] = block['stride'] + convolution_param['kernel_size'] = 4 + prev_layer_id = layer_id - 4 + convolution_param['num_output'] = num_out + convolution_param['group'] = num_out + convolution_param['pad'] = 1 + weight_filler = OrderedDict() + weight_filler['type'] = 'bilinear' + convolution_param['bias_term'] = 'false' + convolution_param['weight_filler'] = weight_filler + upsample_layer['convolution_param'] = convolution_param + layers.append(upsample_layer) + bottom = upsample_layer['top'] + topnames[layer_id] = bottom + scale = scale /2 + layer_id = layer_id + 1 + elif block['type'] == 'yolo': + + anchor_len = len(block['anchors'].split(','))/2 + for i in block['mask'].split(',') : + mask.append(i) + #bottom_layer_dim = bottom['num_output'] + #print(scale) + #print(anchor_len) + anchors_scale.append(scale) + if len(mask)