From 3b7f1fe2705d86f916eb374dd4e16b486a1346d6 Mon Sep 17 00:00:00 2001
From: Ruotian Luo
Date: Fri, 5 May 2017 01:18:08 -0500
Subject: [PATCH] Remove the original coco preprocess. Fix a bug in train_rl_tb.py

---
 coco/coco_preprocess.ipynb | 188 -------------------------------------
 train_rl_tb.py             |   1 +
 2 files changed, 1 insertion(+), 188 deletions(-)
 delete mode 100644 coco/coco_preprocess.ipynb

diff --git a/coco/coco_preprocess.ipynb b/coco/coco_preprocess.ipynb
deleted file mode 100644
index 18a39cc2..00000000
--- a/coco/coco_preprocess.ipynb
+++ /dev/null
@@ -1,188 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# COCO data preprocessing\n",
-    "\n",
-    "This code will download the caption anotations for coco and preprocess them into an hdf5 file and a json file. \n",
-    "\n",
-    "These will then be read by the COCO data loader in Lua and trained on."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# lets download the annotations from http://mscoco.org/dataset/#download\n",
-    "import os\n",
-    "os.system('wget http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip') # ~19MB"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "os.system('unzip captions_train-val2014.zip')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "val = json.load(open('annotations/captions_val2014.json', 'r'))\n",
-    "train = json.load(open('annotations/captions_train2014.json', 'r'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[u'info', u'images', u'licenses', u'annotations']\n",
-      "{u'description': u'This is stable 1.0 version of the 2014 MS COCO dataset.', u'url': u'http://mscoco.org', u'version': u'1.0', u'year': 2014, u'contributor': u'Microsoft COCO group', u'date_created': u'2015-01-27 09:11:52.357475'}\n",
-      "40504\n",
-      "202654\n",
-      "{u'license': 3, u'file_name': u'COCO_val2014_000000391895.jpg', u'coco_url': u'http://mscoco.org/images/391895', u'height': 360, u'width': 640, u'date_captured': u'2013-11-14 11:18:45', u'flickr_url': u'http://farm9.staticflickr.com/8186/8119368305_4e622c8349_z.jpg', u'id': 391895}\n",
-      "{u'image_id': 203564, u'id': 37, u'caption': u'A bicycle replica with a clock as the front wheel.'}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print val.keys()\n",
-    "print val['info']\n",
-    "print len(val['images'])\n",
-    "print len(val['annotations'])\n",
-    "print val['images'][0]\n",
-    "print val['annotations'][0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "import os\n",
-    "\n",
-    "# combine all images and annotations together\n",
-    "imgs = val['images'] + train['images']\n",
-    "annots = val['annotations'] + train['annotations']\n",
-    "\n",
-    "# for efficiency lets group annotations by image\n",
-    "itoa = {}\n",
-    "for a in annots:\n",
-    "  imgid = a['image_id']\n",
-    "  if not imgid in itoa: itoa[imgid] = []\n",
-    "  itoa[imgid].append(a)\n",
-    "\n",
-    "# create the json blob\n",
-    "out = []\n",
-    "for i,img in enumerate(imgs):\n",
-    "  imgid = img['id']\n",
-    "  \n",
-    "  # coco specific here, they store train/val images separately\n",
-    "  loc = 'train2014' if 'train' in img['file_name'] else 'val2014'\n",
-    "  \n",
-    "  jimg = {}\n",
-    "  jimg['file_path'] = os.path.join(loc, img['file_name'])\n",
-    "  jimg['id'] = imgid\n",
-    "  \n",
-    "  sents = []\n",
-    "  annotsi = itoa[imgid]\n",
-    "  for a in annotsi:\n",
-    "    sents.append(a['caption'])\n",
-    "  jimg['captions'] = sents\n",
-    "  out.append(jimg)\n",
-    "  \n",
-    "json.dump(out, open('coco_raw.json', 'w'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'captions': [u'A man with a red helmet on a small moped on a dirt road. ', u'Man riding a motor bike on a dirt road on the countryside.', u'A man riding on the back of a motorcycle.', u'A dirt path with a young person on a motor bike rests to the foreground of a verdant area with a bridge and a background of cloud-wreathed mountains. ', u'A man in a red shirt and a red hat is on a motorcycle on a hill side.'], 'file_path': u'val2014/COCO_val2014_000000391895.jpg', 'id': 391895}\n"
-     ]
-    }
-   ],
-   "source": [
-    "# lets see what they look like\n",
-    "print out[0]"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/train_rl_tb.py b/train_rl_tb.py
index 51493977..52d35186 100644
--- a/train_rl_tb.py
+++ b/train_rl_tb.py
@@ -65,6 +65,7 @@ def train(opt):
     # Assure in training mode
     model.train()
 
+    crit = utils.LanguageModelCriterion()
     rl_crit = utils.RewardCriterion()
 
     optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate, weight_decay=opt.weight_decay)
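
Note on the train_rl_tb.py hunk: the fix constructs crit = utils.LanguageModelCriterion() next to rl_crit = utils.RewardCriterion(), presumably because the training loop later refers to crit for the standard cross-entropy objective and would otherwise fail with a NameError. utils.py is not part of this patch, so the sketch below is only a rough illustration of what such a criterion typically computes: a masked sequence cross-entropy averaged over non-padded tokens. The class name, tensor shapes, and the smoke test are assumptions, not the repository's actual implementation.

import torch
import torch.nn as nn

class MaskedLanguageModelCriterion(nn.Module):
    # Illustrative stand-in for utils.LanguageModelCriterion (assumed behavior, not the repo's code).
    def forward(self, log_probs, targets, mask):
        # log_probs: (batch, seq_len, vocab) log-softmax outputs of the caption model
        # targets:   (batch, seq_len) ground-truth word indices
        # mask:      (batch, seq_len) 1.0 for real tokens, 0.0 for padding
        neg_log_lik = -log_probs.gather(2, targets.unsqueeze(2)).squeeze(2)
        # average the negative log-likelihood over the unmasked positions only
        return (neg_log_lik * mask).sum() / mask.sum()

if __name__ == '__main__':
    # Tiny smoke test on random data (hypothetical shapes).
    batch, seq_len, vocab = 2, 5, 10
    log_probs = torch.log_softmax(torch.randn(batch, seq_len, vocab), dim=2)
    targets = torch.randint(0, vocab, (batch, seq_len))
    mask = torch.ones(batch, seq_len)
    crit = MaskedLanguageModelCriterion()
    print(crit(log_probs, targets, mask).item())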