-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new file: 02_1_simple-RNN-diffrent-sequence-length.ipynb new file: 02_2_simple-RNN-diffrent-sequence-length.ipynb new file: 03_1_Cryptocurrency-predicting.ipynb new file: 03_2_Cryptocurrency-predicting.ipynb new file: 04_simple-CNN-LSTM.ipynb new file: 05-1-video-action-recognition-train-extract-features-with-cnn.ipynb new file: 05-2_video-action-recognition-train-rnn.ipynb new file: 06_analogy-using-embeddings.ipynb new file: 07_text-classification-Emojify.ipynb new file: 08_shahnameh-text-generation-language-model.ipynb new file: 09_add-numbers-with-seq2seq.ipynb new file: 10_Neural-machine-translation-with-attention-for-date-convert.ipynb new file: 11_nmt-with-attention.ipynb new file: 12_image-captioning-with-attention.ipynb new file: TimeDistributed.ipynb new file: final_cnn_lstm.ipynb new file: images/attn_mechanism.png new file: images/attn_model.png new file: images/cosine_sim.png new file: images/data_set.png new file: images/date_attention.png new file: images/date_attention2.png new file: images/embedding1.png new file: images/emojifier-v2.png new file: images/image_1.png new file: images/poorly_trained_model.png new file: images/table.png new file: logo.png new file: nmt_utils.py
- Loading branch information
1 parent
403bd9f
commit 341ef3e
Showing
30 changed files
with
13,675 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
326 changes: 326 additions & 0 deletions
326
05-1-video-action-recognition-train-extract-features-with-cnn.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,326 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<center><div style=\"direction:rtl;font-family:B Lotus, B Nazanin, Tahoma\">به نام خدا</div></center>\n", | ||
"<img src=\"./logo.png\" alt=\"class.vision\" style=\"width: 200px;\"/>\n", | ||
"<h1><center><div style=\"direction:rtl;font-family:B Lotus, B Nazanin, Tahoma\">طبقه بندی ویدیو با شبکههای بازگشتی - استخراج ویژگی</div></center></h1>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## <div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\">مجموعه داده</div>\n", | ||
"\n", | ||
"\n", | ||
"<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">\n", | ||
"قبلا 6 کلاس از دیتاست UCF-101 را به عنوان نمونه انتخاب و فریمهای ویدیوهای متعلق به این 6 کلاس از این مجموعه داده را استخراج کرده ایم و اطلاعات هر ویدیو نظیر اسم - کلاس و تعداد فریم را در یک فایل متنی قرار داده ایم.\n", | ||
"<br/>\n", | ||
" \n", | ||
"این 6 کلاس که برای این آموزش آماده شده است را از اینجا دانلود کنید: \n", | ||
"</div>\n", | ||
"\n", | ||
"http://dataset.class.vision/rnn/RNN-Video-6action.zip\n", | ||
"\n", | ||
"<br/>\n", | ||
"<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">\n", | ||
" همچنین\n", | ||
" دیتاست اصلی شامل 101 کلاس مختلف را میتوانید از لینک زیر دانلود کنید:\n", | ||
"</div>\n", | ||
"\n", | ||
"<strong>UCF-101</strong>\n", | ||
"[https://www.crcv.ucf.edu/data/UCF101.php](https://www.crcv.ucf.edu/data/UCF101.php)\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"Using TensorFlow backend.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from keras.preprocessing import image\n", | ||
"from keras.applications.inception_v3 import InceptionV3, preprocess_input\n", | ||
"from keras.models import Model, load_model\n", | ||
"from keras.layers import Input\n", | ||
"import numpy as np\n", | ||
"import os.path\n", | ||
"from tqdm import tqdm\n", | ||
"import csv\n", | ||
"import random\n", | ||
"import glob\n", | ||
"import os.path\n", | ||
"import sys\n", | ||
"import operator\n", | ||
"import threading\n", | ||
"from keras.utils import to_categorical\n", | ||
"from keras.preprocessing.image import img_to_array, load_img" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Number of frames sampled from each video; also the minimum frame count a video needs to be kept.\n", | ||
"seq_length= 40\n", | ||
"# Videos with more frames than this are dropped by the filtering cell.\n", | ||
"max_frames = 300\n", | ||
"# NOTE(review): not referenced by the other visible cells; model_predict loads frames at 299x299.\n", | ||
"image_shape=(224, 224, 3)\n", | ||
"# Dataset root; the 'train' directory and the frame folders are resolved relative to it.\n", | ||
"base_path = \"D:/dataset/RNN-Video\"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"with open(os.path.join('D:/dataset/RNN-Video/data_file_5class.csv'), 'r') as fin:\n", | ||
" reader = csv.reader(fin)\n", | ||
" data = list(reader)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['CricketBowling',\n", | ||
" 'CricketShot',\n", | ||
" 'FieldHockeyPenalty',\n", | ||
" 'HandstandPushups',\n", | ||
" 'HandstandWalking',\n", | ||
" 'SoccerPenalty']" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"train_path = os.path.join(base_path, 'train')\n", | ||
"classes =os.listdir(train_path)\n", | ||
"classes = sorted(classes)\n", | ||
"classes" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<div style=\"direction:rtl;text-align:right;font-family:Tahoma\">\n", | ||
" در اینجا آن ویدیوهایی که حداقل 40 فریم و حداکثر 300 فریم دارند را لود میکنیم.\n", | ||
"</div>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data_clean = []\n", | ||
"for item in data:\n", | ||
" if int(item[3]) >= seq_length and int(item[3]) <= max_frames:\n", | ||
" data_clean.append(item)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"439" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"len(data_clean)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_n_sample_from_video(sample, seq_length):\n", | ||
" path = os.path.join(base_path, sample[0], sample[1])\n", | ||
" filename = sample[2]\n", | ||
" images = sorted(glob.glob(os.path.join(path, filename + '*jpg')))\n", | ||
"\n", | ||
" #Given a list and a size, return a rescaled/samples list. For example,\n", | ||
" #if we want a list of size 5 and we have a list of size 25, return a new\n", | ||
" #list of size five which is every 5th element of the origina list.\n", | ||
" # Get the number to skip between iterations.\n", | ||
" skip = len(images) // seq_length\n", | ||
"\n", | ||
" # Build our new output.\n", | ||
" output = [images[i] for i in range(0, len(images), skip)]\n", | ||
"\n", | ||
" # Cut off the last one if needed.\n", | ||
" return output[:seq_length]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['train', 'HandstandWalking', 'v_HandstandWalking_g24_c06', '151']" | ||
] | ||
}, | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"data_clean[3]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"40" | ||
] | ||
}, | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"len(get_n_sample_from_video(data_clean[3], 40))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get model with pretrained weights.\n", | ||
"base_model = InceptionV3(weights='imagenet', include_top=True)\n", | ||
"\n", | ||
"# We'll extract features at the final pool layer.\n", | ||
"model = Model(inputs=base_model.input,\n", | ||
"              outputs=base_model.get_layer('avg_pool').output)\n", | ||
"\n", | ||
"def model_predict(image_path):\n", | ||
"    \"\"\"Extract the 'avg_pool' features of one image file.\n", | ||
"\n", | ||
"    The image is loaded at 299x299, converted to an array, given a leading\n", | ||
"    batch axis and run through InceptionV3's `preprocess_input` before it is\n", | ||
"    fed to the truncated `model`.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        image_path: Path of the image file to load.\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        The first (and only) row of `model.predict` for this image.\n", | ||
"    \"\"\"\n", | ||
"    img = image.load_img(image_path, target_size=(299, 299))\n", | ||
"    x = image.img_to_array(img)\n", | ||
"    # The model predicts on batches, so wrap the single image in a batch of one.\n", | ||
"    x = np.expand_dims(x, axis=0)\n", | ||
"    x = preprocess_input(x)\n", | ||
"\n", | ||
"    # Get the prediction.\n", | ||
"    features = model.predict(x)\n", | ||
"    return features[0]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|████████████████████████████████████████████████████████████████████████████████| 439/439 [18:31<00:00, 2.85s/it]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"os.makedirs('sequences', exist_ok=True)\n", | ||
"for video in tqdm(data_clean):\n", | ||
"\n", | ||
" # Get the path to the sequence for this video.\n", | ||
" path = os.path.join('sequences', video[2] + '-' + str(seq_length) + \\\n", | ||
" '-features') # numpy will auto-append .npy\n", | ||
"\n", | ||
" # Check if we already have it.\n", | ||
" if os.path.isfile(path + '.npy'):\n", | ||
" continue\n", | ||
"\n", | ||
" # Get the frames for this video.\n", | ||
" frames = get_n_sample_from_video(video, seq_length)\n", | ||
"\n", | ||
" # Now loop through and extract features to build the sequence.\n", | ||
" sequence = []\n", | ||
" for frame in frames:\n", | ||
" features = model_predict(frame)\n", | ||
" sequence.append(features)\n", | ||
"\n", | ||
" # Save the sequence.\n", | ||
" np.save(path, sequence)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<div class=\"alert alert-block alert-info\">\n", | ||
"<div style=\"direction:rtl;text-align:right;font-family:B Lotus, B Nazanin, Tahoma\"> دوره پیشرفته یادگیری عمیق<br>علیرضا اخوان پور<br> آبان و آذر 1399<br>\n", | ||
"</div>\n", | ||
"<a href=\"http://class.vision\">Class.Vision</a> - <a href=\"http://AkhavanPour.ir\">AkhavanPour.ir</a> - <a href=\"https://github.com/Alireza-Akhavan/\">GitHub</a>\n", | ||
"\n", | ||
"</div>" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "tensorflow", | ||
"language": "python", | ||
"name": "tensorflow" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.