Skip to content

Commit 2231d2b

Browse files
Add files via upload
The dataset used for this project is available at "https://www.kaggle.com/shadabhussain/automated-image-captioning-flickr8/data"
0 parents  commit 2231d2b

File tree

5 files changed

+59158
-0
lines changed

5 files changed

+59158
-0
lines changed

Image Captioning with Resnet50.ipynb

Lines changed: 58933 additions & 0 deletions
Large diffs are not rendered by default.

app.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
from flask import Flask, render_template, request
2+
import cv2
3+
from keras.models import load_model
4+
import numpy as np
5+
from keras.applications import ResNet50
6+
from keras.optimizers import Adam
7+
from keras.layers import Dense, Flatten,Input, Convolution2D, Dropout, LSTM, TimeDistributed, Embedding, Bidirectional, Activation, RepeatVector,Concatenate
8+
from keras.models import Sequential, Model
9+
from keras.utils import np_utils
10+
from keras.preprocessing import image, sequence
11+
import cv2
12+
from keras.preprocessing.sequence import pad_sequences
13+
import tensorflow
14+
from tqdm import tqdm
15+
from keras.applications import resnet50
16+
from werkzeug.utils import secure_filename
17+
import os
18+
from tensorflow.compat.v1 import ConfigProto
19+
from tensorflow.compat.v1 import InteractiveSession
20+
21+
# TensorFlow session: limit this process to half of the GPU memory and let
# the allocation grow on demand.
config = ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.5
session = InteractiveSession(config=config)

# The vocabulary is stored as a pickled object inside a 0-d NumPy array;
# .item() unwraps it into the word -> index mapping used below.
# NOTE(review): allow_pickle=True executes pickle on load -- only ever point
# this at a trusted vocab.npy.
vocab = np.load('vocab.npy', allow_pickle=True).item()

# Reverse mapping (index -> word) used to decode model predictions.
inv_vocab = dict(zip(vocab.values(), vocab.keys()))

# Hyper-parameters of the captioning network.
embedding_size, max_len = 128, 40
vocab_size = len(vocab)
40+
41+
42+
# Image branch: project the 2048-d image feature vector to `embedding_size`
# and repeat it once per caption time step so it can be concatenated with
# the language branch.
image_model = Sequential()
image_model.add(Dense(embedding_size, input_shape=(2048,), activation='relu'))
image_model.add(RepeatVector(max_len))

# Language branch: embed the (padded) partial caption and run it through an
# LSTM, emitting one `embedding_size` vector per time step.
language_model = Sequential()
language_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_len))
language_model.add(LSTM(256, return_sequences=True))
language_model.add(TimeDistributed(Dense(embedding_size)))

# Decoder: concatenate both branches per time step, then reduce the sequence
# to a single softmax distribution over the vocabulary (the next word).
conca = Concatenate()([image_model.output, language_model.output])
x = LSTM(128, return_sequences=True)(conca)
x = LSTM(512, return_sequences=False)(x)
x = Dense(vocab_size)(x)
out = Activation('softmax')(x)
model = Model(inputs=[image_model.input, language_model.input], outputs=out)

model.compile(loss='categorical_crossentropy', optimizer='RMSprop', metrics=['accuracy'])

# Location of the trained weights. The historical default is an absolute
# path on the original author's machine; set CAPTION_MODEL_WEIGHTS to run
# the app anywhere else without editing the source.
weights_path = os.environ.get(
    'CAPTION_MODEL_WEIGHTS',
    'C:\\Users\\Balaji\\Documents\\Machine Learning\\Deep Learning\\Image Captioning\\Flickr_Data\\Flickr_Data\\model.h5',
)
model.load_weights(weights_path)

print("="*150)
print("MODEL LOADED")
68+
69+
# Headless ResNet50 with global average pooling: used purely as a feature
# extractor for 224x224 RGB inputs (its output is reshaped to (1, 2048) by
# the request handler).
resnet = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)

print("=" * 150, "RESNET MODEL LOADED", sep="\n")

# Flask application; static files are served with a 1-second max age.
app = Flask(__name__)
app.config.update(SEND_FILE_MAX_AGE_DEFAULT=1)
83+
84+
85+
@app.route('/')
def index():
    """Serve the landing page (templates/index.html)."""
    landing_page = render_template('index.html')
    return landing_page
88+
89+
def _generate_caption(features, max_words=20):
    """Greedily decode a caption for one image feature vector.

    Starting from the 'startofseq' token, repeatedly feeds the image
    features plus the words produced so far to the captioning model and
    appends the most probable next word. Decoding stops as soon as the model
    emits 'endofseq' or after `max_words` predictions.

    Args:
        features: array of shape (1, 2048) produced by the ResNet extractor.
        max_words: upper bound on the number of decoding steps.

    Returns:
        The caption as a single space-separated string (without the
        start/end markers).
    """
    text_in = ['startofseq']
    words = []

    for _ in range(max_words):
        encoded = [vocab[token] for token in text_in]
        padded = pad_sequences(
            [encoded], maxlen=max_len, padding='post', truncating='post'
        ).reshape(1, max_len)

        sampled_index = np.argmax(model.predict([features, padded]))
        sampled_word = inv_vocab[sampled_index]

        # The end marker terminates the caption; anything the model would
        # produce after it is not part of the sentence.
        if sampled_word == 'endofseq':
            break

        words.append(sampled_word)
        text_in.append(sampled_word)

    return ' '.join(words)


@app.route('/after', methods=['GET', 'POST'])
def after():
    """Caption the uploaded image and render the result page.

    Expects a multipart form upload in the field 'file1'. The upload is
    stored as 'file.jpg' inside the application's static folder, because
    that is the image templates/after.html displays next to the caption.

    Returns:
        The rendered 'after.html' page with the caption in `data`; the
        upload form again when no file was submitted (e.g. a plain GET);
        or a 400 response when the upload cannot be decoded as an image.
    """
    f = request.files.get('file1')
    if f is None or not f.filename:
        # Nothing to caption: show the upload form instead of failing.
        return render_template('index.html')

    os.makedirs(app.static_folder, exist_ok=True)
    file_path = os.path.join(app.static_folder, 'file.jpg')
    f.save(file_path)

    print("="*50)
    print("IMAGE SAVED")

    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread signals an unreadable/non-image file by returning None.
        return 'The uploaded file could not be read as an image.', 400

    # OpenCV loads BGR; convert to RGB and bring the image to the
    # (1, 224, 224, 3) batch shape the ResNet extractor was built for.
    # NOTE(review): pixels are passed unnormalised (no preprocess_input);
    # presumably this matches how the training features were extracted --
    # confirm against the training notebook.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    image = np.reshape(image, (1, 224, 224, 3))

    incept = resnet.predict(image).reshape(1, 2048)

    print("="*50)
    print("Predict Features")

    print("="*50)
    print("GETING Captions")

    final = _generate_caption(incept)

    return render_template('after.html', data=final)
150+
151+
if __name__ == "__main__":
    # Start Flask's built-in server with debugging off and request
    # threading disabled, so requests are handled one at a time.
    app.run(threaded=False, debug=False)
153+
154+

templates/after.html

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{%extends "base.html"%}

{%block content%}
{# Result page: shows static/file.jpg together with the caption passed in as `data`. #}

<div class="text-center">

    <h1 class='display-2'>Predicted Caption </h1>

    <br>
    <br>

    <img src="{{url_for('static', filename='file.jpg')}}" alt="image" height="340">

    <br>

    <h2 class='text-white'>
        {{data}}
    </h2>

</div>

{%endblock%}

templates/base.html

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<!doctype html>
<html lang="en">
  <head>
    <!-- Required meta tags -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">

    <!-- Bootstrap CSS -->
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css" integrity="sha384-JcKb8q3iqJ61gNV9KGb8thSsNjpSL0n8PARn9HuZOnIxN0hoP+VmmDGMN5t9UJ0Z" crossorigin="anonymous">

    <link rel="stylesheet" href="{{url_for('static', filename='styles.css')}}">

    <title>Image Captioning</title>
  </head>
  <body style="background-color: #b8de6f">

    <!-- Child templates inject their page body here -->
    {%block content%}

    {%endblock%}

    <!-- Optional JavaScript -->
    <!-- jQuery first, then Popper.js, then Bootstrap JS -->
    <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
    <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js" integrity="sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN" crossorigin="anonymous"></script>
    <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js" integrity="sha384-B4gt1jrGC7Jh4AgTPSdUtOBvfO8shuf57BaghqFfPlYxofvL8/KUEfYiJOMMV+rV" crossorigin="anonymous"></script>
  </body>
</html>

templates/index.html

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{%extends "base.html"%}

{%block content%}
{# Landing page: multipart form that posts the chosen image to the `after` view as field `file1`. #}

<div class="text-center">

    <h1 class='display-1'>Automated Image</h1>
    <h1 class='display-1'>Captioning</h1>

    <br>
    <br>

    <form action="{{url_for('after')}}" method='POST' enctype='multipart/form-data'>
        {# `required` stops empty submissions in the browser; `accept` limits the picker to images. #}
        <input type="file" name='file1' accept="image/*" required>
        <input type="submit" name="btn" value='predict caption' class='btn btn-warning'>
    </form>
</div>

{%endblock%}

0 commit comments

Comments
 (0)