
Commit 7e8a994 (parent: ce73222)
Commit message: update
Former-commit-id: 7b6acdae12580edc42944a28947eab434e5a7f2f


57 files changed: +7288 -2 lines

.vscode/settings.json (+1 -1)

@@ -1,3 +1,3 @@
 {
-    "python.pythonPath": "C:\\Users\\wangzhankun\\AppData\\Local\\conda\\conda\\envs\\PT-GPU\\python.exe"
+    "python.pythonPath": "C:\\Users\\Administrator\\AppData\\Local\\conda\\conda\\envs\\pt\\python.exe"
 }
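
The only change here is pointing VS Code's Python extension at the interpreter of the new `pt` conda environment. If you need to find that path for your own environment, a quick check from inside the activated env is enough (a minimal sketch, not part of the commit; the printed path is just an example):

```python
# Print the interpreter path of the currently activated conda environment; this is
# the value the VS Code "python.pythonPath" setting expects (example output only).
import sys
print(sys.executable)  # e.g. C:\Users\Administrator\AppData\Local\conda\conda\envs\pt\python.exe
```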

README.md (+7 -1)

@@ -1,2 +1,8 @@
 # python-learn
-This is about my learnning way of python3.
+This is about my way of learning python3.<br/>
+The main focus is NLP learning. Below are some excellent NLP introductory resources:<br/>
+```
+https://github.com/zergtant/pytorch-handbook
+https://github.com/graykode/nlp-tutorial
+https://github.com/yandexdataschool/nlp_course
+```

nlp-tutorial-master/.gitignore (+1)

@@ -0,0 +1 @@
+.idea

@@ -0,0 +1,143 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 129
+    },
+    "colab_type": "code",
+    "id": "mvlw9p3tPJjr",
+    "outputId": "2a4a8f52-315e-42b3-9d49-e9c7c3358979"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 1000 cost = 0.147408\n",
+      "Epoch: 2000 cost = 0.026562\n",
+      "Epoch: 3000 cost = 0.010481\n",
+      "Epoch: 4000 cost = 0.005095\n",
+      "Epoch: 5000 cost = 0.002696\n",
+      "[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# code by Tae Hwan Jung @graykode\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "from torch.autograd import Variable\n",
+    "\n",
+    "dtype = torch.FloatTensor\n",
+    "\n",
+    "sentences = [ \"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
+    "\n",
+    "word_list = \" \".join(sentences).split()\n",
+    "word_list = list(set(word_list))\n",
+    "word_dict = {w: i for i, w in enumerate(word_list)}\n",
+    "number_dict = {i: w for i, w in enumerate(word_list)}\n",
+    "n_class = len(word_dict) # number of Vocabulary\n",
+    "\n",
+    "# NNLM Parameter\n",
+    "n_step = 2 # n-1 in paper\n",
+    "n_hidden = 2 # h in paper\n",
+    "m = 2 # m in paper\n",
+    "\n",
+    "def make_batch(sentences):\n",
+    "    input_batch = []\n",
+    "    target_batch = []\n",
+    "\n",
+    "    for sen in sentences:\n",
+    "        word = sen.split()\n",
+    "        input = [word_dict[n] for n in word[:-1]]\n",
+    "        target = word_dict[word[-1]]\n",
+    "\n",
+    "        input_batch.append(input)\n",
+    "        target_batch.append(target)\n",
+    "\n",
+    "    return input_batch, target_batch\n",
+    "\n",
+    "# Model\n",
+    "class NNLM(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(NNLM, self).__init__()\n",
+    "        self.C = nn.Embedding(n_class, m)\n",
+    "        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))\n",
+    "        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))\n",
+    "        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))\n",
+    "        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))\n",
+    "        self.b = nn.Parameter(torch.randn(n_class).type(dtype))\n",
+    "\n",
+    "    def forward(self, X):\n",
+    "        X = self.C(X)\n",
+    "        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]\n",
+    "        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]\n",
+    "        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]\n",
+    "        return output\n",
+    "\n",
+    "model = NNLM()\n",
+    "\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
+    "\n",
+    "input_batch, target_batch = make_batch(sentences)\n",
+    "input_batch = Variable(torch.LongTensor(input_batch))\n",
+    "target_batch = Variable(torch.LongTensor(target_batch))\n",
+    "\n",
+    "# Training\n",
+    "for epoch in range(5000):\n",
+    "\n",
+    "    optimizer.zero_grad()\n",
+    "    output = model(input_batch)\n",
+    "\n",
+    "    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\n",
+    "    loss = criterion(output, target_batch)\n",
+    "    if (epoch + 1)%1000 == 0:\n",
+    "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
+    "\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "\n",
+    "# Predict\n",
+    "predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
+    "\n",
+    "# Test\n",
+    "print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "NNLM-Torch.ipynb",
+   "provenance": [],
+   "version": "0.3.2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
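
The notebook above implements a Bengio-style neural network language model: the first two word indices of each sentence are embedded, concatenated, and mapped to vocabulary logits as output = b + X·W + tanh(d + X·H)·U. Below is a minimal shape sketch of that forward pass (illustration only, not part of the commit) with the toy sizes used in the notebook (n_step = 2, m = 2, n_hidden = 2, and the 7-word vocabulary):

```python
# Stand-alone shape check of the NNLM forward pass; the real model learns the
# embedding table C and the parameters H, W, U, d, b, here they are just random.
import torch

batch_size, n_step, m, n_hidden, n_class = 3, 2, 2, 2, 7  # 7 words in the toy corpus
X = torch.randn(batch_size, n_step * m)   # concatenated embeddings of the 2 context words
H = torch.randn(n_step * m, n_hidden)
W = torch.randn(n_step * m, n_class)      # direct word-to-output connections
U = torch.randn(n_hidden, n_class)
d = torch.randn(n_hidden)
b = torch.randn(n_class)

hidden = torch.tanh(d + X @ H)            # [batch_size, n_hidden]
logits = b + X @ W + hidden @ U           # [batch_size, n_class]
print(hidden.shape, logits.shape)         # torch.Size([3, 2]) torch.Size([3, 7])
```

The X·W term is the optional direct connection from the original NNLM paper; the tanh path alone already gives the classic embed, project, and softmax pipeline.
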
@@ -0,0 +1,67 @@
+# code by Tae Hwan Jung @graykode
+import tensorflow as tf
+import numpy as np
+
+tf.reset_default_graph()
+
+sentences = [ "i like dog", "i love coffee", "i hate milk"]
+
+word_list = " ".join(sentences).split()
+word_list = list(set(word_list))
+word_dict = {w: i for i, w in enumerate(word_list)}
+number_dict = {i: w for i, w in enumerate(word_list)}
+n_class = len(word_dict) # number of Vocabulary
+
+# NNLM Parameter
+n_step = 2 # number of steps ['i like', 'i love', 'i hate']
+n_hidden = 2 # number of hidden units
+
+def make_batch(sentences):
+    input_batch = []
+    target_batch = []
+
+    for sen in sentences:
+        word = sen.split()
+        input = [word_dict[n] for n in word[:-1]]
+        target = word_dict[word[-1]]
+
+        input_batch.append(np.eye(n_class)[input])
+        target_batch.append(np.eye(n_class)[target])
+
+    return input_batch, target_batch
+
+# Model
+X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, number of steps, number of Vocabulary]
+Y = tf.placeholder(tf.float32, [None, n_class])
+
+input = tf.reshape(X, shape=[-1, n_step * n_class]) # [batch_size, n_step * n_class]
+H = tf.Variable(tf.random_normal([n_step * n_class, n_hidden]))
+d = tf.Variable(tf.random_normal([n_hidden]))
+U = tf.Variable(tf.random_normal([n_hidden, n_class]))
+b = tf.Variable(tf.random_normal([n_class]))
+
+tanh = tf.nn.tanh(d + tf.matmul(input, H)) # [batch_size, n_hidden]
+model = tf.matmul(tanh, U) + b # [batch_size, n_class]
+
+cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
+optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
+prediction = tf.argmax(model, 1)
+
+# Training
+init = tf.global_variables_initializer()
+sess = tf.Session()
+sess.run(init)
+
+input_batch, target_batch = make_batch(sentences)
+
+for epoch in range(5000):
+    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
+    if (epoch + 1)%1000 == 0:
+        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
+
+# Predict
+predict = sess.run([prediction], feed_dict={X: input_batch})
+
+# Test
+input = [sen.split()[:2] for sen in sentences]
+print([sen.split()[:2] for sen in sentences], '->', [number_dict[n] for n in predict[0]])
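
This script targets the TensorFlow 1.x graph API (placeholders, Session, tf.random_normal, softmax_cross_entropy_with_logits_v2). On a machine that only has TensorFlow 2.x installed, those symbols are reachable through the compatibility layer; a hedged two-line shim is sketched below (assuming TF 2.x; the rest of the script stays unchanged):

```python
# Compatibility shim for running the 1.x-style script under TensorFlow 2.x:
# import the v1 API and switch off 2.x eager behavior so placeholders and Sessions work.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
```
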
+82

@@ -0,0 +1,82 @@
+# code by Tae Hwan Jung @graykode
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.autograd import Variable
+
+dtype = torch.FloatTensor
+
+sentences = [ "i like dog", "i love coffee", "i hate milk"]
+
+word_list = " ".join(sentences).split()
+word_list = list(set(word_list))
+word_dict = {w: i for i, w in enumerate(word_list)}
+number_dict = {i: w for i, w in enumerate(word_list)}
+n_class = len(word_dict) # number of Vocabulary
+
+# NNLM Parameter
+n_step = 2 # n-1 in paper
+n_hidden = 2 # h in paper
+m = 2 # m in paper
+
+def make_batch(sentences):
+    input_batch = []
+    target_batch = []
+
+    for sen in sentences:
+        word = sen.split()
+        input = [word_dict[n] for n in word[:-1]]
+        target = word_dict[word[-1]]
+
+        input_batch.append(input)
+        target_batch.append(target)
+
+    return input_batch, target_batch
+
+# Model
+class NNLM(nn.Module):
+    def __init__(self):
+        super(NNLM, self).__init__()
+        self.C = nn.Embedding(n_class, m)
+        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
+        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
+        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
+        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
+        self.b = nn.Parameter(torch.randn(n_class).type(dtype))
+
+    def forward(self, X):
+        X = self.C(X)
+        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]
+        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
+        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
+        return output
+
+model = NNLM()
+
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+input_batch, target_batch = make_batch(sentences)
+input_batch = Variable(torch.LongTensor(input_batch))
+target_batch = Variable(torch.LongTensor(target_batch))
+
+# Training
+for epoch in range(5000):
+
+    optimizer.zero_grad()
+    output = model(input_batch)
+
+    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
+    loss = criterion(output, target_batch)
+    if (epoch + 1)%1000 == 0:
+        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
+
+    loss.backward()
+    optimizer.step()
+
+# Predict
+predict = model(input_batch).data.max(1, keepdim=True)[1]
+
+# Test
+print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
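
On PyTorch 0.4 and later, torch.autograd.Variable is a no-op wrapper, so the tensors above could be created directly. A small usage sketch (not part of the commit) that queries the trained model for one new two-word prefix, assuming the script above has already run in the same session so `model`, `word_dict`, and `number_dict` exist:

```python
# Predict the third word for a single prefix with the trained toy model.
import torch

prefix = ['i', 'love']
x = torch.LongTensor([[word_dict[w] for w in prefix]])  # shape [1, n_step]
with torch.no_grad():                                    # inference only, no gradients
    logits = model(x)                                    # shape [1, n_class]
    pred = logits.argmax(dim=1).item()
print(prefix, '->', number_dict[pred])                   # typically 'coffee' after training
```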
