
Commit 7e8a994 (parent: ce73222)
Commit message: update
Former-commit-id: 7b6acdae12580edc42944a28947eab434e5a7f2f


57 files changed: +7288 -2 lines

.vscode/settings.json (+1 -1)

@@ -1,3 +1,3 @@
 {
-    "python.pythonPath": "C:\\Users\\wangzhankun\\AppData\\Local\\conda\\conda\\envs\\PT-GPU\\python.exe"
+    "python.pythonPath": "C:\\Users\\Administrator\\AppData\\Local\\conda\\conda\\envs\\pt\\python.exe"
 }
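
The only change here is pointing VS Code's Python extension at the interpreter of the new `pt` conda environment. If you need to find that path for your own environment, a quick check from inside the activated env is enough (a minimal sketch, not part of the commit; the printed path is just an example):

```python
# Print the interpreter path of the currently activated conda environment; this is
# the value the VS Code "python.pythonPath" setting expects (example output only).
import sys
print(sys.executable)  # e.g. C:\Users\Administrator\AppData\Local\conda\conda\envs\pt\python.exe
```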

README.md (+7 -1)

@@ -1,2 +1,8 @@
 # python-learn
-This is about my learnning way of python3.
+This is about my way of learning python3.<br/>
+The main focus is NLP learning. Below are some excellent NLP introductory resources:<br/>
+```
+https://github.com/zergtant/pytorch-handbook
+https://github.com/graykode/nlp-tutorial
+https://github.com/yandexdataschool/nlp_course
+```

nlp-tutorial-master/.gitignore (+1)

@@ -0,0 +1 @@
+.idea

@@ -0,0 +1,143 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 129
+    },
+    "colab_type": "code",
+    "id": "mvlw9p3tPJjr",
+    "outputId": "2a4a8f52-315e-42b3-9d49-e9c7c3358979"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 1000 cost = 0.147408\n",
+      "Epoch: 2000 cost = 0.026562\n",
+      "Epoch: 3000 cost = 0.010481\n",
+      "Epoch: 4000 cost = 0.005095\n",
+      "Epoch: 5000 cost = 0.002696\n",
+      "[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# code by Tae Hwan Jung @graykode\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "from torch.autograd import Variable\n",
+    "\n",
+    "dtype = torch.FloatTensor\n",
+    "\n",
+    "sentences = [ \"i like dog\", \"i love coffee\", \"i hate milk\"]\n",
+    "\n",
+    "word_list = \" \".join(sentences).split()\n",
+    "word_list = list(set(word_list))\n",
+    "word_dict = {w: i for i, w in enumerate(word_list)}\n",
+    "number_dict = {i: w for i, w in enumerate(word_list)}\n",
+    "n_class = len(word_dict) # number of Vocabulary\n",
+    "\n",
+    "# NNLM Parameter\n",
+    "n_step = 2 # n-1 in paper\n",
+    "n_hidden = 2 # h in paper\n",
+    "m = 2 # m in paper\n",
+    "\n",
+    "def make_batch(sentences):\n",
+    "    input_batch = []\n",
+    "    target_batch = []\n",
+    "\n",
+    "    for sen in sentences:\n",
+    "        word = sen.split()\n",
+    "        input = [word_dict[n] for n in word[:-1]]\n",
+    "        target = word_dict[word[-1]]\n",
+    "\n",
+    "        input_batch.append(input)\n",
+    "        target_batch.append(target)\n",
+    "\n",
+    "    return input_batch, target_batch\n",
+    "\n",
+    "# Model\n",
+    "class NNLM(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(NNLM, self).__init__()\n",
+    "        self.C = nn.Embedding(n_class, m)\n",
+    "        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))\n",
+    "        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))\n",
+    "        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))\n",
+    "        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))\n",
+    "        self.b = nn.Parameter(torch.randn(n_class).type(dtype))\n",
+    "\n",
+    "    def forward(self, X):\n",
+    "        X = self.C(X)\n",
+    "        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]\n",
+    "        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]\n",
+    "        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]\n",
+    "        return output\n",
+    "\n",
+    "model = NNLM()\n",
+    "\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
+    "\n",
+    "input_batch, target_batch = make_batch(sentences)\n",
+    "input_batch = Variable(torch.LongTensor(input_batch))\n",
+    "target_batch = Variable(torch.LongTensor(target_batch))\n",
+    "\n",
+    "# Training\n",
+    "for epoch in range(5000):\n",
+    "\n",
+    "    optimizer.zero_grad()\n",
+    "    output = model(input_batch)\n",
+    "\n",
+    "    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)\n",
+    "    loss = criterion(output, target_batch)\n",
+    "    if (epoch + 1)%1000 == 0:\n",
+    "        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))\n",
+    "\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "\n",
+    "# Predict\n",
+    "predict = model(input_batch).data.max(1, keepdim=True)[1]\n",
+    "\n",
+    "# Test\n",
+    "print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "NNLM-Torch.ipynb",
+   "provenance": [],
+   "version": "0.3.2"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
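
The notebook above implements a Bengio-style neural network language model: the first two word indices of each sentence are embedded, concatenated, and mapped to vocabulary logits as output = b + X·W + tanh(d + X·H)·U. Below is a minimal shape sketch of that forward pass (illustration only, not part of the commit) with the toy sizes used in the notebook (n_step = 2, m = 2, n_hidden = 2, and the 7-word vocabulary):

```python
# Stand-alone shape check of the NNLM forward pass; the real model learns the
# embedding table C and the parameters H, W, U, d, b, here they are just random.
import torch

batch_size, n_step, m, n_hidden, n_class = 3, 2, 2, 2, 7  # 7 words in the toy corpus
X = torch.randn(batch_size, n_step * m)   # concatenated embeddings of the 2 context words
H = torch.randn(n_step * m, n_hidden)
W = torch.randn(n_step * m, n_class)      # direct word-to-output connections
U = torch.randn(n_hidden, n_class)
d = torch.randn(n_hidden)
b = torch.randn(n_class)

hidden = torch.tanh(d + X @ H)            # [batch_size, n_hidden]
logits = b + X @ W + hidden @ U           # [batch_size, n_class]
print(hidden.shape, logits.shape)         # torch.Size([3, 2]) torch.Size([3, 7])
```

The X·W term is the optional direct connection from the original NNLM paper; the tanh path alone already gives the classic embed, project, and softmax pipeline.
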
@@ -0,0 +1,67 @@
+# code by Tae Hwan Jung @graykode
+import tensorflow as tf
+import numpy as np
+
+tf.reset_default_graph()
+
+sentences = [ "i like dog", "i love coffee", "i hate milk"]
+
+word_list = " ".join(sentences).split()
+word_list = list(set(word_list))
+word_dict = {w: i for i, w in enumerate(word_list)}
+number_dict = {i: w for i, w in enumerate(word_list)}
+n_class = len(word_dict) # number of Vocabulary
+
+# NNLM Parameter
+n_step = 2 # number of steps ['i like', 'i love', 'i hate']
+n_hidden = 2 # number of hidden units
+
+def make_batch(sentences):
+    input_batch = []
+    target_batch = []
+
+    for sen in sentences:
+        word = sen.split()
+        input = [word_dict[n] for n in word[:-1]]
+        target = word_dict[word[-1]]
+
+        input_batch.append(np.eye(n_class)[input])
+        target_batch.append(np.eye(n_class)[target])
+
+    return input_batch, target_batch
+
+# Model
+X = tf.placeholder(tf.float32, [None, n_step, n_class]) # [batch_size, number of steps, number of Vocabulary]
+Y = tf.placeholder(tf.float32, [None, n_class])
+
+input = tf.reshape(X, shape=[-1, n_step * n_class]) # [batch_size, n_step * n_class]
+H = tf.Variable(tf.random_normal([n_step * n_class, n_hidden]))
+d = tf.Variable(tf.random_normal([n_hidden]))
+U = tf.Variable(tf.random_normal([n_hidden, n_class]))
+b = tf.Variable(tf.random_normal([n_class]))
+
+tanh = tf.nn.tanh(d + tf.matmul(input, H)) # [batch_size, n_hidden]
+model = tf.matmul(tanh, U) + b # [batch_size, n_class]
+
+cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y))
+optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
+prediction = tf.argmax(model, 1)
+
+# Training
+init = tf.global_variables_initializer()
+sess = tf.Session()
+sess.run(init)
+
+input_batch, target_batch = make_batch(sentences)
+
+for epoch in range(5000):
+    _, loss = sess.run([optimizer, cost], feed_dict={X: input_batch, Y: target_batch})
+    if (epoch + 1)%1000 == 0:
+        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
+
+# Predict
+predict = sess.run([prediction], feed_dict={X: input_batch})
+
+# Test
+input = [sen.split()[:2] for sen in sentences]
+print([sen.split()[:2] for sen in sentences], '->', [number_dict[n] for n in predict[0]])
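
This script targets the TensorFlow 1.x graph API (placeholders, Session, tf.random_normal, softmax_cross_entropy_with_logits_v2). On a machine that only has TensorFlow 2.x installed, those symbols are reachable through the compatibility layer; a hedged two-line shim is sketched below (assuming TF 2.x; the rest of the script stays unchanged):

```python
# Compatibility shim for running the 1.x-style script under TensorFlow 2.x:
# import the v1 API and switch off 2.x eager behavior so placeholders and Sessions work.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
```
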
+82

@@ -0,0 +1,82 @@
+# code by Tae Hwan Jung @graykode
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.autograd import Variable
+
+dtype = torch.FloatTensor
+
+sentences = [ "i like dog", "i love coffee", "i hate milk"]
+
+word_list = " ".join(sentences).split()
+word_list = list(set(word_list))
+word_dict = {w: i for i, w in enumerate(word_list)}
+number_dict = {i: w for i, w in enumerate(word_list)}
+n_class = len(word_dict) # number of Vocabulary
+
+# NNLM Parameter
+n_step = 2 # n-1 in paper
+n_hidden = 2 # h in paper
+m = 2 # m in paper
+
+def make_batch(sentences):
+    input_batch = []
+    target_batch = []
+
+    for sen in sentences:
+        word = sen.split()
+        input = [word_dict[n] for n in word[:-1]]
+        target = word_dict[word[-1]]
+
+        input_batch.append(input)
+        target_batch.append(target)
+
+    return input_batch, target_batch
+
+# Model
+class NNLM(nn.Module):
+    def __init__(self):
+        super(NNLM, self).__init__()
+        self.C = nn.Embedding(n_class, m)
+        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
+        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
+        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
+        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
+        self.b = nn.Parameter(torch.randn(n_class).type(dtype))
+
+    def forward(self, X):
+        X = self.C(X)
+        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]
+        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
+        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
+        return output
+
+model = NNLM()
+
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+input_batch, target_batch = make_batch(sentences)
+input_batch = Variable(torch.LongTensor(input_batch))
+target_batch = Variable(torch.LongTensor(target_batch))
+
+# Training
+for epoch in range(5000):
+
+    optimizer.zero_grad()
+    output = model(input_batch)
+
+    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
+    loss = criterion(output, target_batch)
+    if (epoch + 1)%1000 == 0:
+        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
+
+    loss.backward()
+    optimizer.step()
+
+# Predict
+predict = model(input_batch).data.max(1, keepdim=True)[1]
+
+# Test
+print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])
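
On PyTorch 0.4 and later, torch.autograd.Variable is a no-op wrapper, so the tensors above could be created directly. A small usage sketch (not part of the commit) that queries the trained model for one new two-word prefix, assuming the script above has already run in the same session so `model`, `word_dict`, and `number_dict` exist:

```python
# Predict the third word for a single prefix with the trained toy model.
import torch

prefix = ['i', 'love']
x = torch.LongTensor([[word_dict[w] for w in prefix]])  # shape [1, n_step]
with torch.no_grad():                                    # inference only, no gradients
    logits = model(x)                                    # shape [1, n_class]
    pred = logits.argmax(dim=1).item()
print(prefix, '->', number_dict[pred])                   # typically 'coffee' after training
```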
