Commit
Assignment 4 written part (1. g. and 2. b. remain)
daviddwlee84 committed Nov 5, 2019
1 parent 0b6ad5d commit 3d8116e
Showing 4 changed files with 426 additions and 6 deletions.
300 changes: 300 additions & 0 deletions Assignments/a4/written/BLEU_Verify.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# BLEU Varify"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from nltk.translate.bleu_score import sentence_bleu\n",
"import numpy as np\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\") # ignore nltk warnings"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"references = ['love can always find a way'.split(),\n",
" 'love makes anything possible'.split()]\n",
"candidate1 = 'the love can always do'.split()\n",
"candidate2 = 'love can make anything possible'.split()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.6\n0.8\n"
}
],
"source": [
"# Candidate1 unigram\n",
"score = sentence_bleu(references, candidate1, weights=(1, 0, 0, 0))\n",
"print(score)\n",
"# Candidate2 unigram\n",
"score = sentence_bleu(references, candidate2, weights=(1, 0, 0, 0))\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.5\n0.5\n"
}
],
"source": [
"# Candidate1 bigram\n",
"score = sentence_bleu(references, candidate1, weights=(0, 1, 0, 0))\n",
"print(score)\n",
"# Candidate2 bigram\n",
"score = sentence_bleu(references, candidate2, weights=(0, 1, 0, 0))\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.5477225575051662\n0.6324555320336759\n"
}
],
"source": [
"# Candidate1 unigram + bigram\n",
"score = sentence_bleu(references, candidate1, weights=(0.5, 0.5, 0, 0))\n",
"print(score)\n",
"# Candidate2 unigram + bigram\n",
"score = sentence_bleu(references, candidate2, weights=(0.5, 0.5, 0, 0))\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"reference1 = ['love can always find a way'.split()]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.49123845184678916\n0.3274923012311928\n"
}
],
"source": [
"# Candidate1 unigram\n",
"score = sentence_bleu(reference1, candidate1, weights=(1, 0, 0, 0))\n",
"print(score)\n",
"# Candidate2 unigram\n",
"score = sentence_bleu(reference1, candidate2, weights=(1, 0, 0, 0))\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.40936537653899097\n0.20468268826949548\n"
}
],
"source": [
"# Candidate1 bigram\n",
"score = sentence_bleu(reference1, candidate1, weights=(0, 1, 0, 0))\n",
"print(score)\n",
"# Candidate2 bigram\n",
"score = sentence_bleu(reference1, candidate2, weights=(0, 1, 0, 0))\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.448437301984003\n0.25890539701513365\n"
}
],
"source": [
"# Candidate1 unigram + bigram\n",
"score = sentence_bleu(reference1, candidate1, weights=(0.5, 0.5, 0, 0))\n",
"print(score)\n",
"# Candidate2 unigram + bigram\n",
"score = sentence_bleu(reference1, candidate2, weights=(0.5, 0.5, 0, 0))\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0.5477225575051662"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp((0.5 * np.log(0.6) + 0.5 * np.log(0.5)))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0.6324555320336759"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp((0.5 * np.log(0.8) + 0.5 * np.log(0.5)))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0.8187307530779819"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(1 - 6/5)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0.448437301984003"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(1 - 6/5) * np.exp((0.5 * np.log(0.6) + 0.5 * np.log(0.5)))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0.25890539701513365"
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(1 - 6/5) * np.exp((0.5 * np.log(0.4) + 0.5 * np.log(0.25)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added Assignments/a4/written/assignment4.pdf
Binary file not shown.
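For reference, here is a minimal stand-alone sketch of the BLEU computation the notebook verifies, assuming the standard modified n-gram precision and brevity penalty definitions. The helper names (ngrams, modified_precision, brevity_penalty, bleu) are illustrative, not part of nltk's API; it reproduces the single-reference scores for candidate1 (~0.4484) and candidate2 (~0.2589).

from collections import Counter
from math import exp, log

def ngrams(tokens, n):
    # all contiguous n-grams of a token list
    return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

def modified_precision(references, candidate, n):
    # clip each candidate n-gram count by its maximum count in any reference
    cand_counts = Counter(ngrams(candidate, n))
    max_ref_counts = Counter()
    for ref in references:
        for gram, count in Counter(ngrams(ref, n)).items():
            max_ref_counts[gram] = max(max_ref_counts[gram], count)
    clipped = sum(min(count, max_ref_counts[gram]) for gram, count in cand_counts.items())
    return clipped / sum(cand_counts.values())

def brevity_penalty(references, candidate):
    c = len(candidate)
    # closest reference length; ties go to the shorter reference
    r = min((abs(len(ref) - c), len(ref)) for ref in references)[1]
    return 1.0 if c > r else exp(1 - r / c)

def bleu(references, candidate, weights=(0.5, 0.5)):
    # weighted geometric mean of n-gram precisions, scaled by the brevity penalty
    precisions = [modified_precision(references, candidate, n)
                  for n in range(1, len(weights) + 1)]
    return brevity_penalty(references, candidate) * exp(
        sum(w * log(p) for w, p in zip(weights, precisions)))

reference1 = ['love can always find a way'.split()]
candidate1 = 'the love can always do'.split()
candidate2 = 'love can make anything possible'.split()
print(bleu(reference1, candidate1))  # ~0.4484, matches the notebook output
print(bleu(reference1, candidate2))  # ~0.2589, matches the notebook output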
