
Commit

RL Files
RonakDedhiya authored Sep 30, 2024
1 parent b6982df commit 575e1d6
Showing 20 changed files with 351 additions and 0 deletions.
Binary file added Notes/RL/05Nov22.pdf
Binary file added Notes/RL/08Oct22.pdf
Binary file added Notes/RL/10Sep22.pdf
Binary file added Notes/RL/12Nov22.pdf
Binary file added Notes/RL/15Oct22.pdf
Binary file added Notes/RL/17Sep22.pdf
Binary file added Notes/RL/19Nov22.pdf
Binary file added Notes/RL/22Oct22.pdf
Binary file added Notes/RL/24Sep22.pdf
Binary file added Notes/RL/Final exam Solutions.pdf
351 changes: 351 additions & 0 deletions Notes/RL/Gamblers Problem.ipynb
@@ -0,0 +1,351 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"states = 20\n",
"pf = 0.4\n",
"V = np.zeros(states+1)\n",
"V[states] = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(10000):\n",
" for j in range(1,states):\n",
" actions = range(1,min(j,states-j)+1)\n",
" max_val = -1*np.inf\n",
" for a in actions:\n",
" temp = pf*V[j+a] + (1-pf)*V[j-a]\n",
" if temp >= max_val:\n",
" max_val = temp\n",
" V[j] = max_val"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"V"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy = -1*np.ones(states+1)\n",
"for j in range(1,states):\n",
" actions = range(1,min(j,states-j)+1)\n",
" max_val = -1*np.inf\n",
" for a in actions:\n",
" temp = pf*V[j+a] + (1-pf)*V[j-a]\n",
" print(\"(\",j,\",\",a,\")\",\":\",temp)\n",
" if temp >= max_val:\n",
" max_val = temp\n",
" policy[j] = a\n",
" print(\"*******************\")\n",
"# V[j] = max_val"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.plot(range(1,states),V[1:states])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Monte-Carlo Control\n",
"Q = np.random.random((states+1,states+1))\n",
"returns = {}\n",
"for i in range(10000): #Number of episodes\n",
" done = False\n",
" episode = []\n",
" s = np.random.randint(9) + 1\n",
" while not done:\n",
" if np.random.random() < 0.1:\n",
" a = np.random.randint(min(s,states-s))+1\n",
" else:\n",
" actions = range(1,min(s,states-s)+1)\n",
" a = np.argmax(Q[s,1:min(s,states-s)+1])\n",
" a = actions[a]\n",
" \n",
" if np.random.random() < pf:\n",
" ns = s+a\n",
" else:\n",
" ns = s-a\n",
" \n",
" if ns == states:\n",
" reward = 1\n",
" done = True\n",
" elif ns == 0:\n",
" reward = 0\n",
" done = True\n",
" else:\n",
" reward = 0\n",
" done = False\n",
" \n",
" episode.append((s,a,reward,ns))\n",
" s = ns\n",
" \n",
" total_reward = 0 \n",
" for l in reversed(episode):\n",
"# print(l)\n",
" index = (l[0],l[1])\n",
" if index not in returns.keys():\n",
" returns[index] = []\n",
" total_reward += l[2]\n",
" returns[index].append(total_reward)\n",
" \n",
" for i in list(returns.keys()):\n",
"# print(i[0],i[1])\n",
" Q[i[0],i[1]] = np.average(returns[i])\n",
"# print(Q[i[0],i[1]])\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy_MC = -1*np.ones(states+1)\n",
"for s in range(1,states):\n",
" actions = range(1,min(s,states-s)+1)\n",
" a = np.argmax(Q[s,1:min(s,states-s)+1])\n",
" policy_MC[s] = actions[a]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy_MC[1:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Off-Policy Monte-Carlo Control\n",
"Q = np.zeros((states+1,states+1))\n",
"C = np.zeros((states+1,states+1))\n",
"returns = {}\n",
"for i in range(100000): #Number of episodes\n",
" done = False\n",
" episode = []\n",
" s = np.random.randint(9) + 1\n",
" while not done:\n",
" a = np.random.randint(min(s,states-s))+1 \n",
" if np.random.random() < pf:\n",
" ns = s+a\n",
" else:\n",
" ns = s-a\n",
" \n",
" if ns == states:\n",
" reward = 1\n",
" done = True\n",
" elif ns == 0:\n",
" reward = 0\n",
" done = True\n",
" else:\n",
" reward = 0\n",
" done = False\n",
" \n",
" episode.append((s,a,reward,ns))\n",
" s = ns\n",
" \n",
" total_reward = 0 \n",
" W = 1\n",
" for l in reversed(episode):\n",
"# print(l)\n",
" total_reward += l[2]\n",
" C[l[0],l[1]] += W\n",
" Q[l[0],l[1]] += (W/C[l[0],l[1]])*(total_reward - Q[l[0],l[1]])\n",
" \n",
" actions = range(1,min(l[0],states-l[0])+1)\n",
" a = np.argmax(Q[l[0],1:min(l[0],states-l[0])+1])\n",
" \n",
" if actions[a] != l[1]:\n",
" break\n",
" W = W*(1/(1/len(actions)))\n",
"# print(\"******************\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy_MC_off = -1*np.ones(states+1)\n",
"for s in range(1,states):\n",
" actions = range(1,min(s,states-s)+1)\n",
" a = np.argmax(Q[s,1:min(s,states-s)+1])\n",
" policy_MC_off[s] = actions[a]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy_MC_off[1:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Q-Learning\n",
"Q = np.zeros((states+1,states+1))\n",
"count = np.zeros((states+1,states+1))\n",
"for i in range(10000): #Number of episodes\n",
" done = False\n",
" s = np.random.randint(9) + 1\n",
" while not done:\n",
" if np.random.random() < 0.2:\n",
" a = np.random.randint(min(s,states-s))+1\n",
" else:\n",
" actions = range(1,min(s,states-s)+1)\n",
" a = np.argmax(Q[s,1:min(s,states-s)+1])\n",
" a = actions[a]\n",
" \n",
" ns = get_next_state(s,a)\n",
" \n",
" if ns == states:\n",
" reward = 1\n",
" done = True\n",
" elif ns == 0:\n",
" reward = 0\n",
" done = True\n",
" else:\n",
" reward = 0\n",
" done = False\n",
" \n",
" if ns == 0 or ns == states:\n",
" next_reward = 0\n",
" else:\n",
" next_reward = np.max(Q[ns,1:min(ns,states-ns)+1])\n",
" \n",
" count[s,a] +=1\n",
" alpha = 1/count[s,a]\n",
" \n",
" Q[s,a] = (1-alpha)*Q[s,a] + alpha*(reward + next_reward)\n",
" \n",
" s = ns\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#environment \n",
"def get_next_state(s,a):\n",
" if np.random.random() < pf:\n",
" ns = s+a\n",
" else:\n",
" ns = s-a\n",
" return ns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy_Q = -1*np.ones(states+1)\n",
"for s in range(1,states):\n",
" actions = range(1,min(s,states-s)+1)\n",
" a = np.argmax(Q[s,1:min(s,states-s)+1])\n",
" policy_Q[s] = actions[a]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"policy_Q[1:states]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added Notes/RL/Midterm-Solutions.pdf
Binary file added Notes/RL/Note 06-Dec-2022.pdf
Binary file added Notes/RL/OnlineMtech_RL_FinalExam_2022.pdf
Binary file added Notes/RL/OnlineMtech_RL_Practice.pdf
Binary file added Notes/RL/Online_RL_Practice_Questions.pdf
Binary file added Notes/RL/Practice Solutions 1.pdf
Binary file added Notes/RL/Practice Solutions 2.pdf
Binary file added Notes/RL/Practice Solutions.pdf
Binary file added Notes/RL/Quiz solutions .pdf
