From 74e5d1abe0084d62a9d26c5b7f80207d40e459bb Mon Sep 17 00:00:00 2001 From: Prashant Singh Rana Date: Wed, 19 Jun 2024 09:10:05 +0530 Subject: [PATCH] Created using Colab --- Linear_Model.ipynb | 447 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 447 insertions(+) create mode 100644 Linear_Model.ipynb diff --git a/Linear_Model.ipynb b/Linear_Model.ipynb new file mode 100644 index 0000000..5e78939 --- /dev/null +++ b/Linear_Model.ipynb @@ -0,0 +1,447 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNKMPDP85TxLBcFdUQmoEqD", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "**1. Import Libraries**" + ], + "metadata": { + "id": "R7_OeMy1Q8eE" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "iX51Kn3CQ5Hg" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import mean_squared_error, r2_score" + ] + }, + { + "cell_type": "markdown", + "source": [ + "**2. 
Creating random dataset**" + ], + "metadata": { + "id": "Ixt1BJl4RPC-" + } + }, + { + "cell_type": "code", + "source": [ + "X = np.random.rand(20).reshape(-1,1)\n", + "Y = np.random.rand(20)\n", + "\n", + "print(\"X = \", X)\n", + "print(\"\\nY = \", Y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "N25KYrILRSnL", + "outputId": "919a6186-f515-4ab8-d1bb-d148e5fa3066" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "X = [[0.78353344]\n", + " [0.99928113]\n", + " [0.72295665]\n", + " [0.31040895]\n", + " [0.28749016]\n", + " [0.37163667]\n", + " [0.98226679]\n", + " [0.50707929]\n", + " [0.47808077]\n", + " [0.51092768]\n", + " [0.28925052]\n", + " [0.48766622]\n", + " [0.30722435]\n", + " [0.93064507]\n", + " [0.91562345]\n", + " [0.5489895 ]\n", + " [0.91544885]\n", + " [0.49987293]\n", + " [0.40493747]\n", + " [0.14684876]]\n", + "\n", + "Y = [0.48674845 0.01009211 0.03883565 0.64845882 0.1020579 0.86302998\n", + " 0.42414442 0.15472004 0.6642622 0.64368917 0.79475244 0.74610127\n", + " 0.25411568 0.41302081 0.30833708 0.75945603 0.49482498 0.10741174\n", + " 0.76733307 0.91431607]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**3. 
Splitting data into train and test dataset**" + ], + "metadata": { + "id": "XuJIYpOWRdm2" + } + }, + { + "cell_type": "code", + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 20 )\n", + "\n", + "print(\"X_train\",X_train)\n", + "\n", + "print(\"\\nX_test\",X_test)\n", + "\n", + "print(\"\\nY_train\",Y_train)\n", + "\n", + "print(\"\\nY_test\",Y_test)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W5Sj-Y8_Rdm2", + "outputId": "db4a29e3-db28-45a4-b182-fca0d6af8060" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "X_train [[0.49987293]\n", + " [0.30722435]\n", + " [0.14684876]\n", + " [0.37163667]\n", + " [0.47808077]\n", + " [0.40493747]\n", + " [0.78353344]\n", + " [0.72295665]\n", + " [0.50707929]\n", + " [0.98226679]\n", + " [0.48766622]\n", + " [0.51092768]\n", + " [0.5489895 ]\n", + " [0.31040895]]\n", + "\n", + "X_test [[0.91544885]\n", + " [0.93064507]\n", + " [0.28925052]\n", + " [0.28749016]\n", + " [0.99928113]\n", + " [0.91562345]]\n", + "\n", + "Y_train [0.10741174 0.25411568 0.91431607 0.86302998 0.6642622 0.76733307\n", + " 0.48674845 0.03883565 0.15472004 0.42414442 0.74610127 0.64368917\n", + " 0.75945603 0.64845882]\n", + "\n", + "Y_test [0.49482498 0.41302081 0.79475244 0.1020579 0.01009211 0.30833708]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**4. Creating an instance for the linear regression model**\n" + ], + "metadata": { + "id": "R9azRaIsVtHV" + } + }, + { + "cell_type": "code", + "source": [ + "lr = LinearRegression()\n", + "lr" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 94 + }, + "id": "2SEmE08uV1ta", + "outputId": "f64f8c23-1ac6-4508-a81e-e208b8ecae1c" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ], + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**5. Creating/Training the Linear Model**" + ], + "metadata": { + "id": "PbPgkB0ZV816" + } + }, + { + "cell_type": "code", + "source": [ + "# Training the model by passing the dependent and independent features of the training dataset\n", + "lr.fit(X_train, Y_train)\n", + "lr" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 94 + }, + "id": "AHvhr9ABWBYX", + "outputId": "4739df9a-617d-435c-84b8-86387101e666" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ], + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**6. Make predictions on the test dataset**" + ], + "metadata": { + "id": "By_a7suiZV8c" + } + }, + { + "cell_type": "code", + "source": [ + "Y_pred = lr.predict( X_test)\n", + "\n", + "print(Y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PgGa5n_eZYvN", + "outputId": "44b33718-afd7-45c0-c74e-0f2497f17b72" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0.29712591 0.28837648 0.65766811 0.65868166 0.24885834 0.29702538]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**7. Model Evaluation**" + ], + "metadata": { + "id": "iJUoVUsIjXWY" + } + }, + { + "cell_type": "markdown", + "source": [ + "**7.1 Get the coefficients**" + ], + "metadata": { + "id": "MMHK-dHGjiVE" + } + }, + { + "cell_type": "code", + "source": [ + "# The value of the coefficients for the independent feature from the simple linear regression model\n", + "print(\"Value of the coefficients: \\n\", lr.coef_)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DjBh16z-Zy2t", + "outputId": "a76fbadd-07de-4b03-a9f5-e2e9f779729e" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Value of the coefficients: \n", + " [-0.57576358]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**7.2 Get Mean squared error between Actual and Predicted**" + ], + "metadata": { + "id": "706dGL7MjxWd" + } + }, + { + "cell_type": "code", + "source": [ + "# The value of the mean squared error\n", + "print(f\"Mean square error: {mean_squared_error( Y_test, Y_pred)}\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mfI6QqPzjx1-", + "outputId": "dbafe8ce-83d0-4af1-f3d0-bdb5385dc884" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": 
"stream", + "name": "stdout", + "text": [ + "Mean square error: 0.07339675249663291\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**7.3 Get R Square between Actual and Predicted**" + ], + "metadata": { + "id": "1Dty9dAAj-m1" + } + }, + { + "cell_type": "code", + "source": [ + "# The value of the coefficient of determination, i.e., R-square score of the model\n", + "print(f\"Coefficient of determination: {r2_score( Y_test, Y_pred )}\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yp7EcVIYj-yI", + "outputId": "d841082b-0438-4b8b-df3e-51567244179b" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Coefficient of determination: -0.09708334672591712\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**7.4 Scatter Plot**" + ], + "metadata": { + "id": "RAcx9vPQkOA3" + } + }, + { + "cell_type": "code", + "source": [ + "# Plotting the output\n", + "plt.scatter(X_test, Y_test, color = \"black\", label = \"original data\")\n", + "plt.plot(X_test, Y_pred, color = \"blue\", linewidth=3, label = \"regression line\")\n", + "plt.xlabel(\"Independent Feature\")\n", + "plt.ylabel(\"Target Values\")\n", + "plt.title(\"Simple Linear Regression\")\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 492 + }, + "id": "be-I_2-dkOLK", + "outputId": "3d72ab91-5204-4e33-c62e-f83c7a9859e1" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file