From 895e515042d17e6b05118695364d1920ba1f54bf Mon Sep 17 00:00:00 2001
From: sandeep chauhan <64914145+sandeep92134@users.noreply.github.com>
Date: Mon, 18 Jan 2021 19:16:52 +0530
Subject: [PATCH 1/2] Created using Colaboratory
---
..._of_the_Median_Values_of_Our_Dataset.ipynb | 356 ++++++++++++++++++
1 file changed, 356 insertions(+)
create mode 100644 module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb
diff --git a/module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb b/module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb
new file mode 100644
index 0000000..cae21e9
--- /dev/null
+++ b/module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb
@@ -0,0 +1,356 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Exercise 144: Using Linear Regression to Predict the Accuracy of the Median Values of Our Dataset",
+ "provenance": [],
+ "authorship_tag": "ABX9TyOarmSrG4xeAOWn1sZdPz4j",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WB7mxEAsryNC"
+ },
+ "source": [
+ "The goal of this exercise is to build a machine learning model using linear regression. Your model will predict the median value of Boston houses and, based on this, we will come to a conclusion about whether the value is optimal or not.\r\n",
+ "\r\n",
+ "This exercise will be performed on a Jupyter Notebook.\r\n",
+ "1. Open a new notebook file.\r\n",
+ "2. Now, **import** all the necessary libraries, as shown in the following code snippet:\r\n",
+ "```\r\n",
+ " import pandas as pd\r\n",
+ " import numpy as np\r\n",
+ " from sklearn.linear_model import LinearRegression\r\n",
+ " from sklearn.metrics import mean_squared_error\r\n",
+ " from sklearn.model_selection import train_test_split\r\n",
+ "```\r\n",
+ "Now that we have imported the libraries, we will load the data.\r\n",
+ "3. Load the dataset and view the DataFrame to look at the first five rows:\r\n",
+ "```\r\n",
+ " # load data\r\n",
+ " housing_df = pd.read_csv('HousingData.csv')\r\n",
+ " housing_df.head()\r\n",
+ "```\r\n",
+ "Recall that, as mentioned in Chapter 10, Data Analytics with pandas and NumPy, **housing_df = pd.read_csv('HousingData.csv')** will read the **CSV** file in parentheses and store it in a **DataFrame** called housing_df. Then, **housing_df.head()** will display the first five rows of the housing_df **DataFrame** by default.\r\n",
+ "4. Next, enter the following code to clean the dataset of null values using **.dropna()**:\r\n",
+ "```\r\n",
+ " # drop null values\r\n",
+ " housing_df = housing_df.dropna()\r\n",
+ "```\r\n",
+ "5. Now, declare the X and y variables, where you use X for the **predictor** columns and y for the **target** column:\r\n",
+ "```\r\n",
+ " # declare X and y\r\n",
+ " X = housing_df.iloc[:,:-1]\r\n",
+ " y = housing_df.iloc[:, -1]\r\n",
+ "```\r\n",
+ "6. Now we build the actual linear regression model.\r\n",
+ "7. Now, find how accurate the model is. Here, we can test it on unseen data:\r\n",
+ "8. We can now test the prediction by comparing the predicted **y-values**, which is **y_pred**, to the actual **y-values**, which is **y_test**:\r\n",
+ "\r\n",
+ "\r\n",
+ "\r\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "gj9xHtcjqLWu"
+ },
+ "source": [
+ "import pandas as pd\r\n",
+ "import numpy as np\r\n",
+ "from sklearn.linear_model import LinearRegression\r\n",
+ "from sklearn.metrics import mean_squared_error\r\n",
+ "from sklearn.model_selection import train_test_split"
+ ],
+ "execution_count": 1,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 204
+ },
+ "id": "t8UNBeDQuPyM",
+ "outputId": "aaf99d0a-7a2f-40b5-fe71-a45eada1f058"
+ },
+ "source": [
+ "# load data\r\n",
+ "housing_df = pd.read_csv('HousingData.csv')\r\n",
+ "housing_df.head()"
+ ],
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CRIM | \n",
+ " ZN | \n",
+ " INDUS | \n",
+ " CHAS | \n",
+ " NOX | \n",
+ " RM | \n",
+ " AGE | \n",
+ " DIS | \n",
+ " RAD | \n",
+ " TAX | \n",
+ " PTRATIO | \n",
+ " B | \n",
+ " LSTAT | \n",
+ " MEDV | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.00632 | \n",
+ " 18.0 | \n",
+ " 2.31 | \n",
+ " 0.0 | \n",
+ " 0.538 | \n",
+ " 6.575 | \n",
+ " 65.2 | \n",
+ " 4.0900 | \n",
+ " 1 | \n",
+ " 296 | \n",
+ " 15.3 | \n",
+ " 396.90 | \n",
+ " 4.98 | \n",
+ " 24.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.02731 | \n",
+ " 0.0 | \n",
+ " 7.07 | \n",
+ " 0.0 | \n",
+ " 0.469 | \n",
+ " 6.421 | \n",
+ " 78.9 | \n",
+ " 4.9671 | \n",
+ " 2 | \n",
+ " 242 | \n",
+ " 17.8 | \n",
+ " 396.90 | \n",
+ " 9.14 | \n",
+ " 21.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.02729 | \n",
+ " 0.0 | \n",
+ " 7.07 | \n",
+ " 0.0 | \n",
+ " 0.469 | \n",
+ " 7.185 | \n",
+ " 61.1 | \n",
+ " 4.9671 | \n",
+ " 2 | \n",
+ " 242 | \n",
+ " 17.8 | \n",
+ " 392.83 | \n",
+ " 4.03 | \n",
+ " 34.7 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.03237 | \n",
+ " 0.0 | \n",
+ " 2.18 | \n",
+ " 0.0 | \n",
+ " 0.458 | \n",
+ " 6.998 | \n",
+ " 45.8 | \n",
+ " 6.0622 | \n",
+ " 3 | \n",
+ " 222 | \n",
+ " 18.7 | \n",
+ " 394.63 | \n",
+ " 2.94 | \n",
+ " 33.4 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.06905 | \n",
+ " 0.0 | \n",
+ " 2.18 | \n",
+ " 0.0 | \n",
+ " 0.458 | \n",
+ " 7.147 | \n",
+ " 54.2 | \n",
+ " 6.0622 | \n",
+ " 3 | \n",
+ " 222 | \n",
+ " 18.7 | \n",
+ " 396.90 | \n",
+ " NaN | \n",
+ " 36.2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CRIM ZN INDUS CHAS NOX ... TAX PTRATIO B LSTAT MEDV\n",
+ "0 0.00632 18.0 2.31 0.0 0.538 ... 296 15.3 396.90 4.98 24.0\n",
+ "1 0.02731 0.0 7.07 0.0 0.469 ... 242 17.8 396.90 9.14 21.6\n",
+ "2 0.02729 0.0 7.07 0.0 0.469 ... 242 17.8 392.83 4.03 34.7\n",
+ "3 0.03237 0.0 2.18 0.0 0.458 ... 222 18.7 394.63 2.94 33.4\n",
+ "4 0.06905 0.0 2.18 0.0 0.458 ... 222 18.7 396.90 NaN 36.2\n",
+ "\n",
+ "[5 rows x 14 columns]"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 2
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "gkhFUuT6uTUR"
+ },
+ "source": [
+ "# drop null values\r\n",
+ "housing_df = housing_df.dropna()\r\n"
+ ],
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "aHDHQkZGvTpx"
+ },
+ "source": [
+ "# declare X and y\r\n",
+ "X = housing_df.iloc[:,:-1]\r\n",
+ "y = housing_df.iloc[:, -1]"
+ ],
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "YmvQbsrpuyA7"
+ },
+ "source": [
+ "#Create training and test sets\r\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)"
+ ],
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ylYyCDbhvXvT"
+ },
+ "source": [
+ "#Create the regressor: reg\r\n",
+ "reg = LinearRegression()"
+ ],
+ "execution_count": 6,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MVOnE4E0u6eZ",
+ "outputId": "ab6cb5ef-f180-41b5-d424-2d8ff0ef13f5"
+ },
+ "source": [
+ "#Fit the regressor to the training data\r\n",
+ "reg.fit(X_train, y_train)"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "VYIfTckQvACx",
+ "outputId": "dbaa661b-1b6c-4874-b359-c2c90a1558dd"
+ },
+ "source": [
+ "# Predict on the test data: y_pred\r\n",
+ "y_pred = reg.predict(X_test)\r\n",
+ "# Compute and print RMSE\r\n",
+ "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\r\n",
+ "print(\"Root Mean Squared Error: {}\".format(rmse))"
+ ],
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Root Mean Squared Error: 4.035874116638531\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
From 4c9f35cce503975777714c0384dfd4073a4de024 Mon Sep 17 00:00:00 2001
From: sandeep chauhan <64914145+sandeep92134@users.noreply.github.com>
Date: Mon, 18 Jan 2021 20:29:22 +0530
Subject: [PATCH 2/2] Delete module 11 directory
---
..._of_the_Median_Values_of_Our_Dataset.ipynb | 356 ------------------
1 file changed, 356 deletions(-)
delete mode 100644 module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb
diff --git a/module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb b/module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb
deleted file mode 100644
index cae21e9..0000000
--- a/module 11/Exercise_144_Using_Linear_Regression_to_Predict_the_Accuracy_of_the_Median_Values_of_Our_Dataset.ipynb
+++ /dev/null
@@ -1,356 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "name": "Exercise 144: Using Linear Regression to Predict the Accuracy of the Median Values of Our Dataset",
- "provenance": [],
- "authorship_tag": "ABX9TyOarmSrG4xeAOWn1sZdPz4j",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- }
- },
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "WB7mxEAsryNC"
- },
- "source": [
- "The goal of this exercise is to build a machine learning model using linear regression. Your model will predict the median value of Boston houses and, based on this, we will come to a conclusion about whether the value is optimal or not.\r\n",
- "\r\n",
- "This exercise will be performed on a Jupyter Notebook.\r\n",
- "1. Open a new notebook file.\r\n",
- "2. Now, **import** all the necessary libraries, as shown in the following code snippet:\r\n",
- "```\r\n",
- " import pandas as pd\r\n",
- " import numpy as np\r\n",
- " from sklearn.linear_model import LinearRegression\r\n",
- " from sklearn.metrics import mean_squared_error\r\n",
- " from sklearn.model_selection import train_test_split\r\n",
- "```\r\n",
- "Now that we have imported the libraries, we will load the data.\r\n",
- "3. Load the dataset and view the DataFrame to look at the first five rows:\r\n",
- "```\r\n",
- " # load data\r\n",
- " housing_df = pd.read_csv('HousingData.csv')\r\n",
- " housing_df.head()\r\n",
- "```\r\n",
- "Recall that, as mentioned in Chapter 10, Data Analytics with pandas and NumPy, **housing_df = pd.read_csv('HousingData.csv')** will read the **CSV** file in parentheses and store it in a **DataFrame** called housing_df. Then, **housing_df.head()** will display the first five rows of the housing_df **DataFrame** by default.\r\n",
- "4. Next, enter the following code to clean the dataset of null values using **.dropna()**:\r\n",
- "```\r\n",
- " # drop null values\r\n",
- " housing_df = housing_df.dropna()\r\n",
- "```\r\n",
- "5. Now, declare the X and y variables, where you use X for the **predictor** columns and y for the **target** column:\r\n",
- "```\r\n",
- " # declare X and y\r\n",
- " X = housing_df.iloc[:,:-1]\r\n",
- " y = housing_df.iloc[:, -1]\r\n",
- "```\r\n",
- "6. Now we build the actual linear regression model.\r\n",
- "7. Now, find how accurate the model is. Here, we can test it on unseen data:\r\n",
- "8. We can now test the prediction by comparing the predicted **y-values**, which is **y_pred**, to the actual **y-values**, which is **y_test**:\r\n",
- "\r\n",
- "\r\n",
- "\r\n"
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "gj9xHtcjqLWu"
- },
- "source": [
- "import pandas as pd\r\n",
- "import numpy as np\r\n",
- "from sklearn.linear_model import LinearRegression\r\n",
- "from sklearn.metrics import mean_squared_error\r\n",
- "from sklearn.model_selection import train_test_split"
- ],
- "execution_count": 1,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 204
- },
- "id": "t8UNBeDQuPyM",
- "outputId": "aaf99d0a-7a2f-40b5-fe71-a45eada1f058"
- },
- "source": [
- "# load data\r\n",
- "housing_df = pd.read_csv('HousingData.csv')\r\n",
- "housing_df.head()"
- ],
- "execution_count": 2,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " CRIM | \n",
- " ZN | \n",
- " INDUS | \n",
- " CHAS | \n",
- " NOX | \n",
- " RM | \n",
- " AGE | \n",
- " DIS | \n",
- " RAD | \n",
- " TAX | \n",
- " PTRATIO | \n",
- " B | \n",
- " LSTAT | \n",
- " MEDV | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.00632 | \n",
- " 18.0 | \n",
- " 2.31 | \n",
- " 0.0 | \n",
- " 0.538 | \n",
- " 6.575 | \n",
- " 65.2 | \n",
- " 4.0900 | \n",
- " 1 | \n",
- " 296 | \n",
- " 15.3 | \n",
- " 396.90 | \n",
- " 4.98 | \n",
- " 24.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.02731 | \n",
- " 0.0 | \n",
- " 7.07 | \n",
- " 0.0 | \n",
- " 0.469 | \n",
- " 6.421 | \n",
- " 78.9 | \n",
- " 4.9671 | \n",
- " 2 | \n",
- " 242 | \n",
- " 17.8 | \n",
- " 396.90 | \n",
- " 9.14 | \n",
- " 21.6 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.02729 | \n",
- " 0.0 | \n",
- " 7.07 | \n",
- " 0.0 | \n",
- " 0.469 | \n",
- " 7.185 | \n",
- " 61.1 | \n",
- " 4.9671 | \n",
- " 2 | \n",
- " 242 | \n",
- " 17.8 | \n",
- " 392.83 | \n",
- " 4.03 | \n",
- " 34.7 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0.03237 | \n",
- " 0.0 | \n",
- " 2.18 | \n",
- " 0.0 | \n",
- " 0.458 | \n",
- " 6.998 | \n",
- " 45.8 | \n",
- " 6.0622 | \n",
- " 3 | \n",
- " 222 | \n",
- " 18.7 | \n",
- " 394.63 | \n",
- " 2.94 | \n",
- " 33.4 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.06905 | \n",
- " 0.0 | \n",
- " 2.18 | \n",
- " 0.0 | \n",
- " 0.458 | \n",
- " 7.147 | \n",
- " 54.2 | \n",
- " 6.0622 | \n",
- " 3 | \n",
- " 222 | \n",
- " 18.7 | \n",
- " 396.90 | \n",
- " NaN | \n",
- " 36.2 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " CRIM ZN INDUS CHAS NOX ... TAX PTRATIO B LSTAT MEDV\n",
- "0 0.00632 18.0 2.31 0.0 0.538 ... 296 15.3 396.90 4.98 24.0\n",
- "1 0.02731 0.0 7.07 0.0 0.469 ... 242 17.8 396.90 9.14 21.6\n",
- "2 0.02729 0.0 7.07 0.0 0.469 ... 242 17.8 392.83 4.03 34.7\n",
- "3 0.03237 0.0 2.18 0.0 0.458 ... 222 18.7 394.63 2.94 33.4\n",
- "4 0.06905 0.0 2.18 0.0 0.458 ... 222 18.7 396.90 NaN 36.2\n",
- "\n",
- "[5 rows x 14 columns]"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 2
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "gkhFUuT6uTUR"
- },
- "source": [
- "# drop null values\r\n",
- "housing_df = housing_df.dropna()\r\n"
- ],
- "execution_count": 3,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "aHDHQkZGvTpx"
- },
- "source": [
- "# declare X and y\r\n",
- "X = housing_df.iloc[:,:-1]\r\n",
- "y = housing_df.iloc[:, -1]"
- ],
- "execution_count": 4,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "YmvQbsrpuyA7"
- },
- "source": [
- "#Create training and test sets\r\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)"
- ],
- "execution_count": 5,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "ylYyCDbhvXvT"
- },
- "source": [
- "#Create the regressor: reg\r\n",
- "reg = LinearRegression()"
- ],
- "execution_count": 6,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "MVOnE4E0u6eZ",
- "outputId": "ab6cb5ef-f180-41b5-d424-2d8ff0ef13f5"
- },
- "source": [
- "#Fit the regressor to the training data\r\n",
- "reg.fit(X_train, y_train)"
- ],
- "execution_count": 7,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
- ]
- },
- "metadata": {
- "tags": []
- },
- "execution_count": 7
- }
- ]
- },
- {
- "cell_type": "code",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "VYIfTckQvACx",
- "outputId": "dbaa661b-1b6c-4874-b359-c2c90a1558dd"
- },
- "source": [
- "# Predict on the test data: y_pred\r\n",
- "y_pred = reg.predict(X_test)\r\n",
- "# Compute and print RMSE\r\n",
- "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\r\n",
- "print(\"Root Mean Squared Error: {}\".format(rmse))"
- ],
- "execution_count": 8,
- "outputs": [
- {
- "output_type": "stream",
- "text": [
- "Root Mean Squared Error: 4.035874116638531\n"
- ],
- "name": "stdout"
- }
- ]
- }
- ]
-}
\ No newline at end of file