{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Import the necessary modules and prepare the data**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "k5QR71xFLGkh"
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib as mpl\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.graphics.api as smg\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import patsy\n",
    "from statsmodels.graphics.correlation import plot_corr\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and\n",
    "# later releases dropped the old names, so pick whichever name this install provides.\n",
    "seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
    "plt.style.use(seaborn_style)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "RpkR60AxLNkl"
   },
   "outputs": [],
   "source": [
    "rawBostonData = pd.read_csv('../Dataset/Boston.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "2Qwjq3ODLWUF"
   },
   "outputs": [],
   "source": [
    "# Drop rows with missing values, then exact duplicate rows.\n",
    "rawBostonData = rawBostonData.dropna()\n",
    "rawBostonData = rawBostonData.drop_duplicates()\n",
    "\n",
    "# Replace the terse column codes with descriptive names.\n",
    "# NOTE(review): the ' ZN ' and 'INDUS ' keys contain spaces; presumably they mirror\n",
    "# the CSV header exactly - verify against the file before normalising them.\n",
    "renamedBostonData = rawBostonData.rename(columns={\n",
    "    'CRIM': 'crimeRatePerCapita',\n",
    "    ' ZN ': 'landOver25K_sqft',\n",
    "    'INDUS ': 'non-retailLandProptn',\n",
    "    'CHAS': 'riverDummy',\n",
    "    'NOX': 'nitrixOxide_pp10m',\n",
    "    'RM': 'AvgNo.RoomsPerDwelling',\n",
    "    'AGE': 'ProptnOwnerOccupied',\n",
    "    'DIS': 'weightedDist',\n",
    "    'RAD': 'radialHighwaysAccess',\n",
    "    'TAX': 'propTaxRate_per10K',\n",
    "    'PTRATIO': 'pupilTeacherRatio',\n",
    "    'LSTAT': 'pctLowerStatus',\n",
    "    'MEDV': 'medianValue_Ks',\n",
    "})\n",
    "\n",
    "# Every column except the crime rate is a predictor; the crime rate is the target.\n",
    "X = renamedBostonData.drop('crimeRatePerCapita', axis=1)\n",
    "y = renamedBostonData[['crimeRatePerCapita']]\n",
    "\n",
    "# Hold out 30% of the rows; the fixed seed keeps the split reproducible.\n",
    "seed = 10\n",
    "test_data_size = 0.3\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=test_data_size, random_state=seed)\n",
    "\n",
    "# Re-attach the target column so each split is a single DataFrame.\n",
    "train_data = pd.concat([X_train, y_train], axis=1)\n",
    "test_data = pd.concat([X_test, y_test], axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "3B9t1_oXO6GB"
   },
   "source": [
    "**Exercise 2.02: Graphical investigation of linear relationships using Python**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 441
    },
    "colab_type": "code",
    "id": "o_CbjkexLfyy",
    "outputId": "eae3ef65-3a95-468b-c1e8-2e3f75dd3e85"
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "# Scatter of the training rows plus a fitted regression line, drawn without a\n",
    "# confidence band (ci=None).\n",
    "sns.regplot(x='medianValue_Ks', y='crimeRatePerCapita', ci=None,\n",
    "            data=train_data, ax=ax, color='k',\n",
    "            scatter_kws={\"s\": 20, \"color\": \"royalblue\", \"alpha\": 1})\n",
    "ax.set_ylabel('Crime rate per Capita', fontsize=15, fontname='DejaVu Sans')\n",
    "ax.set_xlabel(\"Median value of owner-occupied homes in $1000's\",\n",
    "              fontsize=15, fontname='DejaVu Sans')\n",
    "ax.set_xlim(left=None, right=None)\n",
    "# Cap the y-axis at 30; points with a higher crime rate fall outside the visible area.\n",
    "ax.set_ylim(bottom=None, top=30)\n",
    "ax.tick_params(axis='both', which='major', labelsize=12)\n",
    "fig.tight_layout()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise2.02.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}