{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "GalWZL42gUJL"
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib as mpl\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.graphics.api as smg\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import patsy\n",
    "from statsmodels.graphics.correlation import plot_corr\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8' and\n",
    "# 3.8 removed the old name, so pick whichever name this installation provides.\n",
    "plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "6_lKgFqPgeyk"
   },
   "outputs": [],
   "source": [
    "# Load the raw data; the path is resolved relative to the working directory.\n",
    "rawBostonData = pd.read_csv('../Dataset/Boston.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "id": "yB1hG1WKglOM",
    "outputId": "2030133d-d616-4059-f647-da075e03ed12"
   },
   "outputs": [],
   "source": [
    "# Preview the first rows of the raw frame.\n",
    "rawBostonData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "1krtNoLygsJx"
   },
   "outputs": [],
   "source": [
    "# Drop every row that contains at least one missing value.\n",
    "rawBostonData = rawBostonData.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Eyxd8QXwgw_d"
   },
   "outputs": [],
   "source": [
    "# Drop rows that exactly repeat an earlier row (the first occurrence is kept).\n",
    "rawBostonData = rawBostonData.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 238
    },
    "colab_type": "code",
    "id": "1Z62CWO_gzc5",
    "outputId": "8868804a-e4f7-41fa-c07f-6b759bd63eb5"
   },
   "outputs": [],
   "source": [
    "# Show the column names exactly as they were read; the rename below keys on them.\n",
    "list(rawBostonData.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 241
    },
    "colab_type": "code",
    "id": "EB7FBISzg2yx",
    "outputId": "92c8bde0-5493-442a-ee48-8eefdc48aaba"
   },
   "outputs": [],
   "source": [
    "# Map the terse raw column headers to descriptive names.\n",
    "# NOTE(review): the ' ZN ' and 'INDUS ' keys include surrounding spaces, presumably\n",
    "# to match the raw CSV headers -- confirm against the column list printed above.\n",
    "# DataFrame.rename ignores keys that match no column, so a mismatch fails silently.\n",
    "renamedBostonData = rawBostonData.rename(columns = {\n",
    "    'CRIM': 'crimeRatePerCapita',\n",
    "    ' ZN ': 'landOver25K_sqft',\n",
    "    'INDUS ': 'non-retailLandProptn',\n",
    "    'CHAS': 'riverDummy',\n",
    "    'NOX': 'nitrixOxide_pp10m',\n",
    "    'RM': 'AvgNo.RoomsPerDwelling',\n",
    "    'AGE': 'ProptnOwnerOccupied',\n",
    "    'DIS': 'weightedDist',\n",
    "    'RAD': 'radialHighwaysAccess',\n",
    "    'TAX': 'propTaxRate_per10K',\n",
    "    'PTRATIO': 'pupilTeacherRatio',\n",
    "    'LSTAT': 'pctLowerStatus',\n",
    "    'MEDV': 'medianValue_Ks',\n",
    "})\n",
    "renamedBostonData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 323
    },
    "colab_type": "code",
    "id": "N_cmLg_5g56i",
    "outputId": "f11f011c-9d31-4ff5-a03b-e380356fc2a3"
   },
   "outputs": [],
   "source": [
    "# Column dtypes and non-null counts after cleaning and renaming.\n",
    "renamedBostonData.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 452
    },
    "colab_type": "code",
    "id": "X8khkdpjgv-K",
    "outputId": "478bc4de-75c2-413b-dc1c-7156d8bd7c82"
   },
   "outputs": [],
   "source": [
    "# Summary statistics for the numeric columns, transposed so each column is a row.\n",
    "renamedBostonData.describe(include=[np.number]).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "oO_etrVcg-gV"
   },
   "outputs": [],
   "source": [
    "# Use the per-capita crime rate as the target (y) and all other columns as features (X).\n",
    "X = renamedBostonData.drop('crimeRatePerCapita', axis = 1)\n",
    "# Double brackets keep y a one-column DataFrame so it can be concatenated below.\n",
    "y = renamedBostonData[['crimeRatePerCapita']]\n",
    "\n",
    "# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.\n",
    "seed = 10\n",
    "test_data_size = 0.3\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_data_size, random_state = seed)\n",
    "\n",
    "# Re-attach the target column to each partition so each is a single frame.\n",
    "train_data = pd.concat([X_train, y_train], axis = 1)\n",
    "test_data = pd.concat([X_test, y_test], axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 793
    },
    "colab_type": "code",
    "id": "dwCbKxJqhBee",
    "outputId": "d1604316-dc31-4a5f-af82-75cfc8b70faf"
   },
   "outputs": [],
   "source": [
    "# Pairwise Pearson correlations across all training columns (features and target).\n",
    "corrMatrix = train_data.corr(method = 'pearson')\n",
    "xnames=list(train_data.columns)\n",
    "ynames=list(train_data.columns)\n",
    "# The trailing ';' keeps the returned Figure from being echoed as cell output,\n",
    "# which would draw the heat map a second time under the inline backend.\n",
    "plot_corr(corrMatrix, xnames=xnames, ynames=ynames,\n",
    "          title=None, normcolor=False, cmap='RdYlBu_r');"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise2.01.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}