Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

148 lines
4.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Import the necessary modules and prepare the data**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "k5QR71xFLGkh"
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib as mpl\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import statsmodels.formula.api as smf\n",
"import statsmodels.graphics.api as smg\n",
"import pandas as pd\n",
"import numpy as np\n",
"import patsy\n",
"from statsmodels.graphics.correlation import plot_corr\n",
"from sklearn.model_selection import train_test_split\n",
"# The bare 'seaborn' style name was deprecated in matplotlib 3.6 (renamed to 'seaborn-v0_8')\n",
"# and is no longer available in later releases. Prefer the new name when this matplotlib\n",
"# provides it, and fall back to the old one on older installations.\n",
"plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "RpkR60AxLNkl"
},
"outputs": [],
"source": [
"# Read the Boston housing CSV (relative path) into a DataFrame.\n",
"rawBostonData = pd.read_csv(filepath_or_buffer='../Dataset/Boston.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "2Qwjq3ODLWUF"
},
"outputs": [],
"source": [
"# Remove rows with missing values first, then exact duplicate rows.\n",
"rawBostonData = rawBostonData.dropna().drop_duplicates()\n",
"\n",
"# Descriptive replacements for the terse column codes in the CSV header.\n",
"# Keys are written without padding; header labels are stripped before they are matched.\n",
"columnNames = {'CRIM': 'crimeRatePerCapita',\n",
"               'ZN': 'landOver25K_sqft',\n",
"               'INDUS': 'non-retailLandProptn',\n",
"               'CHAS': 'riverDummy',\n",
"               'NOX': 'nitrixOxide_pp10m',\n",
"               'RM': 'AvgNo.RoomsPerDwelling',\n",
"               'AGE': 'ProptnOwnerOccupied',\n",
"               'DIS': 'weightedDist',\n",
"               'RAD': 'radialHighwaysAccess',\n",
"               'TAX': 'propTaxRate_per10K',\n",
"               'PTRATIO': 'pupilTeacherRatio',\n",
"               'LSTAT': 'pctLowerStatus',\n",
"               'MEDV': 'medianValue_Ks'}\n",
"\n",
"# DataFrame.rename silently ignores keys that match no column, so padded keys such as ' ZN '\n",
"# only work when the CSV header carries exactly that padding. Stripping whitespace from every\n",
"# label first makes the rename work for both padded and clean headers.\n",
"renamedBostonData = rawBostonData.rename(columns=str.strip).rename(columns=columnNames)\n",
"\n",
"# The per-capita crime rate is the target; every other column is a feature.\n",
"X = renamedBostonData.drop('crimeRatePerCapita', axis=1)\n",
"y = renamedBostonData[['crimeRatePerCapita']]\n",
"\n",
"# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.\n",
"seed = 10\n",
"test_data_size = 0.3\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_data_size, random_state=seed)\n",
"\n",
"# Re-join features and target so each split is available as a single DataFrame.\n",
"train_data = pd.concat([X_train, y_train], axis=1)\n",
"test_data = pd.concat([X_test, y_test], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "3B9t1_oXO6GB"
},
"source": [
"**Exercise 2.03: Examining a possible log-linear relationship using Python**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"colab_type": "code",
"id": "o_CbjkexLfyy",
"outputId": "134f3112-ad3e-4752-ecf3-3e899d2b0f4d"
},
"outputs": [],
"source": [
"# Use the seaborn function regplot to create a log-linear plot and fit a regression line through it\n",
"\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Keep the log-transformed target under its own name: assigning it to `y` would overwrite\n",
"# the target DataFrame `y` created in the data-preparation cell.\n",
"logCrimeRate = np.log(train_data['crimeRatePerCapita'])\n",
"\n",
"# Scatter of log crime rate against median home value, with a fitted line and 95% CI band.\n",
"sns.regplot(x='medianValue_Ks', y=logCrimeRate, ci=95, data=train_data, ax=ax,\n",
"            color='k', scatter_kws={'s': 20, 'color': 'royalblue', 'alpha': 1})\n",
"\n",
"ax.set_ylabel('log of Crime rate per Capita', fontsize=15, fontname='DejaVu Sans')\n",
"ax.set_xlabel(\"Median value of owner-occupied homes in $1000's\",\n",
"              fontsize=15, fontname='DejaVu Sans')\n",
"\n",
"# Passing None for a limit leaves matplotlib's current value for that limit in place.\n",
"ax.set_xlim(left=None, right=None)\n",
"ax.set_ylim(bottom=None, top=None)\n",
"ax.tick_params(axis='both', which='major', labelsize=12)\n",
"fig.tight_layout()"
]
}
],
"metadata": {
"colab": {
"name": "Exercise2.03.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}