Files
mlessentials/Lab02/Activity2.01/Activity2_01.ipynb
T
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

172 lines
3.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "htcWC_rahuEO"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels.formula.api as smf\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "nMp4vGhHh24o"
},
"outputs": [],
"source": [
"# Read the Boston CSV (path is relative to this notebook's working directory) into a DataFrame.\n",
"rawBostonData = pd.read_csv(filepath_or_buffer='../Dataset/Boston.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "rinZvyYSh5E1"
},
"outputs": [],
"source": [
"# Discard every row that contains at least one missing value.\n",
"rawBostonData = rawBostonData.dropna(axis=0, how='any')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "66P7Nrqrh7R3"
},
"outputs": [],
"source": [
"# Remove rows that exactly repeat an earlier row, keeping the first occurrence.\n",
"rawBostonData = rawBostonData.drop_duplicates(keep='first')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "SvaVeiVYh9bI"
},
"outputs": [],
"source": [
"# Give the terse dataset headers descriptive names.\n",
"# Headers are whitespace-stripped first: DataFrame.rename silently skips keys that\n",
"# do not match, so padded headers such as ' ZN ' / 'INDUS ' would otherwise be\n",
"# renamed only when the padding in the CSV matches character-for-character.\n",
"renamedBostonData = rawBostonData.rename(columns=str.strip).rename(columns={\n",
"    'CRIM': 'crimeRatePerCapita',\n",
"    'ZN': 'landOver25K_sqft',\n",
"    'INDUS': 'non-retailLandProptn',\n",
"    'CHAS': 'riverDummy',\n",
"    'NOX': 'nitrixOxide_pp10m',\n",
"    'RM': 'AvgNo.RoomsPerDwelling',\n",
"    'AGE': 'ProptnOwnerOccupied',\n",
"    'DIS': 'weightedDist',\n",
"    'RAD': 'radialHighwaysAccess',\n",
"    'TAX': 'propTaxRate_per10K',\n",
"    'PTRATIO': 'pupilTeacherRatio',\n",
"    'LSTAT': 'pctLowerStatus',\n",
"    'MEDV': 'medianValue_Ks',\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "2bdQ0pNxh_ux"
},
"outputs": [],
"source": [
"# Predictors are every column except crimeRatePerCapita; the target is that column alone.\n",
"X = renamedBostonData.drop(columns='crimeRatePerCapita')\n",
"y = renamedBostonData[['crimeRatePerCapita']]\n",
"\n",
"# Hold out 30% of the rows; the fixed seed makes the split repeatable.\n",
"seed = 10\n",
"test_data_size = 0.3\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=test_data_size, random_state=seed\n",
")\n",
"\n",
"# Put predictors and target back side by side within each partition.\n",
"train_data = pd.concat([X_train, y_train], axis='columns')\n",
"test_data = pd.concat([X_test, y_test], axis='columns')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "pS55MGThiChz"
},
"outputs": [],
"source": [
"# Specify an OLS model of log(crimeRatePerCapita) on medianValue_Ks over the training rows.\n",
"logLinearModel = smf.ols(\n",
"    formula='np.log(crimeRatePerCapita) ~ medianValue_Ks',\n",
"    data=train_data,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "T2EmISCBkr1t"
},
"outputs": [],
"source": [
"# Fit the specified model and keep the results object for inspection.\n",
"logLinearModResult = logLinearModel.fit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 442
},
"colab_type": "code",
"id": "n-D9pwOSkthe",
"outputId": "4a111658-9370-491c-d38b-d480f783e8b8"
},
"outputs": [],
"source": [
"# Print the summary report of the fitted model.\n",
"print(logLinearModResult.summary())"
]
}
],
"metadata": {
"colab": {
"name": "Activity2_01.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}