Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

646 lines
16 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "8UMFqsCD0xyF"
},
"outputs": [],
"source": [
"# Importing necessary packages\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HSXgY0ze09cY"
},
"outputs": [],
"source": [
"file_url = '../bank-full.csv'\n",
"bankData = pd.read_csv(file_url, sep=\";\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 8374,
"status": "ok",
"timestamp": 1573003026166,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "ecnqNxm0TZay",
"outputId": "515916d0-69e2-4b67-8da1-b11ebc7bb5d0"
},
"outputs": [],
"source": [
"# Normalising data\n",
"from sklearn import preprocessing\n",
"x = bankData[['balance']].values.astype(float)\n",
"# Creating the scaling function\n",
"minmaxScaler = preprocessing.MinMaxScaler()\n",
"# Transforming the balance data by normalising it with minmaxScalre\n",
"bankData['balanceTran'] = minmaxScaler.fit_transform(x)\n",
"# Printing the head of the data\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "oISJ1v9sTg_S"
},
"outputs": [],
"source": [
"# Adding a small numerical constant to eliminate 0 values\n",
"\n",
"bankData['balanceTran'] = bankData['balanceTran'] + 0.00001"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1190,
"status": "ok",
"timestamp": 1573003030600,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "GsDGKLQzTy9O",
"outputId": "d89e4bc9-09e9-4f54-d071-b2edc3784914"
},
"outputs": [],
"source": [
"# Let us transform values for loan data\n",
"bankData['loanTran'] = 1\n",
"# Giving a weight of 5 if there is no loan\n",
"bankData.loc[bankData['loan'] == 'no', 'loanTran'] = 5\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1415,
"status": "ok",
"timestamp": 1573003040146,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "tPrwaWORT2wt",
"outputId": "3687fb10-b25c-4ad4-d818-8f0ba4b6965c"
},
"outputs": [],
"source": [
"# Let us transform values for Housing data\n",
"bankData['houseTran'] = 5\n",
"# Giving a weight of 1 if the customer has a house\n",
"bankData.loc[bankData['housing'] == 'no', 'houseTran'] = 1\n",
"\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1023,
"status": "ok",
"timestamp": 1573003043755,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "C3a2zadTUGLq",
"outputId": "ce60c876-3080-43c3-b3fa-8e44d17a123f"
},
"outputs": [],
"source": [
"# Let us now create the new variable which is a product of all these\n",
"bankData['assetIndex'] = bankData['balanceTran'] * bankData['loanTran'] * bankData['houseTran']\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1199,
"status": "ok",
"timestamp": 1573003063439,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "I2eFF9GLUSn0",
"outputId": "68c4ab47-ac24-4e42-9588-b6414d6826fd"
},
"outputs": [],
"source": [
"# Finding the quantile\n",
"np.quantile(bankData['assetIndex'],[0.25,0.5,0.75])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1500,
"status": "ok",
"timestamp": 1573003081578,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "dklJXEaNUbf7",
"outputId": "646550ac-4c22-494c-a792-fa8e5c7d9b97"
},
"outputs": [],
"source": [
"# Creating quantiles from the assetindex data\n",
"bankData['assetClass'] = 'Quant1'\n",
"\n",
"bankData.loc[(bankData['assetIndex'] > 0.38) & (bankData['assetIndex'] < 0.57), 'assetClass'] = 'Quant2'\n",
"\n",
"bankData.loc[(bankData['assetIndex'] > 0.57) & (bankData['assetIndex'] < 1.9), 'assetClass'] = 'Quant3'\n",
"\n",
"bankData.loc[bankData['assetIndex'] > 1.9, 'assetClass'] = 'Quant4'\n",
"\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "JAt_inPjSo75"
},
"outputs": [],
"source": [
"# Calculating total of each asset class\n",
"assetTot = bankData.groupby('assetClass')['y'].agg(assetTot='count').reset_index()\n",
"# Calculating the category wise counts\n",
"assetProp = bankData.groupby(['assetClass', 'y'])['y'].agg(assetCat='count').reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1143,
"status": "ok",
"timestamp": 1573003235697,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "tTGT5nBGSzgP",
"outputId": "5a262e17-17ba-420e-ec7d-e9b470723f7f"
},
"outputs": [],
"source": [
"# Merging both the data frames\n",
"assetComb = pd.merge(assetProp, assetTot, on=['assetClass'])\n",
"assetComb['catProp'] = (assetComb.assetCat / assetComb.assetTot)*100\n",
"assetComb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "iJG_qZRzlVc1"
},
"outputs": [],
"source": [
"# Categorical variables, removing loan and housing\n",
"bankCat1 = pd.get_dummies(bankData[['job','marital','education','default','contact','month','poutcome']])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1007,
"status": "ok",
"timestamp": 1573003254182,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "N6j-jur1ljLP",
"outputId": "567676f2-e3e5-48cf-8308-6a7276c09051"
},
"outputs": [],
"source": [
"bankNum1 = bankData[['age','day','duration','campaign','pdays','previous','assetIndex']]\n",
"bankNum1.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "rhbHeJ3PlyiM"
},
"outputs": [],
"source": [
"# Normalise some of the numerical variables\n",
"from sklearn import preprocessing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "zG-wd9qgl4xW"
},
"outputs": [],
"source": [
"# Creating the scaling function\n",
"minmaxScaler = preprocessing.MinMaxScaler()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3wDY6fPfmAki"
},
"outputs": [],
"source": [
"# Creating the transformation variables\n",
"ageT1 = bankNum1[['age']].values.astype(float)\n",
"dayT1 = bankNum1[['day']].values.astype(float)\n",
"durT1 = bankNum1[['duration']].values.astype(float)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 323
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1121,
"status": "ok",
"timestamp": 1573003270320,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "5MT1CP24mDBx",
"outputId": "628d9b83-87b0-476a-9331-6a09ed5eb071"
},
"outputs": [],
"source": [
"# Transforming the balance data by normalising it with minmaxScalre\n",
"bankNum1['ageTran'] = minmaxScaler.fit_transform(ageT1)\n",
"bankNum1['dayTran'] = minmaxScaler.fit_transform(dayT1)\n",
"bankNum1['durTran'] = minmaxScaler.fit_transform(durT1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1345,
"status": "ok",
"timestamp": 1573003276269,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "L2LfnZDWmNJ3",
"outputId": "6e72f0da-1dfc-4376-c9e5-c4a261e265f6"
},
"outputs": [],
"source": [
"# Let us create a new numerical variable by selecting the transformed variables\n",
"bankNum2 = bankNum1[['ageTran','dayTran','durTran','campaign','pdays','previous','assetIndex']]\n",
"\n",
"# Printing the head of the data\n",
"bankNum2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 275
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1430,
"status": "ok",
"timestamp": 1573003278578,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "hD6suIpqmYTd",
"outputId": "58fc69cb-ff8b-483e-8286-5b3f863a7f2f"
},
"outputs": [],
"source": [
"# Preparing the X variables\n",
"X = pd.concat([bankCat1, bankNum2], axis=1)\n",
"print(X.shape)\n",
"# Preparing the Y variable\n",
"Y = bankData['y']\n",
"print(Y.shape)\n",
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "4c7Jyxymmiaj"
},
"outputs": [],
"source": [
"\n",
"from sklearn.model_selection import train_test_split\n",
"# Splitting the data into train and test sets\n",
"X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 156
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1265,
"status": "ok",
"timestamp": 1573003281602,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "EVSGc5Lom2vj",
"outputId": "28442c3a-4832-4df5-f84d-3d5ef637ed02"
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"# Defining the LogisticRegression function\n",
"bankModel = LogisticRegression()\n",
"bankModel.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 838,
"status": "ok",
"timestamp": 1573003282951,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "GhFXSRnknr3p",
"outputId": "e88952b2-01bc-497d-be39-8c2ad85f00bc"
},
"outputs": [],
"source": [
"pred = bankModel.predict(X_test)\n",
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(bankModel.score(X_test, y_test)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1127,
"status": "ok",
"timestamp": 1573003284105,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "DVBvXZJknuJ3",
"outputId": "d865240b-fdbc-4526-bd3f-01b9e42792de"
},
"outputs": [],
"source": [
"# Confusion Matrix for the model\n",
"from sklearn.metrics import confusion_matrix\n",
"confusionMatrix = confusion_matrix(y_test, pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1461,
"status": "ok",
"timestamp": 1573003287122,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "BqcxgdLun0HY",
"outputId": "2956690b-bee6-42db-826b-670f4c47d5ae"
},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"print(classification_report(y_test, pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Activity3.02_updated.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}