{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "8UMFqsCD0xyF" }, "outputs": [], "source": [ "# Importing necessary packages\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "HSXgY0ze09cY" }, "outputs": [], "source": [ "file_url = '../bank-full.csv'\n", "bankData = pd.read_csv(file_url, sep=\";\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "executionInfo": { "elapsed": 8374, "status": "ok", "timestamp": 1573003026166, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "ecnqNxm0TZay", "outputId": "515916d0-69e2-4b67-8da1-b11ebc7bb5d0" }, "outputs": [], "source": [ "# Normalising data\n", "from sklearn import preprocessing\n", "x = bankData[['balance']].values.astype(float)\n", "# Creating the scaling function\n", "minmaxScaler = preprocessing.MinMaxScaler()\n", "# Transforming the balance data by normalising it with minmaxScalre\n", "bankData['balanceTran'] = minmaxScaler.fit_transform(x)\n", "# Printing the head of the data\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "oISJ1v9sTg_S" }, "outputs": [], "source": [ "# Adding a small numerical constant to eliminate 0 values\n", "\n", "bankData['balanceTran'] = bankData['balanceTran'] + 0.00001" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1190, "status": "ok", "timestamp": 1573003030600, "user": { "displayName": "Anthony So", "photoUrl": 
"https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "GsDGKLQzTy9O", "outputId": "d89e4bc9-09e9-4f54-d071-b2edc3784914" }, "outputs": [], "source": [ "# Let us transform values for loan data\n", "bankData['loanTran'] = 1\n", "# Giving a weight of 5 if there is no loan\n", "bankData.loc[bankData['loan'] == 'no', 'loanTran'] = 5\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1415, "status": "ok", "timestamp": 1573003040146, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "tPrwaWORT2wt", "outputId": "3687fb10-b25c-4ad4-d818-8f0ba4b6965c" }, "outputs": [], "source": [ "# Let us transform values for Housing data\n", "bankData['houseTran'] = 5\n", "# Giving a weight of 1 if the customer has a house\n", "bankData.loc[bankData['housing'] == 'no', 'houseTran'] = 1\n", "\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1023, "status": "ok", "timestamp": 1573003043755, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "C3a2zadTUGLq", "outputId": "ce60c876-3080-43c3-b3fa-8e44d17a123f" }, "outputs": [], "source": [ "# Let us now create the new variable which is a product of all these\n", "bankData['assetIndex'] = bankData['balanceTran'] * bankData['loanTran'] * bankData['houseTran']\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 1199, "status": "ok", "timestamp": 1573003063439, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "I2eFF9GLUSn0", "outputId": "68c4ab47-ac24-4e42-9588-b6414d6826fd" }, "outputs": [], "source": [ "# Finding the quantile\n", "np.quantile(bankData['assetIndex'],[0.25,0.5,0.75])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1500, "status": "ok", "timestamp": 1573003081578, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "dklJXEaNUbf7", "outputId": "646550ac-4c22-494c-a792-fa8e5c7d9b97" }, "outputs": [], "source": [ "# Creating quantiles from the assetindex data\n", "bankData['assetClass'] = 'Quant1'\n", "\n", "bankData.loc[(bankData['assetIndex'] > 0.38) & (bankData['assetIndex'] < 0.57), 'assetClass'] = 'Quant2'\n", "\n", "bankData.loc[(bankData['assetIndex'] > 0.57) & (bankData['assetIndex'] < 1.9), 'assetClass'] = 'Quant3'\n", "\n", "bankData.loc[bankData['assetIndex'] > 1.9, 'assetClass'] = 'Quant4'\n", "\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "JAt_inPjSo75" }, "outputs": [], "source": [ "# Calculating total of each asset class\n", "assetTot = bankData.groupby('assetClass')['y'].agg(assetTot='count').reset_index()\n", "# Calculating the category wise counts\n", "assetProp = bankData.groupby(['assetClass', 'y'])['y'].agg(assetCat='count').reset_index()" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 297 }, "colab_type": "code", "executionInfo": { "elapsed": 1143, "status": "ok", "timestamp": 1573003235697, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "tTGT5nBGSzgP", "outputId": "5a262e17-17ba-420e-ec7d-e9b470723f7f" }, "outputs": [], "source": [ "# Merging both the data frames\n", "assetComb = pd.merge(assetProp, assetTot, on=['assetClass'])\n", "assetComb['catProp'] = (assetComb.assetCat / assetComb.assetTot)*100\n", "assetComb" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "iJG_qZRzlVc1" }, "outputs": [], "source": [ "# Categorical variables, removing loan and housing\n", "bankCat1 = pd.get_dummies(bankData[['job','marital','education','default','contact','month','poutcome']])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "executionInfo": { "elapsed": 1007, "status": "ok", "timestamp": 1573003254182, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "N6j-jur1ljLP", "outputId": "567676f2-e3e5-48cf-8308-6a7276c09051" }, "outputs": [], "source": [ "bankNum1 = bankData[['age','day','duration','campaign','pdays','previous','assetIndex']]\n", "bankNum1.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "rhbHeJ3PlyiM" }, "outputs": [], "source": [ "# Normalise some of the numerical variables\n", "from sklearn import preprocessing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "zG-wd9qgl4xW" }, 
"outputs": [], "source": [ "# Creating the scaling function\n", "minmaxScaler = preprocessing.MinMaxScaler()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "3wDY6fPfmAki" }, "outputs": [], "source": [ "# Creating the transformation variables\n", "ageT1 = bankNum1[['age']].values.astype(float)\n", "dayT1 = bankNum1[['day']].values.astype(float)\n", "durT1 = bankNum1[['duration']].values.astype(float)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 323 }, "colab_type": "code", "executionInfo": { "elapsed": 1121, "status": "ok", "timestamp": 1573003270320, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "5MT1CP24mDBx", "outputId": "628d9b83-87b0-476a-9331-6a09ed5eb071" }, "outputs": [], "source": [ "# Transforming the balance data by normalising it with minmaxScalre\n", "bankNum1['ageTran'] = minmaxScaler.fit_transform(ageT1)\n", "bankNum1['dayTran'] = minmaxScaler.fit_transform(dayT1)\n", "bankNum1['durTran'] = minmaxScaler.fit_transform(durT1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "executionInfo": { "elapsed": 1345, "status": "ok", "timestamp": 1573003276269, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "L2LfnZDWmNJ3", "outputId": "6e72f0da-1dfc-4376-c9e5-c4a261e265f6" }, "outputs": [], "source": [ "# Let us create a new numerical variable by selecting the transformed variables\n", "bankNum2 = bankNum1[['ageTran','dayTran','durTran','campaign','pdays','previous','assetIndex']]\n", "\n", "# Printing the 
head of the data\n", "bankNum2.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 275 }, "colab_type": "code", "executionInfo": { "elapsed": 1430, "status": "ok", "timestamp": 1573003278578, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "hD6suIpqmYTd", "outputId": "58fc69cb-ff8b-483e-8286-5b3f863a7f2f" }, "outputs": [], "source": [ "# Preparing the X variables\n", "X = pd.concat([bankCat1, bankNum2], axis=1)\n", "print(X.shape)\n", "# Preparing the Y variable\n", "Y = bankData['y']\n", "print(Y.shape)\n", "X.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "4c7Jyxymmiaj" }, "outputs": [], "source": [ "\n", "from sklearn.model_selection import train_test_split\n", "# Splitting the data into train and test sets\n", "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 156 }, "colab_type": "code", "executionInfo": { "elapsed": 1265, "status": "ok", "timestamp": 1573003281602, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "EVSGc5Lom2vj", "outputId": "28442c3a-4832-4df5-f84d-3d5ef637ed02" }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "# Defining the LogisticRegression model\n", "# max_iter is raised above the library default because campaign, pdays and previous\n", "# enter X unscaled, which can stop the solver before it converges (ConvergenceWarning)\n", "bankModel = LogisticRegression(max_iter=1000)\n", "# Fitting the model on the training set\n", "bankModel.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", 
"executionInfo": { "elapsed": 838, "status": "ok", "timestamp": 1573003282951, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "GhFXSRnknr3p", "outputId": "e88952b2-01bc-497d-be39-8c2ad85f00bc" }, "outputs": [], "source": [ "pred = bankModel.predict(X_test)\n", "print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(bankModel.score(X_test, y_test)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "colab_type": "code", "executionInfo": { "elapsed": 1127, "status": "ok", "timestamp": 1573003284105, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "DVBvXZJknuJ3", "outputId": "d865240b-fdbc-4526-bd3f-01b9e42792de" }, "outputs": [], "source": [ "# Confusion Matrix for the model\n", "from sklearn.metrics import confusion_matrix\n", "confusionMatrix = confusion_matrix(y_test, pred)\n", "print(confusionMatrix)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 170 }, "colab_type": "code", "executionInfo": { "elapsed": 1461, "status": "ok", "timestamp": 1573003287122, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "BqcxgdLun0HY", "outputId": "2956690b-bee6-42db-826b-670f4c47d5ae" }, "outputs": [], "source": [ "from sklearn.metrics import classification_report\n", "print(classification_report(y_test, pred))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": 
{ "collapsed_sections": [], "name": "Activity3.02_updated.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 1 }