{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "3WCj36D5L7_G" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "HSXgY0ze09cY" }, "outputs": [], "source": [ "file_url = '../bank-full.csv'\n", "bankData = pd.read_csv(file_url, sep=\";\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "executionInfo": { "elapsed": 1259, "status": "ok", "timestamp": 1573001725172, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "ecnqNxm0TZay", "outputId": "58e9073a-cdaf-4ce6-a6ce-7d318d225e13" }, "outputs": [], "source": [ "# Normalising data\n", "from sklearn import preprocessing\n", "x = bankData[['balance']].values.astype(float)\n", "# Creating the scaling function\n", "minmaxScaler = preprocessing.MinMaxScaler()\n", "# Transforming the balance data by normalising it with minmaxScalre\n", "bankData['balanceTran'] = minmaxScaler.fit_transform(x)\n", "# Printing the head of the data\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "oISJ1v9sTg_S" }, "outputs": [], "source": [ "# Adding a small numerical constant to eliminate 0 values\n", "\n", "bankData['balanceTran'] = bankData['balanceTran'] + 0.00001" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1635, "status": "ok", "timestamp": 1573001725560, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "GsDGKLQzTy9O", "outputId": "57d18d01-8c21-4de4-a014-15d7579f7572" }, "outputs": [], "source": [ "# Let us transform values for loan data\n", "bankData['loanTran'] = 1\n", "# Giving a weight of 5 if there is no loan\n", "bankData.loc[bankData['loan'] == 'no', 'loanTran'] = 5\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1629, "status": "ok", "timestamp": 1573001725561, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "tPrwaWORT2wt", "outputId": "0f1971e0-ee7e-4db8-82b0-ebb40a642b60" }, "outputs": [], "source": [ "# Let us transform values for Housing data\n", "bankData['houseTran'] = 5\n", "# Giving a weight of 1 if the customer has a house\n", "bankData.loc[bankData['housing'] == 'no', 'houseTran'] = 1\n", "\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1623, "status": "ok", "timestamp": 1573001725561, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "C3a2zadTUGLq", "outputId": "173f3468-48c8-46f3-f769-467eddce1911" }, "outputs": [], "source": [ "# Let us now create the new variable which is a product of all these\n", "bankData['assetIndex'] = bankData['balanceTran'] * bankData['loanTran'] * bankData['houseTran']\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 1618, "status": "ok", "timestamp": 1573001725562, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "I2eFF9GLUSn0", "outputId": "0581c004-bd06-4072-dadf-01fed00e370e" }, "outputs": [], "source": [ "# Finding the quantile\n", "np.quantile(bankData['assetIndex'],[0.25,0.5,0.75])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "executionInfo": { "elapsed": 1609, "status": "ok", "timestamp": 1573001725562, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "dklJXEaNUbf7", "outputId": "8f5b4dbb-f9d4-4db9-d1fd-b92533c5c238" }, "outputs": [], "source": [ "# Creating quantiles from the assetindex data\n", "bankData['assetClass'] = 'Quant1'\n", "\n", "bankData.loc[(bankData['assetIndex'] > 0.38) & (bankData['assetIndex'] < 0.57), 'assetClass'] = 'Quant2'\n", "\n", "bankData.loc[(bankData['assetIndex'] > 0.57) & (bankData['assetIndex'] < 1.9), 'assetClass'] = 'Quant3'\n", "\n", "bankData.loc[bankData['assetIndex'] > 1.9, 'assetClass'] = 'Quant4'\n", "\n", "bankData.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "HFr4R4pBMKZ0" }, "outputs": [], "source": [ "# Calculating total of each asset class\n", "assetTot = bankData.groupby('assetClass')['y'].agg(assetTot='count').reset_index()\n", "# Calculating the category wise counts\n", "assetProp = bankData.groupby(['assetClass', 'y'])['y'].agg(assetCat='count').reset_index()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 297 }, "colab_type": "code", "executionInfo": { "elapsed": 1598, "status": "ok", "timestamp": 1573001725563, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "28dy-0arMjIx", "outputId": "2686a10a-9471-4b17-c322-a8fa512af776" }, "outputs": [], "source": [ "# Merging both the data frames\n", "assetComb = pd.merge(assetProp, assetTot, on = ['assetClass'])\n", "assetComb['catProp'] = (assetComb.assetCat / assetComb.assetTot)*100\n", "assetComb" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "Exercise3.04_updated.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 1 }