Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

318 lines
8.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3WCj36D5L7_G"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HSXgY0ze09cY"
},
"outputs": [],
"source": [
"file_url = '../bank-full.csv'\n",
"bankData = pd.read_csv(file_url, sep=\";\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1259,
"status": "ok",
"timestamp": 1573001725172,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "ecnqNxm0TZay",
"outputId": "58e9073a-cdaf-4ce6-a6ce-7d318d225e13"
},
"outputs": [],
"source": [
"# Normalising data\n",
"from sklearn import preprocessing\n",
"x = bankData[['balance']].values.astype(float)\n",
"# Creating the scaling function\n",
"minmaxScaler = preprocessing.MinMaxScaler()\n",
"# Transforming the balance data by normalising it with minmaxScalre\n",
"bankData['balanceTran'] = minmaxScaler.fit_transform(x)\n",
"# Printing the head of the data\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "oISJ1v9sTg_S"
},
"outputs": [],
"source": [
"# Adding a small numerical constant to eliminate 0 values\n",
"\n",
"bankData['balanceTran'] = bankData['balanceTran'] + 0.00001"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1635,
"status": "ok",
"timestamp": 1573001725560,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "GsDGKLQzTy9O",
"outputId": "57d18d01-8c21-4de4-a014-15d7579f7572"
},
"outputs": [],
"source": [
"# Let us transform values for loan data\n",
"bankData['loanTran'] = 1\n",
"# Giving a weight of 5 if there is no loan\n",
"bankData.loc[bankData['loan'] == 'no', 'loanTran'] = 5\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1629,
"status": "ok",
"timestamp": 1573001725561,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "tPrwaWORT2wt",
"outputId": "0f1971e0-ee7e-4db8-82b0-ebb40a642b60"
},
"outputs": [],
"source": [
"# Let us transform values for Housing data\n",
"bankData['houseTran'] = 5\n",
"# Giving a weight of 1 if the customer has a house\n",
"bankData.loc[bankData['housing'] == 'no', 'houseTran'] = 1\n",
"\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1623,
"status": "ok",
"timestamp": 1573001725561,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "C3a2zadTUGLq",
"outputId": "173f3468-48c8-46f3-f769-467eddce1911"
},
"outputs": [],
"source": [
"# Let us now create the new variable which is a product of all these\n",
"bankData['assetIndex'] = bankData['balanceTran'] * bankData['loanTran'] * bankData['houseTran']\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1618,
"status": "ok",
"timestamp": 1573001725562,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "I2eFF9GLUSn0",
"outputId": "0581c004-bd06-4072-dadf-01fed00e370e"
},
"outputs": [],
"source": [
"# Finding the quantile\n",
"np.quantile(bankData['assetIndex'],[0.25,0.5,0.75])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1609,
"status": "ok",
"timestamp": 1573001725562,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "dklJXEaNUbf7",
"outputId": "8f5b4dbb-f9d4-4db9-d1fd-b92533c5c238"
},
"outputs": [],
"source": [
"# Creating quantiles from the assetindex data\n",
"bankData['assetClass'] = 'Quant1'\n",
"\n",
"bankData.loc[(bankData['assetIndex'] > 0.38) & (bankData['assetIndex'] < 0.57), 'assetClass'] = 'Quant2'\n",
"\n",
"bankData.loc[(bankData['assetIndex'] > 0.57) & (bankData['assetIndex'] < 1.9), 'assetClass'] = 'Quant3'\n",
"\n",
"bankData.loc[bankData['assetIndex'] > 1.9, 'assetClass'] = 'Quant4'\n",
"\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HFr4R4pBMKZ0"
},
"outputs": [],
"source": [
"# Calculating total of each asset class\n",
"assetTot = bankData.groupby('assetClass')['y'].agg(assetTot='count').reset_index()\n",
"# Calculating the category wise counts\n",
"assetProp = bankData.groupby(['assetClass', 'y'])['y'].agg(assetCat='count').reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1598,
"status": "ok",
"timestamp": 1573001725563,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "28dy-0arMjIx",
"outputId": "2686a10a-9471-4b17-c322-a8fa512af776"
},
"outputs": [],
"source": [
"# Merging both the data frames\n",
"assetComb = pd.merge(assetProp, assetTot, on = ['assetClass'])\n",
"assetComb['catProp'] = (assetComb.assetCat / assetComb.assetTot)*100\n",
"assetComb"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Exercise3.04_updated.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}