mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
318 lines
8.1 KiB
Plaintext
318 lines
8.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "3WCj36D5L7_G"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HSXgY0ze09cY"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../bank-full.csv'\n",
|
|
"bankData = pd.read_csv(file_url, sep=\";\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1259,
|
|
"status": "ok",
|
|
"timestamp": 1573001725172,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "ecnqNxm0TZay",
|
|
"outputId": "58e9073a-cdaf-4ce6-a6ce-7d318d225e13"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Normalising data\n",
|
|
"from sklearn import preprocessing\n",
|
|
"x = bankData[['balance']].values.astype(float)\n",
|
|
"# Creating the scaling function\n",
|
|
"minmaxScaler = preprocessing.MinMaxScaler()\n",
|
|
"# Transforming the balance data by normalising it with minmaxScalre\n",
|
|
"bankData['balanceTran'] = minmaxScaler.fit_transform(x)\n",
|
|
"# Printing the head of the data\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "oISJ1v9sTg_S"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Adding a small numerical constant to eliminate 0 values\n",
|
|
"\n",
|
|
"bankData['balanceTran'] = bankData['balanceTran'] + 0.00001"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1635,
|
|
"status": "ok",
|
|
"timestamp": 1573001725560,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "GsDGKLQzTy9O",
|
|
"outputId": "57d18d01-8c21-4de4-a014-15d7579f7572"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Let us transform values for loan data\n",
|
|
"bankData['loanTran'] = 1\n",
|
|
"# Giving a weight of 5 if there is no loan\n",
|
|
"bankData.loc[bankData['loan'] == 'no', 'loanTran'] = 5\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1629,
|
|
"status": "ok",
|
|
"timestamp": 1573001725561,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "tPrwaWORT2wt",
|
|
"outputId": "0f1971e0-ee7e-4db8-82b0-ebb40a642b60"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Let us transform values for Housing data\n",
|
|
"bankData['houseTran'] = 5\n",
|
|
"# Giving a weight of 1 if the customer has a house\n",
|
|
"bankData.loc[bankData['housing'] == 'no', 'houseTran'] = 1\n",
|
|
"\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1623,
|
|
"status": "ok",
|
|
"timestamp": 1573001725561,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "C3a2zadTUGLq",
|
|
"outputId": "173f3468-48c8-46f3-f769-467eddce1911"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Let us now create the new variable which is a product of all these\n",
|
|
"bankData['assetIndex'] = bankData['balanceTran'] * bankData['loanTran'] * bankData['houseTran']\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1618,
|
|
"status": "ok",
|
|
"timestamp": 1573001725562,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "I2eFF9GLUSn0",
|
|
"outputId": "0581c004-bd06-4072-dadf-01fed00e370e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Finding the quantile\n",
|
|
"np.quantile(bankData['assetIndex'],[0.25,0.5,0.75])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1609,
|
|
"status": "ok",
|
|
"timestamp": 1573001725562,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "dklJXEaNUbf7",
|
|
"outputId": "8f5b4dbb-f9d4-4db9-d1fd-b92533c5c238"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Creating quantiles from the assetindex data\n",
|
|
"bankData['assetClass'] = 'Quant1'\n",
|
|
"\n",
|
|
"bankData.loc[(bankData['assetIndex'] > 0.38) & (bankData['assetIndex'] < 0.57), 'assetClass'] = 'Quant2'\n",
|
|
"\n",
|
|
"bankData.loc[(bankData['assetIndex'] > 0.57) & (bankData['assetIndex'] < 1.9), 'assetClass'] = 'Quant3'\n",
|
|
"\n",
|
|
"bankData.loc[bankData['assetIndex'] > 1.9, 'assetClass'] = 'Quant4'\n",
|
|
"\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HFr4R4pBMKZ0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Calculating total of each asset class\n",
|
|
"assetTot = bankData.groupby('assetClass')['y'].agg(assetTot='count').reset_index()\n",
|
|
"# Calculating the category wise counts\n",
|
|
"assetProp = bankData.groupby(['assetClass', 'y'])['y'].agg(assetCat='count').reset_index()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 297
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1598,
|
|
"status": "ok",
|
|
"timestamp": 1573001725563,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "28dy-0arMjIx",
|
|
"outputId": "2686a10a-9471-4b17-c322-a8fa512af776"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Merging both the data frames\n",
|
|
"assetComb = pd.merge(assetProp, assetTot, on = ['assetClass'])\n",
|
|
"assetComb['catProp'] = (assetComb.assetCat / assetComb.assetTot)*100\n",
|
|
"assetComb"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise3.04_updated.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|