mirror of
https://github.com/fenago/data-science.git
synced 2026-05-10 03:21:17 +00:00
291 lines
7.5 KiB
Plaintext
291 lines
7.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "aUF-lH3fFBsa"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2336,
|
|
"status": "ok",
|
|
"timestamp": 1573001775617,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "8JSOq_4DFCKB",
|
|
"outputId": "97e1bf86-b8b7-4b63-b795-0a771d1e0336"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../bank-full.csv'\n",
|
|
"bankData = pd.read_csv(file_url, sep=\";\")\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 173
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2328,
|
|
"status": "ok",
|
|
"timestamp": 1573001775618,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "5_K4qSskFOiX",
|
|
"outputId": "4f6d26d9-9d9c-41fb-bb4c-bec08a74da4c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Relationship between housing and propensity for term deposits\n",
|
|
"bankData.groupby(['housing', 'y'])['y'].agg(houseTot='count').reset_index()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 173
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2322,
|
|
"status": "ok",
|
|
"timestamp": 1573001775619,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "HBJQtKjUFfC1",
|
|
"outputId": "42939e01-5c7f-4e13-8f93-a97fba2d62bb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Relationship between having a loan and propensity for term deposits\n",
|
|
"bankData.groupby(['loan', 'y'])['y'].agg(loanTot='count').reset_index()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2315,
|
|
"status": "ok",
|
|
"timestamp": 1573001775619,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "ktpNZGC2Qz70",
|
|
"outputId": "ca650459-4e00-427e-e913-c84e46d4e26b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Taking the quantiles for 25%, 50% and 75% of the balance data\n",
|
|
"import numpy as np\n",
|
|
"np.quantile(bankData['balance'],[0.25,0.5,0.75])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2309,
|
|
"status": "ok",
|
|
"timestamp": 1573001775620,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "BbcbCowlRCsc",
|
|
"outputId": "6bdb3a5c-42d4-4566-c04a-929bf614db1b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Creating new features for bank data based on the quantile values\n",
|
|
"\n",
|
|
"bankData['balanceClass'] = 'Quant1'\n",
|
|
"\n",
|
|
"bankData.loc[(bankData['balance'] > 72) & (bankData['balance'] < 448), 'balanceClass'] = 'Quant2'\n",
|
|
"\n",
|
|
"bankData.loc[(bankData['balance'] > 448) & (bankData['balance'] < 1428), 'balanceClass'] = 'Quant3'\n",
|
|
"\n",
|
|
"bankData.loc[bankData['balance'] > 1428, 'balanceClass'] = 'Quant4'\n",
|
|
"\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 173
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2303,
|
|
"status": "ok",
|
|
"timestamp": 1573001775621,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "fGVxqFC3FzTL",
|
|
"outputId": "1ec443fa-32ee-4482-e620-1f8d27091e0a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Calculating the customers under each quantile \n",
|
|
"balanceTot = bankData.groupby(['balanceClass'])['y'].agg(balanceTot='count').reset_index()\n",
|
|
"balanceTot"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 297
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2294,
|
|
"status": "ok",
|
|
"timestamp": 1573001775621,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "vKet2DvBF_p3",
|
|
"outputId": "d56e51b5-ed2e-4d4e-9c97-dcae53e88a03"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Calculating the total customers categorised as per quantile and propensity classification\n",
|
|
"balanceProp = bankData.groupby(['balanceClass', 'y'])['y'].agg(balanceCat='count').reset_index()\n",
|
|
"balanceProp"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 297
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2289,
|
|
"status": "ok",
|
|
"timestamp": 1573001775622,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "ljhfRWOAGHKi",
|
|
"outputId": "f0db8c52-9d68-4a77-d5c0-46f86758fd4c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Merging both the data frames\n",
|
|
"balanceComb = pd.merge(balanceProp, balanceTot, on = ['balanceClass'])\n",
|
|
"balanceComb['catProp'] = (balanceComb.balanceCat / balanceComb.balanceTot)*100\n",
|
|
"balanceComb"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise3.03_updated.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|