Files
Your Name 54ccb1423f added
2021-02-08 11:17:02 +00:00

257 lines
6.3 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Qq90W1oIrv1j"
},
"outputs": [],
"source": [
"# Standard library first, then the third-party analysis and plotting stack.\n",
"import warnings\n",
"\n",
"import altair as alt\n",
"import pandas as pd\n",
"\n",
"# Install an 'ignore' filter that applies to every warning category.\n",
"# NOTE(review): this blanket filter also hides deprecation notices raised by\n",
"# the libraries above -- consider narrowing it to specific categories.\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1375,
"status": "ok",
"timestamp": 1572999445313,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "lX4RqHx3rfSd",
"outputId": "735616a9-a3e3-447e-90c9-8a7b5ac64072"
},
"outputs": [],
"source": [
"# Relative path of the input CSV file.\n",
"file_url = '../bank-full.csv'\n",
"\n",
"# The file is parsed with ';' as the field separator instead of the comma default.\n",
"bankData = pd.read_csv(filepath_or_buffer=file_url, sep=';')\n",
"\n",
"# Preview the first rows of the loaded frame.\n",
"bankData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1371,
"status": "ok",
"timestamp": 1572999445314,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "Rz80uwq9AVhf",
"outputId": "013dcd08-9f5c-4305-913e-c66057a3c881"
},
"outputs": [],
"source": [
"# Boolean mask selecting the rows whose 'y' column equals 'yes'.\n",
"filter_mask = bankData['y'] == 'yes'\n",
"\n",
"# Count the selected rows per distinct age; the count lands in column 'agegrp'\n",
"# and 'age' is moved back from the index into a regular column.\n",
"bankSub1 = (\n",
"    bankData[filter_mask]\n",
"    .groupby('age')['y']\n",
"    .agg(agegrp='count')\n",
"    .reset_index()\n",
")\n",
"bankSub1.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 368
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1364,
"status": "ok",
"timestamp": 1572999445314,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "0gLl4ExhDThV",
"outputId": "ff8f65de-47fe-4318-ebf9-76ebebdc0ec0"
},
"outputs": [],
"source": [
"# Line chart of the per-age count of rows where y == 'yes' (bankSub1).\n",
"# Explicit titles replace the raw column names ('age', 'agegrp') so the figure\n",
"# can be read on its own without inspecting the dataframe.\n",
"(\n",
"    alt.Chart(bankSub1)\n",
"    .mark_line()\n",
"    .encode(\n",
"        x=alt.X('age', title='Age'),\n",
"        y=alt.Y('agegrp', title='Rows with y = yes (count)'),\n",
"    )\n",
"    .properties(title='Count of y = yes by age')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1358,
"status": "ok",
"timestamp": 1572999445315,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "nEWi-O5nCHtt",
"outputId": "8f8ed75e-d711-48a2-a2fe-b92a6d06ee31"
},
"outputs": [],
"source": [
"# Per-age total: count the 'y' entries for each age across the whole\n",
"# (unfiltered) dataset and store the result in column 'ageTot'.\n",
"ageTot = (\n",
"    bankData\n",
"    .groupby('age')['y']\n",
"    .agg(ageTot='count')\n",
"    .reset_index()\n",
")\n",
"ageTot.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1353,
"status": "ok",
"timestamp": 1572999445316,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "AcVBmjvLCk3O",
"outputId": "5ccf99b4-488f-4de8-aa99-fc154fbbe408"
},
"outputs": [],
"source": [
"# Per-age, per-category count: one row for each (age, y) pair, with the\n",
"# number of rows in that pair stored in column 'ageCat'.\n",
"ageProp = (\n",
"    bankData\n",
"    .groupby(['age', 'y'])['y']\n",
"    .agg(ageCat='count')\n",
"    .reset_index()\n",
")\n",
"ageProp.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1347,
"status": "ok",
"timestamp": 1572999445316,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "q0j3PhBtCv_O",
"outputId": "03a37cb9-4af7-44b6-9f20-8de6abcdee31"
},
"outputs": [],
"source": [
"# Join the per-(age, y) counts with the per-age totals on the shared 'age' key.\n",
"ageComb = pd.merge(left=ageProp, right=ageTot, on='age')\n",
"\n",
"# Express each category count as a percentage of the total for that age.\n",
"ageComb['catProp'] = (ageComb['ageCat'] / ageComb['ageTot']) * 100\n",
"ageComb.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 418
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1342,
"status": "ok",
"timestamp": 1572999445317,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "p-n6MerCEMyf",
"outputId": "c6b83817-4f8e-474d-99ef-6d3282d43c82"
},
"outputs": [],
"source": [
"# Line chart of the percentage column 'catProp' against age, drawn as one\n",
"# panel per distinct value of 'y'.\n",
"# Explicit axis titles replace the raw column names ('age', 'catProp') so the\n",
"# figure can be read on its own without inspecting the dataframe.\n",
"(\n",
"    alt.Chart(ageComb)\n",
"    .mark_line()\n",
"    .encode(\n",
"        x=alt.X('age', title='Age'),\n",
"        y=alt.Y('catProp', title='Share of rows at that age (%)'),\n",
"    )\n",
"    .facet(column='y')\n",
")"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Exercise3.02_updated.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}