mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
257 lines
6.3 KiB
Plaintext
257 lines
6.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "Qq90W1oIrv1j"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import altair as alt\n",
|
|
"\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1375,
|
|
"status": "ok",
|
|
"timestamp": 1572999445313,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "lX4RqHx3rfSd",
|
|
"outputId": "735616a9-a3e3-447e-90c9-8a7b5ac64072"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../bank-full.csv'\n",
|
|
"bankData = pd.read_csv(file_url, sep=\";\")\n",
|
|
"bankData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1371,
|
|
"status": "ok",
|
|
"timestamp": 1572999445314,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "Rz80uwq9AVhf",
|
|
"outputId": "013dcd08-9f5c-4305-913e-c66057a3c881"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"filter_mask = bankData['y'] == 'yes'\n",
|
|
"bankSub1 = bankData[filter_mask].groupby('age')['y'].agg(agegrp='count').reset_index()\n",
|
|
"bankSub1.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 368
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1364,
|
|
"status": "ok",
|
|
"timestamp": 1572999445314,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "0gLl4ExhDThV",
|
|
"outputId": "ff8f65de-47fe-4318-ebf9-76ebebdc0ec0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Plot the number of 'yes' records per age as a line chart using altair\n",
|
|
"alt.Chart(bankSub1).mark_line().encode(x='age', y='agegrp')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1358,
|
|
"status": "ok",
|
|
"timestamp": 1572999445315,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "nEWi-O5nCHtt",
|
|
"outputId": "8f8ed75e-d711-48a2-a2fe-b92a6d06ee31"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Count all records per age (regardless of y) to serve as the denominator for the proportions\n",
|
|
"ageTot = bankData.groupby('age')['y'].agg(ageTot='count').reset_index()\n",
|
|
"ageTot.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1353,
|
|
"status": "ok",
|
|
"timestamp": 1572999445316,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "AcVBmjvLCk3O",
|
|
"outputId": "5ccf99b4-488f-4de8-aa99-fc154fbbe408"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Count the records for every (age, y) combination\n",
|
|
"ageProp = bankData.groupby(['age','y'])['y'].agg(ageCat='count').reset_index()\n",
|
|
"ageProp.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1347,
|
|
"status": "ok",
|
|
"timestamp": 1572999445316,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "q0j3PhBtCv_O",
|
|
"outputId": "03a37cb9-4af7-44b6-9f20-8de6abcdee31"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Merge the per-(age, y) counts with the per-age totals and compute each category's percentage share\n",
|
|
"ageComb = pd.merge(ageProp, ageTot,left_on = ['age'], right_on = ['age'])\n",
|
|
"ageComb['catProp'] = (ageComb.ageCat/ageComb.ageTot)*100\n",
|
|
"ageComb.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 418
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1342,
|
|
"status": "ok",
|
|
"timestamp": 1572999445317,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "p-n6MerCEMyf",
|
|
"outputId": "c6b83817-4f8e-474d-99ef-6d3282d43c82"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Plot the percentage share per age as line charts using altair, one facet column per value of y\n",
|
|
"alt.Chart(ageComb).mark_line().encode(x='age', y='catProp').facet(column='y')"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise3.02_updated.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|