{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 3.02: outcome `y` by age\n",
    "\n",
    "Loads `bank-full.csv`, counts the `yes` outcomes for each age, then computes what percentage of every age group falls into each `y` category and plots both views with Altair."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Qq90W1oIrv1j"
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "import altair as alt\n",
    "import pandas as pd\n",
    "\n",
    "# NOTE: this silences every warning for the whole kernel session; narrow the\n",
    "# filter (e.g. by category) when a warning needs investigating.\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1375,
     "status": "ok",
     "timestamp": 1572999445313,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "lX4RqHx3rfSd",
    "outputId": "735616a9-a3e3-447e-90c9-8a7b5ac64072"
   },
   "outputs": [],
   "source": [
    "# The file is semicolon-delimited and is read from the notebook's parent directory.\n",
    "file_url = '../bank-full.csv'\n",
    "bankData = pd.read_csv(file_url, sep=\";\")\n",
    "bankData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1371,
     "status": "ok",
     "timestamp": 1572999445314,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "Rz80uwq9AVhf",
    "outputId": "013dcd08-9f5c-4305-913e-c66057a3c881"
   },
   "outputs": [],
   "source": [
    "# Keep only the rows where y == 'yes', then count them for each age.\n",
    "filter_mask = bankData['y'] == 'yes'\n",
    "bankSub1 = (\n",
    "    bankData[filter_mask]\n",
    "    .groupby('age')['y']\n",
    "    .agg(agegrp='count')\n",
    "    .reset_index()\n",
    ")\n",
    "bankSub1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 368
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1364,
     "status": "ok",
     "timestamp": 1572999445314,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "0gLl4ExhDThV",
    "outputId": "ff8f65de-47fe-4318-ebf9-76ebebdc0ec0"
   },
   "outputs": [],
   "source": [
    "# Visualising the relationship using altair\n",
    "# Encoding types are spelled out (:Q = quantitative) and the axes are labelled\n",
    "# so the figure can be read on its own.\n",
    "alt.Chart(bankSub1).mark_line().encode(\n",
    "    x=alt.X('age:Q', title='Age'),\n",
    "    y=alt.Y('agegrp:Q', title='Number of yes outcomes'),\n",
    ").properties(title='Yes outcomes by age')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1358,
     "status": "ok",
     "timestamp": 1572999445315,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "nEWi-O5nCHtt",
    "outputId": "8f8ed75e-d711-48a2-a2fe-b92a6d06ee31"
   },
   "outputs": [],
   "source": [
    "# Getting another perspective: total number of rows for each age,\n",
    "# regardless of the value of y.\n",
    "ageTot = bankData.groupby('age')['y'].agg(ageTot='count').reset_index()\n",
    "ageTot.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1353,
     "status": "ok",
     "timestamp": 1572999445316,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "AcVBmjvLCk3O",
    "outputId": "5ccf99b4-488f-4de8-aa99-fc154fbbe408"
   },
   "outputs": [],
   "source": [
    "# Getting all the details in one place: number of rows for every\n",
    "# (age, y) combination.\n",
    "ageProp = bankData.groupby(['age', 'y'])['y'].agg(ageCat='count').reset_index()\n",
    "ageProp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1347,
     "status": "ok",
     "timestamp": 1572999445316,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "q0j3PhBtCv_O",
    "outputId": "03a37cb9-4af7-44b6-9f20-8de6abcdee31"
   },
   "outputs": [],
   "source": [
    "# Merging both the data frames on their shared 'age' key. ageTot holds one row\n",
    "# per age, so validate='many_to_one' makes the merge fail loudly if that ever\n",
    "# stops being true instead of silently duplicating rows.\n",
    "ageComb = ageProp.merge(ageTot, on='age', validate='many_to_one').assign(\n",
    "    # Percentage of an age's rows that fall in the given y category.\n",
    "    catProp=lambda df: (df['ageCat'] / df['ageTot']) * 100\n",
    ")\n",
    "ageComb.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 418
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1342,
     "status": "ok",
     "timestamp": 1572999445317,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "p-n6MerCEMyf",
    "outputId": "c6b83817-4f8e-474d-99ef-6d3282d43c82"
   },
   "outputs": [],
   "source": [
    "# Visualising the relationship using altair\n",
    "# One panel per y value (:N = nominal); the title is set on the faceted chart\n",
    "# so it appears once rather than once per panel.\n",
    "alt.Chart(ageComb).mark_line().encode(\n",
    "    x=alt.X('age:Q', title='Age'),\n",
    "    y=alt.Y('catProp:Q', title='Share of age group (%)'),\n",
    ").facet(column='y:N').properties(title='Share of each y outcome by age')"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Exercise3.02_updated.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}