mlessentials/Lab03/Activity3.01/Activity3_01.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "8UMFqsCD0xyF"
   },
   "outputs": [],
   "source": [
    "# Importing necessary packages\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "HSXgY0ze09cY"
   },
   "outputs": [],
   "source": [
    "# Load the semicolon-delimited banking data into a data frame\n",
    "file_url = '../bank-full.csv'\n",
    "bankData = pd.read_csv(file_url, delimiter=';')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 421
    },
    "colab_type": "code",
    "id": "Do4xwaIT2LWz",
    "outputId": "0af195a8-74ef-4b21-a55a-88009a6546e5"
   },
   "outputs": [],
   "source": [
    "# Count the 'y' entries recorded under each job category\n",
    "jobTot = (\n",
    "    bankData.groupby('job')['y']\n",
    "    .count()\n",
    "    .reset_index(name='jobTot')\n",
    ")\n",
    "jobTot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "fHptBmjb2Rii"
   },
   "outputs": [],
   "source": [
    "# Count the entries for every (job, y) combination\n",
    "jobProp = (\n",
    "    bankData.groupby(['job', 'y'])['y']\n",
    "    .count()\n",
    "    .reset_index(name='jobCat')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "id": "QOy6VjXEN0Dn",
    "outputId": "729f6f25-8fa6-4cbf-bc88-e310a1465afc"
   },
   "outputs": [],
   "source": [
    "# Attach the per-job totals to the per-(job, y) counts and express each\n",
    "# count as a percentage of its job total\n",
    "jobComb = (\n",
    "    jobProp\n",
    "    .merge(jobTot, on='job')\n",
    "    .assign(catProp=lambda frame: frame['jobCat'] / frame['jobTot'] * 100)\n",
    ")\n",
    "\n",
    "jobComb.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 342
    },
    "colab_type": "code",
    "id": "lH14UUsSpFO8",
    "outputId": "dbdff402-01e5-4663-8018-c5c2d6180c16"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# Reshape to one row per job with a 'yes' and a 'no' proportion column.\n",
    "# Indexing by job keeps every bar aligned with its label, even when two\n",
    "# jobs share the same proportion or a job has no rows for one outcome\n",
    "# (a de-duplicated array would then be shorter than the x-axis).\n",
    "jobPivot = (\n",
    "    jobComb\n",
    "    .pivot(index='job', columns='y', values='catProp')\n",
    "    .reindex(index=jobTot['job'], columns=['yes', 'no'])\n",
    "    .fillna(0)\n",
    ")\n",
    "\n",
    "# Proportion values for each outcome, in the same order as the x-axis labels\n",
    "jobYes = jobPivot['yes'].to_numpy()\n",
    "jobNo = jobPivot['no'].to_numpy()\n",
    "\n",
    "# Positions of the bars on the x-axis\n",
    "ind = np.arange(len(jobPivot))\n",
    "\n",
    "# Width of each bar\n",
    "width = 0.35\n",
    "\n",
    "# Stacked bars: the 'No' segment sits on top of the 'Yes' segment\n",
    "fig, ax = plt.subplots()\n",
    "p1 = ax.bar(ind, jobYes, width)\n",
    "p2 = ax.bar(ind, jobNo, width, bottom=jobYes)\n",
    "\n",
    "ax.set_ylabel('Propensity Proportion')\n",
    "ax.set_title('Propensity of purchase by Job')\n",
    "\n",
    "# Job names on the x-axis, rotated so they do not overlap\n",
    "ax.set_xticks(ind)\n",
    "ax.set_xticklabels(jobPivot.index, rotation=90)\n",
    "\n",
    "# The stacked proportions add up to 100, so include the 100 tick\n",
    "ax.set_yticks(np.arange(0, 101, 10))\n",
    "\n",
    "# Defining the legends\n",
    "ax.legend((p1[0], p2[0]), ('Yes', 'No'))\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Activity3.01.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}