{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "HEiOAwQPW0qb" }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "j7whidfaYjns" }, "outputs": [], "source": [ "file_url = '../Dataset/taxstats2015.csv'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "nFMz2jNVt-xy" }, "outputs": [], "source": [ "df = pd.read_csv(file_url)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 1161, "status": "ok", "timestamp": 1574650509015, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "QjJwGSZCMMPb", "outputId": "dc3ca48a-88aa-45bc-90b0-1c8ef1270513" }, "outputs": [], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 292 }, "colab_type": "code", "executionInfo": { "elapsed": 1155, "status": "ok", "timestamp": 1574650509016, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "xJwp63DyLPu_", "outputId": "c656a879-7e27-4d18-dce2-90f0718aa72a" }, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 1149, "status": "ok", "timestamp": 1574650509016, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "XB7eVlroMN1r", "outputId": "a5b78ebf-0e36-40bb-f5f3-1a96296eceb9" }, "outputs": [], "source": [ "df['Postcode'].nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "nexqBH1TKHj3" }, "outputs": [], "source": [ "postcode_url = '../Dataset/taxstats2016individual06taxablestatusstateterritorypostcodetaxableincome%20(2).xlsx'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "4TzHD65-KHnc" }, "outputs": [], "source": [ "postcode_df = pd.read_excel(postcode_url, sheet_name='Individuals Table 6B', header=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 23926, "status": "ok", "timestamp": 1574650531806, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "Pf9YIlPqL7B9", "outputId": "35ecf9e9-3ba0-41ab-e8e5-95e71c35d80b" }, "outputs": [], "source": [ "postcode_df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 389 }, "colab_type": "code", "executionInfo": { "elapsed": 23920, "status": "ok", "timestamp": 1574650531807, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "fNx5ZFmuKHqN", "outputId": "f25cd749-5f9a-4e7d-d804-677259b0cda8" }, "outputs": [], "source": [ "postcode_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 23915, "status": "ok", "timestamp": 1574650531808, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "_RKsEeTAL_Nb", "outputId": "7575e99f-053e-4d55-a853-7ed3bceaeb41" }, "outputs": [], "source": [ "postcode_df['Postcode'].nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "LIIeGKlkMbzL" }, "outputs": [], "source": [ "merged_df = pd.merge(df, postcode_df, how='left', on='Postcode')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 23909, "status": "ok", "timestamp": 1574650531809, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "QVlzvk1NMk1M", "outputId": "a0b2e405-e6e4-44fa-f21e-eaf945f78542" }, "outputs": [], "source": [ "merged_df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "executionInfo": { "elapsed": 23904, "status": "ok", "timestamp": 1574650531810, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "fGlQGlclMuya", "outputId": "95875cba-9fca-4165-f21e-6e4743a8c2ec" }, "outputs": [], "source": [ "merged_df['State/ Territory1'].isna().sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "executionInfo": { "elapsed": 23900, "status": "ok", "timestamp": 1574650531811, "user": { "displayName": "Anthony So", "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64", "userId": "11809607246124237079" }, "user_tz": -660 }, "id": "TF4kZMZ9qMms", "outputId": "48a97b2a-4151-42b0-e686-3a0f60cf2d24" }, "outputs": [], "source": [ "merged_df.loc[merged_df['State/ Territory1'].isna(), 'Postcode']" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "Exercise12_01.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 1 }