Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

361 lines
8.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HEiOAwQPW0qb"
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "j7whidfaYjns"
},
"outputs": [],
"source": [
"file_url = '../Dataset/taxstats2015.csv'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "nFMz2jNVt-xy"
},
"outputs": [],
"source": [
"df = pd.read_csv(file_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1161,
"status": "ok",
"timestamp": 1574650509015,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "QjJwGSZCMMPb",
"outputId": "dc3ca48a-88aa-45bc-90b0-1c8ef1270513"
},
"outputs": [],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 292
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1155,
"status": "ok",
"timestamp": 1574650509016,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "xJwp63DyLPu_",
"outputId": "c656a879-7e27-4d18-dce2-90f0718aa72a"
},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1149,
"status": "ok",
"timestamp": 1574650509016,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "XB7eVlroMN1r",
"outputId": "a5b78ebf-0e36-40bb-f5f3-1a96296eceb9"
},
"outputs": [],
"source": [
"df['Postcode'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "nexqBH1TKHj3"
},
"outputs": [],
"source": [
"postcode_url = '../Dataset/taxstats2016individual06taxablestatusstateterritorypostcodetaxableincome%20(2).xlsx'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "4TzHD65-KHnc"
},
"outputs": [],
"source": [
"postcode_df = pd.read_excel(postcode_url, sheet_name='Individuals Table 6B', header=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 23926,
"status": "ok",
"timestamp": 1574650531806,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "Pf9YIlPqL7B9",
"outputId": "35ecf9e9-3ba0-41ab-e8e5-95e71c35d80b"
},
"outputs": [],
"source": [
"postcode_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 389
},
"colab_type": "code",
"executionInfo": {
"elapsed": 23920,
"status": "ok",
"timestamp": 1574650531807,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "fNx5ZFmuKHqN",
"outputId": "f25cd749-5f9a-4e7d-d804-677259b0cda8"
},
"outputs": [],
"source": [
"postcode_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 23915,
"status": "ok",
"timestamp": 1574650531808,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "_RKsEeTAL_Nb",
"outputId": "7575e99f-053e-4d55-a853-7ed3bceaeb41"
},
"outputs": [],
"source": [
"postcode_df['Postcode'].nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LIIeGKlkMbzL"
},
"outputs": [],
"source": [
"merged_df = pd.merge(df, postcode_df, how='left', on='Postcode')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 23909,
"status": "ok",
"timestamp": 1574650531809,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "QVlzvk1NMk1M",
"outputId": "a0b2e405-e6e4-44fa-f21e-eaf945f78542"
},
"outputs": [],
"source": [
"merged_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 23904,
"status": "ok",
"timestamp": 1574650531810,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "fGlQGlclMuya",
"outputId": "95875cba-9fca-4165-f21e-6e4743a8c2ec"
},
"outputs": [],
"source": [
"merged_df['State/ Territory1'].isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 102
},
"colab_type": "code",
"executionInfo": {
"elapsed": 23900,
"status": "ok",
"timestamp": 1574650531811,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "TF4kZMZ9qMms",
"outputId": "48a97b2a-4151-42b0-e686-3a0f60cf2d24"
},
"outputs": [],
"source": [
"merged_df.loc[merged_df['State/ Territory1'].isna(), 'Postcode']"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Exercise12_01.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}