mlessentials/Lab05/Exercise5.01/Exercise5_1.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "zVIPgXoWlEXC"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.cluster import KMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "EhSFCirNlG5J"
   },
   "outputs": [],
   "source": [
    "file_url = '../DataSet/taxstats2015.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "T9mNYH_WlLIG"
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(file_url, usecols=['Postcode', 'Average net tax', 'Average total deductions'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "id": "yLUsDBmRlMot",
    "outputId": "d2f4fdc6-1c9f-43f9-c2ff-2b432679927d"
   },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "id": "eNncdXnpuLKL",
    "outputId": "54094385-9d7b-4cbd-b5f8-06bc5c0ed9b1"
   },
   "outputs": [],
   "source": [
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "n_Sfij90lOVg"
   },
   "outputs": [],
   "source": [
    "kmeans = KMeans(random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "w6ZLzCgGlbsG"
   },
   "outputs": [],
   "source": [
    "X = df[['Average net tax', 'Average total deductions']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 68
    },
    "colab_type": "code",
    "id": "VX56G-A0ldDY",
    "outputId": "df0d34c6-532c-41c9-a229-df4ce8ca24f4"
   },
   "outputs": [],
   "source": [
    "kmeans.fit(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "aPL8x8zalpvO",
    "outputId": "2eeb587a-daf8-434d-d1aa-f00160adf77f"
   },
   "outputs": [],
   "source": [
    "y_preds = kmeans.predict(X)\n",
    "y_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "id": "GrYs7MjIl1Iu",
    "outputId": "eca54d86-c1ba-45f1-f955-efa5b80c98f7"
   },
   "outputs": [],
   "source": [
    "df['cluster'] = y_preds\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Exercise5_1.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}