mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
191 lines
3.6 KiB
Plaintext
191 lines
3.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "zVIPgXoWlEXC"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.cluster import KMeans"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "EhSFCirNlG5J"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../DataSet/taxstats2015.csv'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "T9mNYH_WlLIG"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv(file_url, usecols=['Postcode', 'Average net tax', 'Average total deductions'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"id": "yLUsDBmRlMot",
|
|
"outputId": "d2f4fdc6-1c9f-43f9-c2ff-2b432679927d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"id": "eNncdXnpuLKL",
|
|
"outputId": "54094385-9d7b-4cbd-b5f8-06bc5c0ed9b1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "n_Sfij90lOVg"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"kmeans = KMeans(random_state=42)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "w6ZLzCgGlbsG"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = df[['Average net tax', 'Average total deductions']]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 68
|
|
},
|
|
"colab_type": "code",
|
|
"id": "VX56G-A0ldDY",
|
|
"outputId": "df0d34c6-532c-41c9-a229-df4ce8ca24f4"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"kmeans.fit(X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "aPL8x8zalpvO",
|
|
"outputId": "2eeb587a-daf8-434d-d1aa-f00160adf77f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y_preds = kmeans.predict(X)\n",
|
|
"y_preds"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"id": "GrYs7MjIl1Iu",
|
|
"outputId": "eca54d86-c1ba-45f1-f955-efa5b80c98f7"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df['cluster'] = y_preds\n",
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise5_1.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|