mirror of
https://github.com/fenago/data-science.git
synced 2026-05-06 01:22:41 +00:00
302 lines
6.5 KiB
Plaintext
302 lines
6.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HEiOAwQPW0qb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "j7whidfaYjns"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../Dataset/ames_iowa_housing.csv'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "nFMz2jNVt-xy"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv(file_url)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "7Xc2ALj24hkv"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_agg = df.groupby(['Neighborhood', 'YrSold']).agg({'SalePrice': 'max'}).reset_index()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "vaUbUsfczGun"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_agg.columns = ['Neighborhood', 'YrSold', 'SalePriceMax']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1531,
|
|
"status": "ok",
|
|
"timestamp": 1574650716391,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "j1-zBIDIu8DW",
|
|
"outputId": "e7b2482d-4088-41bf-9868-ab1cc0c3153b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_agg.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "i0erbyjDu8Gb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_new = pd.merge(df, df_agg, how='left', on=['Neighborhood', 'YrSold'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 253
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1821,
|
|
"status": "ok",
|
|
"timestamp": 1574650716691,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "quHkbSTz0VZP",
|
|
"outputId": "c7b5f273-f59d-4217-d30e-601312884354"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_new.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "3SKFS_A20W0R"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_new['SalePriceRatio'] = df_new['SalePrice'] / df_new['SalePriceMax']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 253
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1813,
|
|
"status": "ok",
|
|
"timestamp": 1574650716693,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "_jg43hgI0jPm",
|
|
"outputId": "b97f2ff5-96e9-49ec-d309-f64cfb4a3f95"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_new.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "88LREvol06ef"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_agg2 = df.groupby(['Neighborhood', 'YrSold']).agg({'LotArea': 'max'}).reset_index()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1805,
|
|
"status": "ok",
|
|
"timestamp": 1574650716694,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "g1wU-7Ok1XVg",
|
|
"outputId": "e26d4b24-c1fc-445b-f944-c26f095c04dd"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_agg2.columns = ['Neighborhood', 'YrSold', 'LotAreaMax']\n",
|
|
"df_agg2.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "QzXKCoCy06je"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_final = pd.merge(df_new, df_agg2, how='left', on=['Neighborhood', 'YrSold'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "jRm1mz-a06oT"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_final['LotAreaRatio'] = df_final['LotArea'] / df_final['LotAreaMax']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2154,
|
|
"status": "ok",
|
|
"timestamp": 1574650717053,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "Yh4J8OYs1SnX",
|
|
"outputId": "12574d36-f1ee-4b25-a7fe-55ca53580bfd"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_final[['Id', 'Neighborhood', 'YrSold', 'SalePrice', 'SalePriceMax', 'SalePriceRatio', 'LotArea', 'LotAreaMax', 'LotAreaRatio']].head()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise12_04.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|