Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

302 lines
6.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HEiOAwQPW0qb"
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "j7whidfaYjns"
},
"outputs": [],
"source": [
"file_url = '../Dataset/ames_iowa_housing.csv'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "nFMz2jNVt-xy"
},
"outputs": [],
"source": [
"df = pd.read_csv(file_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "7Xc2ALj24hkv"
},
"outputs": [],
"source": [
"df_agg = df.groupby(['Neighborhood', 'YrSold']).agg({'SalePrice': 'max'}).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "vaUbUsfczGun"
},
"outputs": [],
"source": [
"df_agg.columns = ['Neighborhood', 'YrSold', 'SalePriceMax']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1531,
"status": "ok",
"timestamp": 1574650716391,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "j1-zBIDIu8DW",
"outputId": "e7b2482d-4088-41bf-9868-ab1cc0c3153b"
},
"outputs": [],
"source": [
"df_agg.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "i0erbyjDu8Gb"
},
"outputs": [],
"source": [
"df_new = pd.merge(df, df_agg, how='left', on=['Neighborhood', 'YrSold'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 253
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1821,
"status": "ok",
"timestamp": 1574650716691,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "quHkbSTz0VZP",
"outputId": "c7b5f273-f59d-4217-d30e-601312884354"
},
"outputs": [],
"source": [
"df_new.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3SKFS_A20W0R"
},
"outputs": [],
"source": [
"df_new['SalePriceRatio'] = df_new['SalePrice'] / df_new['SalePriceMax']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 253
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1813,
"status": "ok",
"timestamp": 1574650716693,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "_jg43hgI0jPm",
"outputId": "b97f2ff5-96e9-49ec-d309-f64cfb4a3f95"
},
"outputs": [],
"source": [
"df_new.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "88LREvol06ef"
},
"outputs": [],
"source": [
"df_agg2 = df.groupby(['Neighborhood', 'YrSold']).agg({'LotArea': 'max'}).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1805,
"status": "ok",
"timestamp": 1574650716694,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "g1wU-7Ok1XVg",
"outputId": "e26d4b24-c1fc-445b-f944-c26f095c04dd"
},
"outputs": [],
"source": [
"df_agg2.columns = ['Neighborhood', 'YrSold', 'LotAreaMax']\n",
"df_agg2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "QzXKCoCy06je"
},
"outputs": [],
"source": [
"df_final = pd.merge(df_new, df_agg2, how='left', on=['Neighborhood', 'YrSold'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "jRm1mz-a06oT"
},
"outputs": [],
"source": [
"df_final['LotAreaRatio'] = df_final['LotArea'] / df_final['LotAreaMax']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"executionInfo": {
"elapsed": 2154,
"status": "ok",
"timestamp": 1574650717053,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "Yh4J8OYs1SnX",
"outputId": "12574d36-f1ee-4b25-a7fe-55ca53580bfd"
},
"outputs": [],
"source": [
"df_final[['Id', 'Neighborhood', 'YrSold', 'SalePrice', 'SalePriceMax', 'SalePriceRatio', 'LotArea', 'LotAreaMax', 'LotAreaRatio']].head()"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Exercise12_04.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}