{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "HEiOAwQPW0qb"
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "j7whidfaYjns"
   },
   "outputs": [],
   "source": [
    "# Relative path of the CSV file that the next cell loads.\n",
    "file_url = '../dataset/ames_iowa_housing.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "nFMz2jNVt-xy"
   },
   "outputs": [],
   "source": [
    "# Read the CSV into a DataFrame.\n",
    "df = pd.read_csv(filepath_or_buffer=file_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 221
    },
    "colab_type": "code",
    "id": "VFZSIuUjddvZ",
    "outputId": "fc2622b9-88ca-4495-bb23-32c5fe6f43bc"
   },
   "outputs": [],
   "source": [
    "# Inspect the dtype pandas inferred for each column.\n",
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "mWw1dgZTgh5r"
   },
   "outputs": [],
   "source": [
    "# Store 'Id' with the category dtype.\n",
    "df = df.astype({'Id': 'category'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "L9012mdDoiID"
   },
   "outputs": [],
   "source": [
    "# Convert these three columns to the category dtype in a single call.\n",
    "df = df.astype({\n",
    "    'MSSubClass': 'category',\n",
    "    'OverallQual': 'category',\n",
    "    'OverallCond': 'category',\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "colab_type": "code",
    "id": "uiLlKYp1p1Lp",
    "outputId": "b5c505a5-47ba-429b-fb39-a5a8df4636c1"
   },
   "outputs": [],
   "source": [
    "# Print each converted column's name followed by its categories.\n",
    "for col_name in ['Id', 'MSSubClass', 'OverallQual', 'OverallCond']:\n",
    "    print(col_name, df[col_name].cat.categories, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "eLSewbSud4vl"
   },
   "outputs": [],
   "source": [
    "# Keep only the columns whose dtype is still object.\n",
    "obj_df = df.select_dtypes(include=['object'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 187
    },
    "colab_type": "code",
    "id": "U3FmCwWrqu0-",
    "outputId": "29157bcd-57d3-41b8-875d-4126aa21bd88"
   },
   "outputs": [],
   "source": [
    "# Names of the object-dtype columns; displayed as the cell output.\n",
    "obj_cols = obj_df.columns\n",
    "obj_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "colab_type": "code",
    "id": "Dc7pw1ZufBdL",
    "outputId": "ac425683-1c23-4551-b2ae-6f7960eae694"
   },
   "outputs": [],
   "source": [
    "# Print each object column's name followed by its distinct values.\n",
    "for col_name in obj_cols:\n",
    "    print(col_name, df[col_name].unique(), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "6OgiuQVwfQ-l"
   },
   "outputs": [],
   "source": [
    "# Convert every object column to the category dtype in one astype call.\n",
    "df = df.astype(dict.fromkeys(obj_cols, 'category'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 221
    },
    "colab_type": "code",
    "id": "0z64JDTcfaXv",
    "outputId": "643b14fd-a3bb-4322-e3eb-1fc9526f27a0"
   },
   "outputs": [],
   "source": [
    "# Display the dtypes again after the conversions.\n",
    "df.dtypes"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Exercise11.02.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}