{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "1Jea7edSZEQt" }, "source": [ "# Cross Validation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "LlEvES3LZEQu" }, "outputs": [], "source": [ "# pandas handles the tabular data; scikit-learn supplies the hold-out split helper\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 266 }, "colab_type": "code", "id": "mKjSA1nyZEQ1", "outputId": "ceea8307-5740-4402-ac68-ae4982ea0897" }, "outputs": [], "source": [ "# the raw file has no header row, so the column names are supplied here\n", "_headers = [\n", "    'buying', 'maint', 'doors', 'persons',\n", "    'lug_boot', 'safety', 'car',\n", "]\n", "# load the cars dataset, labelling the columns with the names above\n", "df = pd.read_csv(\n", "    '../Dataset/car.data',\n", "    names=_headers,\n", "    index_col=None,\n", ")\n", "df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "Q7bSbwgLZEQ6" }, "outputs": [], "source": [ "# keep 80% of the rows for training and hold out the remaining 20% for evaluation\n", "training_df, eval_df = train_test_split(\n", "    df,\n", "    train_size=0.8,\n", "    random_state=1,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 194 }, "colab_type": "code", "id": "15q798w9ZERA", "outputId": "87ba948e-8696-4bab-d16d-c77830d51595" }, "outputs": [], "source": [ "training_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 194 }, "colab_type": "code", "id": "1m6MMB5qZERF", "outputId": "5bd155a7-a1d2-4508-c060-3236542c6d4e" }, "outputs": [], "source": [ "eval_df.head()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JNDU3YyqZERK" }, "source": [ "## KFold" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "kcfasL9TZERL" 
}, "outputs": [], "source": [ "from sklearn.model_selection import KFold" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "aZ0kHogUZERO" }, "outputs": [], "source": [ "# 5-fold splitter; KFold does not shuffle by default, so each fold is a consecutive block of rows\n", "_kf = KFold(n_splits=5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "xRNYRhVaZERU" }, "outputs": [], "source": [ "# split() yields one (train, validation) pair of index arrays per fold\n", "indices = _kf.split(df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "colab_type": "code", "id": "FWwkr65qZERX", "outputId": "6a3b8cde-0f08-43c7-d2e3-87ec31cea395" }, "outputs": [], "source": [ "print(type(indices))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "QWCNPEzSZERa" }, "outputs": [], "source": [ "# first fold: both arrays hold integer row positions into df\n", "train_indices, val_indices = next(indices)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "O_IOUW9aZERd" }, "outputs": [], "source": [ "# select by position with iloc: the split indices are row positions, not index labels,\n", "# so this stays correct even if df does not have the default 0..n-1 index\n", "train_df = df.iloc[train_indices]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 266 }, "colab_type": "code", "id": "0wNV-QYwZERg", "outputId": "a482777b-4b99-4bba-9211-cb9093953944" }, "outputs": [], "source": [ "train_df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "LDQVoPAHZERl" }, "outputs": [], "source": [ "# validation rows of the same fold, again selected by position\n", "val_df = df.iloc[val_indices]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 266 }, "colab_type": "code", "id": "X8UvqR28ZERo", "outputId": "c6d9e3c6-3936-49d8-bc67-a7d5aeab44f6" }, "outputs": [], "source": [ "val_df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "Jn14g6h0ZERr" }, "outputs": [], 
"source": [] } ], "metadata": { "colab": { "name": "Exercise7.03.ipynb", "provenance": [] }, "file_extension": ".py", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" }, "mimetype": "text/x-python", "name": "python", "npconvert_exporter": "python", "pygments_lexer": "ipython3", "version": 3 }, "nbformat": 4, "nbformat_minor": 1 }