mlessentials/Lab07/Exercise7.03/Exercise7_03.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "1Jea7edSZEQt"
   },
   "source": [
    "# Cross Validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "LlEvES3LZEQu"
   },
   "outputs": [],
   "source": [
    "# import libraries\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 266
    },
    "colab_type": "code",
    "id": "mKjSA1nyZEQ1",
    "outputId": "ceea8307-5740-4402-ac68-ae4982ea0897"
   },
   "outputs": [],
   "source": [
    "# data doesn't have headers, so let's create headers\n",
    "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n",
    "# read in cars dataset\n",
    "df = pd.read_csv('../Dataset/car.data', names=_headers, index_col=None)\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Q7bSbwgLZEQ6"
   },
   "outputs": [],
   "source": [
    "#split the data into 80% for training and 20% for evaluation\n",
    "training_df, eval_df = train_test_split(df, train_size=0.8, random_state=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "15q798w9ZERA",
    "outputId": "87ba948e-8696-4bab-d16d-c77830d51595"
   },
   "outputs": [],
   "source": [
    "training_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "1m6MMB5qZERF",
    "outputId": "5bd155a7-a1d2-4508-c060-3236542c6d4e"
   },
   "outputs": [],
   "source": [
    "eval_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "JNDU3YyqZERK"
   },
   "source": [
    "## KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "kcfasL9TZERL"
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "aZ0kHogUZERO"
   },
   "outputs": [],
   "source": [
    "_kf = KFold(n_splits=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "xRNYRhVaZERU"
   },
   "outputs": [],
   "source": [
    "indices = _kf.split(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "FWwkr65qZERX",
    "outputId": "6a3b8cde-0f08-43c7-d2e3-87ec31cea395"
   },
   "outputs": [],
   "source": [
    "print(type(indices))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "QWCNPEzSZERa"
   },
   "outputs": [],
   "source": [
    "#first set\n",
    "train_indices, val_indices = next(indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "O_IOUW9aZERd"
   },
   "outputs": [],
   "source": [
    "train_df = df.drop(val_indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 266
    },
    "colab_type": "code",
    "id": "0wNV-QYwZERg",
    "outputId": "a482777b-4b99-4bba-9211-cb9093953944"
   },
   "outputs": [],
   "source": [
    "train_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "LDQVoPAHZERl"
   },
   "outputs": [],
   "source": [
    "val_df = df.drop(train_indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 266
    },
    "colab_type": "code",
    "id": "X8UvqR28ZERo",
    "outputId": "c6d9e3c6-3936-49d8-bc67-a7d5aeab44f6"
   },
   "outputs": [],
   "source": [
    "val_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Jn14g6h0ZERr"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise7.03.ipynb",
   "provenance": []
  },
  "file_extension": ".py",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  },
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3
 },
 "nbformat": 4,
 "nbformat_minor": 1
}