mlessentials/Lab07/Exercise7.04/Exercise7_04.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "d3tuu-nXbQI_"
   },
   "source": [
    "# 5-Fold Cross Validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "OZcXS3CMbQJA"
   },
   "outputs": [],
   "source": [
    "# import libraries\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 266
    },
    "colab_type": "code",
    "id": "E_pUA2KBbQJJ",
    "outputId": "4e4aaae8-77e3-4527-a456-2185519445a4"
   },
   "outputs": [],
   "source": [
    "# data doesn't have headers, so let's create headers\n",
    "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n",
    "# read in cars dataset\n",
    "df = pd.read_csv('../Dataset/car.data', names=_headers, index_col=None)\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "l0vTXFTxbQJR"
   },
   "outputs": [],
   "source": [
    "#split the data into 80% for training and 20% for evaluation\n",
    "training_df, eval_df = train_test_split(df, train_size=0.8, random_state=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "IoLYxyNIbQJX",
    "outputId": "7fb4eed4-cba5-493d-c901-947895b41e8c"
   },
   "outputs": [],
   "source": [
    "training_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "IyoSMML6bQJd",
    "outputId": "02d748bf-8d12-4352-dc57-6152b855b763"
   },
   "outputs": [],
   "source": [
    "eval_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "B5kBfeZZbQJk"
   },
   "source": [
    "## KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "KlKnqGdebQJl"
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "8Q0HLrEjbQJq"
   },
   "outputs": [],
   "source": [
    "n_splits = 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "XVr1kaHPbQJw"
   },
   "outputs": [],
   "source": [
    "#create an instance of KFold\n",
    "_kf = KFold(n_splits=n_splits)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "3T1UfHi5bQJz"
   },
   "outputs": [],
   "source": [
    "#create splits as _indices\n",
    "_indices = _kf.split(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Bx52vDyMbQJ5"
   },
   "outputs": [],
   "source": [
    "# create lists to hold training and validation DataFrames\n",
    "_t, _v = [], []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "WmyBGmL8bQJ8"
   },
   "outputs": [],
   "source": [
    "#iterate over _indices\n",
    "for i in range(n_splits):\n",
    "    train_idx, val_idx = next(_indices)\n",
    "    _train_df = df.drop(val_idx)\n",
    "    _t.append(_train_df)\n",
    "    _val_df = df.drop(train_idx)\n",
    "    _v.append(_val_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "colab_type": "code",
    "id": "zdhxalisbQKA",
    "outputId": "0a58a9dd-0e3c-4d8a-9133-e9587c89a3f1"
   },
   "outputs": [],
   "source": [
    "for d in _t:\n",
    "    print(d.info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "colab_type": "code",
    "id": "zlisAq67bQKE",
    "outputId": "65aaac45-3721-46c0-eb88-6fc6f8662d64"
   },
   "outputs": [],
   "source": [
    "for d in _v:\n",
    "    print(d.info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "iQK3oFtDbQKK"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise7.04.ipynb",
   "provenance": []
  },
  "file_extension": ".py",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  },
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3
 },
 "nbformat": 4,
 "nbformat_minor": 1
}