mlessentials/Lab07/Exercise7.02/Exercise7_02.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "xHaiiBElWeXW"
   },
   "source": [
    "# Import and Split Data Repeatably"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "jBiMpFDdWeXZ"
   },
   "outputs": [],
   "source": [
    "# import libraries\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 266
    },
    "colab_type": "code",
    "id": "4aZXomWIWeXg",
    "outputId": "49a7f885-dc6c-42d4-83a4-6ffdbe4838d0"
   },
   "outputs": [],
   "source": [
    "# data doesn't have headers, so let's create headers\n",
    "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n",
    "# read in cars dataset\n",
    "df = pd.read_csv('../Dataset/car.data', names=_headers, index_col=None)\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "SHl6M8xSWeXn"
   },
   "outputs": [],
   "source": [
    "#split the data into 80% for training and 20% for evaluation\n",
    "training_df, eval_df = train_test_split(df, train_size=0.8, random_state=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "OG-GDqaGWeXs",
    "outputId": "7d23c702-53fb-4def-e367-b75b315242c0"
   },
   "outputs": [],
   "source": [
    "training_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "oB9GUb1cWeX6",
    "outputId": "5f045d6c-c066-43a5-e8be-1e4e608aad00"
   },
   "outputs": [],
   "source": [
    "eval_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "16jscRkkWeYK"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise7.02.ipynb",
   "provenance": []
  },
  "file_extension": ".py",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  },
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3
 },
 "nbformat": 4,
 "nbformat_minor": 1
}