{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "pVk2P_EkJph4"
   },
   "source": [
    "**Saving Model**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "GcXAK_ewJMUe"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "KeXQXpu5JRMi"
   },
   "outputs": [],
   "source": [
    "_headers = ['CIC0', 'SM1', 'GATS1i', 'NdsCH', 'Ndssc', 'MLOGP', 'response']\n",
    "\n",
    "# read in data\n",
    "df = pd.read_csv('../Dataset/qsar_fish_toxicity.csv', names=_headers, sep=';')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "Wn9WF2P2JTO_",
    "outputId": "67a5e3f4-e2aa-4c38-a0b2-e5712b4aa908"
   },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "ZJMFt-GKJVDk"
   },
   "outputs": [],
   "source": [
    "features = df.drop('response', axis=1).values\n",
    "labels = df[['response']].values\n",
    "\n",
    "X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
    "X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, random_state=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "iE1odfjcJWfo",
    "outputId": "6ac225a0-e57e-469b-f5fc-e2898153cd80"
   },
   "outputs": [],
   "source": [
    "model = LinearRegression()\n",
    "print (model)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "TGN-jqrjJY_Y",
    "outputId": "76b2cea0-9f43-4f83-accc-c980981da455"
   },
   "outputs": [],
   "source": [
    "model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "_62dyt4IJai0"
   },
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 72
    },
    "colab_type": "code",
    "id": "qC_LbCBvJcP-",
    "outputId": "6d750dbd-1efc-47f0-bffc-573629190145"
   },
   "outputs": [],
   "source": [
    "from sklearn.externals import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "5BaUnuCiJdug",
    "outputId": "0d69300b-dcc8-4254-8d1e-bfd2cef0f0c9"
   },
   "outputs": [],
   "source": [
    "joblib.dump(model, './model.joblib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "dBRNnfVPJfPZ"
   },
   "outputs": [],
   "source": [
    "m2 = joblib.load('./model.joblib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "huOP4iHjJkPG"
   },
   "outputs": [],
   "source": [
    "m2_preds = m2.predict(X_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "imDac40oJl4K",
    "outputId": "353e8c47-2be8-46d0-c5e7-7e8a42c469a5"
   },
   "outputs": [],
   "source": [
    "ys = pd.DataFrame(dict(predicted=y_pred.reshape(-1), m2=m2_preds.reshape(-1)))\n",
    "ys.head()\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise6_14.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}