mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 00:22:32 +00:00
233 lines
4.7 KiB
Plaintext
233 lines
4.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "pVk2P_EkJph4"
|
|
},
|
|
"source": [
|
|
"**Saving and Loading a Model**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "GcXAK_ewJMUe"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "KeXQXpu5JRMi"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"_headers = ['CIC0', 'SM1', 'GATS1i', 'NdsCH', 'Ndssc', 'MLOGP', 'response']\n",
|
|
"\n",
|
|
"# read in data\n",
|
|
"df = pd.read_csv('../Dataset/qsar_fish_toxicity.csv', names=_headers, sep=';')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 194
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Wn9WF2P2JTO_",
|
|
"outputId": "67a5e3f4-e2aa-4c38-a0b2-e5712b4aa908"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "ZJMFt-GKJVDk"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"features = df.drop('response', axis=1).values\n",
|
|
"labels = df[['response']].values\n",
|
|
"\n",
|
|
"X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
|
|
"X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, random_state=0)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "iE1odfjcJWfo",
|
|
"outputId": "6ac225a0-e57e-469b-f5fc-e2898153cd80"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"model = LinearRegression()\n",
|
|
"print(model)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "TGN-jqrjJY_Y",
|
|
"outputId": "76b2cea0-9f43-4f83-accc-c980981da455"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"model.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "_62dyt4IJai0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y_pred = model.predict(X_val)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 72
|
|
},
|
|
"colab_type": "code",
|
|
"id": "qC_LbCBvJcP-",
|
|
"outputId": "6d750dbd-1efc-47f0-bffc-573629190145"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23; use the standalone joblib package"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "5BaUnuCiJdug",
|
|
"outputId": "0d69300b-dcc8-4254-8d1e-bfd2cef0f0c9"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"joblib.dump(model, './model.joblib')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "dBRNnfVPJfPZ"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"m2 = joblib.load('./model.joblib')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "huOP4iHjJkPG"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"m2_preds = m2.predict(X_val)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 194
|
|
},
|
|
"colab_type": "code",
|
|
"id": "imDac40oJl4K",
|
|
"outputId": "353e8c47-2be8-46d0-c5e7-7e8a42c469a5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ys = pd.DataFrame(dict(predicted=y_pred.reshape(-1), m2=m2_preds.reshape(-1)))\n",
|
|
"ys.head()\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "Exercise6_14.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|