{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise6_02: linear regression on `qsar_fish_toxicity.csv`\n",
    "\n",
    "Fit a `LinearRegression` model that predicts the `response` column from the remaining six columns, report the $R^2$ score on a validation split, and put actual and predicted values side by side."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "tH3lvFAXfjGz"
   },
   "outputs": [],
   "source": [
    "# import libraries\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# configuration: every value a reader might want to tune\n",
    "DATA_PATH = '../Dataset/qsar_fish_toxicity.csv'\n",
    "SEED = 0  # shared by both splits so the partitions are reproducible\n",
    "EVAL_FRACTION = 0.2  # share of all rows held out from training\n",
    "TEST_FRACTION = 0.25  # share of the held-out rows kept for testing (train_test_split's default)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "The file is `;`-separated. Column names are supplied through `names=`, so pandas treats every row of the file as data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "O0ob0P0RftvC"
   },
   "outputs": [],
   "source": [
    "# column headers\n",
    "_headers = ['CIC0', 'SM1', 'GATS1i', 'NdsCH', 'Ndssc', 'MLOGP', 'response']\n",
    "\n",
    "# read in data\n",
    "df = pd.read_csv(DATA_PATH, names=_headers, sep=';')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "RbFPJeOKfvFx",
    "outputId": "20b9e550-bcb8-49c7-a589-f08b9cd48754"
   },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split into train / validation / test\n",
    "\n",
    "The first split holds out `EVAL_FRACTION` of the rows. The second split divides that held-out part into a validation set, which is used below, and a test set, which this notebook does not touch."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "wS62XTnDf1VC"
   },
   "outputs": [],
   "source": [
    "# Let's split our data\n",
    "features = df.drop(columns='response').values\n",
    "labels = df[['response']].values  # double brackets keep the target as a single-column 2-D array\n",
    "\n",
    "X_train, X_eval, y_train, y_eval = train_test_split(\n",
    "    features, labels, test_size=EVAL_FRACTION, random_state=SEED\n",
    ")\n",
    "\n",
    "# test_size is spelled out (it equals the library default) so the ratio is visible to the reader\n",
    "X_val, X_test, y_val, y_test = train_test_split(\n",
    "    X_eval, y_eval, test_size=TEST_FRACTION, random_state=SEED\n",
    ")\n",
    "\n",
    "# every row must land in exactly one partition\n",
    "assert len(X_train) + len(X_val) + len(X_test) == len(features)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "h2BBicmsf5Gi"
   },
   "outputs": [],
   "source": [
    "model = LinearRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "Sbmn6Wfif6sa",
    "outputId": "819c811a-ec55-46a7-a4b2-a6c20facaecc"
   },
   "outputs": [],
   "source": [
    "model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate on the validation split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "j52nxGrLf_Y6"
   },
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "dboYPiDhgAzS",
    "outputId": "354432d9-670c-4d5e-96ba-62b79733f930"
   },
   "outputs": [],
   "source": [
    "r2 = model.score(X_val, y_val)\n",
    "print(f'R^2 score: {r2}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "KWhxdfXmgZPk",
    "outputId": "f0602638-2383-4df3-8286-9225ca272cb0"
   },
   "outputs": [],
   "source": [
    "# flatten both column vectors so each becomes one DataFrame column\n",
    "_ys = pd.DataFrame({'actuals': y_val.reshape(-1), 'predicted': y_pred.reshape(-1)})\n",
    "_ys.head()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise6_02.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}