{
 "cells": [
  {"cell_type": "markdown", "metadata": {"colab_type": "text", "id": "2fCtd4kCt__z"}, "source": [
   "# Lasso Regression\n",
   "\n",
   "Fits four models on the `ccpp.csv` data to predict the `PE` column and reports the evaluation-set R2 score and MSE of each:\n",
   "\n",
   "1. `LinearRegression` on the raw features\n",
   "2. `LinearRegression` on scaled cubic (degree-3) polynomial features\n",
   "3. `LinearRegression` on scaled degree-10 polynomial features\n",
   "4. `Lasso(alpha=0.01)` on the same scaled degree-10 polynomial features\n",
   "\n",
   "The fitted coefficients of the polynomial models are printed so they can be compared."
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "jG6YpAbot__3"}, "outputs": [], "source": [
   "import pandas as pd\n",
   "from sklearn.model_selection import train_test_split\n",
   "from sklearn.linear_model import LinearRegression, Lasso\n",
   "from sklearn.metrics import mean_squared_error\n",
   "from sklearn.pipeline import Pipeline\n",
   "from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "def make_poly_pipeline(degree, regressor):\n",
   "    \"\"\"Build an unfitted MinMaxScaler -> PolynomialFeatures -> regressor pipeline.\n",
   "\n",
   "    Each call creates its own list of steps, so a model never depends on\n",
   "    which version of a shared, reassigned variable was executed last.\n",
   "\n",
   "    Args:\n",
   "        degree: Polynomial degree passed to PolynomialFeatures.\n",
   "        regressor: Unfitted estimator used as the final step. It is stored\n",
   "            under the step name 'lr' whatever its type.\n",
   "\n",
   "    Returns:\n",
   "        An unfitted sklearn Pipeline with steps 'scaler', 'poly' and 'lr'.\n",
   "    \"\"\"\n",
   "    return Pipeline([\n",
   "        ('scaler', MinMaxScaler()),\n",
   "        ('poly', PolynomialFeatures(degree=degree)),\n",
   "        ('lr', regressor),\n",
   "    ])"
  ]},
  {"cell_type": "markdown", "metadata": {}, "source": [
   "## Load the data and split it into training and evaluation sets"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "sGKCHZb0t__8"}, "outputs": [], "source": [
   "_df = pd.read_csv('../Dataset/ccpp.csv')"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 230}, "colab_type": "code", "id": "MJzrSgbzuAAB", "outputId": "efbd59f2-3b49-44f6-b186-e1d275f13cc6"}, "outputs": [], "source": [
   "_df.info()"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "WVkCs_VpuAAH"}, "outputs": [], "source": [
   "# Feature matrix: every column except the target 'PE'.\n",
   "X = _df.drop(['PE'], axis=1).values"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "J4PHvpCzuAAM"}, "outputs": [], "source": [
   "# Target vector: the 'PE' column.\n",
   "y = _df['PE'].values"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "tFOurIZ-uAAW"}, "outputs": [], "source": [
   "# 80% of the rows are used for training, the rest for evaluation.\n",
   "# The fixed random_state makes the split identical on every run.\n",
   "train_X, eval_X, train_y, eval_y = train_test_split(X, y, train_size=0.8, random_state=0)"
  ]},
  {"cell_type": "markdown", "metadata": {"colab_type": "text", "id": "qChyCx_QuAAu"}, "source": [
   "## Implement a LinearRegression model"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "mp6CUsPDuAA0"}, "outputs": [], "source": [
   "lr_model_1 = LinearRegression()"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "f9w8TJiXuABY", "outputId": "f38d02e0-36e7-4aa5-dc4a-fb3adf505ee0"}, "outputs": [], "source": [
   "lr_model_1.fit(train_X, train_y)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "MEGafhCSuABo"}, "outputs": [], "source": [
   "lr_model_1_preds = lr_model_1.predict(eval_X)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "WV0MFrBzuAB5", "outputId": "58a1231f-3e1c-452c-e799-307254d3e149"}, "outputs": [], "source": [
   "print('lr_model_1 R2 Score: {}'.format(lr_model_1.score(eval_X, eval_y)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "oKUggTVmuACA", "outputId": "23656741-ee91-4b1c-a3f8-cfb93650bc34"}, "outputs": [], "source": [
   "print('lr_model_1 MSE: {}'.format(mean_squared_error(eval_y, lr_model_1_preds)))"
  ]},
  {"cell_type": "markdown", "metadata": {"colab_type": "text", "id": "jqtZ38VhuACE"}, "source": [
   "## Engineer cubic features"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "SNCYPewxuACJ"}, "outputs": [], "source": [
   "# Scale the features, expand them to degree-3 polynomial terms, then fit a LinearRegression.\n",
   "lr_model_2 = make_poly_pipeline(degree=3, regressor=LinearRegression())"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 177}, "colab_type": "code", "id": "0vr1LXSsuACM", "outputId": "27888045-5e27-4291-bc87-b96d761a4c20"}, "outputs": [], "source": [
   "lr_model_2.fit(train_X, train_y)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "0T__GpZEuACP", "outputId": "cbddcba9-37cd-4115-8675-2ce482e625ae"}, "outputs": [], "source": [
   "print('lr_model_2 R2 Score: {}'.format(lr_model_2.score(eval_X, eval_y)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "hkcTjTXQuACT"}, "outputs": [], "source": [
   "lr_model_2_preds = lr_model_2.predict(eval_X)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "ijQLoniluACX", "outputId": "7616aaf8-c9d3-4497-8810-ea16985493d8"}, "outputs": [], "source": [
   "print('lr_model_2 MSE: {}'.format(mean_squared_error(eval_y, lr_model_2_preds)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 177}, "colab_type": "code", "id": "2oCuH-VOuACa", "outputId": "948c8f4a-0909-4e74-f60f-40050f071014"}, "outputs": [], "source": [
   "# lr_model_2[-1] is the final pipeline step, i.e. the fitted regressor.\n",
   "print(lr_model_2[-1].coef_)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "4pBIh88SuACf", "outputId": "42d39731-1dbc-4385-8c3c-a83cb935e369"}, "outputs": [], "source": [
   "# One coefficient per polynomial term produced by the 'poly' step.\n",
   "print(len(lr_model_2[-1].coef_))"
  ]},
  {"cell_type": "markdown", "metadata": {"colab_type": "text", "id": "sRnkygHkuACn"}, "source": [
   "## Engineer polynomial features\n",
   "\n",
   "Same pipeline as the cubic model, with the polynomial degree raised from 3 to 10."
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "ENi3637QuACr"}, "outputs": [], "source": [
   "lr_model_3 = make_poly_pipeline(degree=10, regressor=LinearRegression())"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 0}, "colab_type": "code", "id": "6Fa3hJ4duACx", "outputId": "9faa5eb7-1ed0-40b0-b99d-c0ba085b4d5f"}, "outputs": [], "source": [
   "lr_model_3.fit(train_X, train_y)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 0}, "colab_type": "code", "id": "dvVQIDtNuAC1", "outputId": "3448cfd7-6ad9-4b58-ccb8-3cfb0e04aea7"}, "outputs": [], "source": [
   "print('lr_model_3 R2 Score: {}'.format(lr_model_3.score(eval_X, eval_y)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "g_J5KhfCuAC7"}, "outputs": [], "source": [
   "lr_model_3_preds = lr_model_3.predict(eval_X)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 0}, "colab_type": "code", "id": "phBeRc0FuADC", "outputId": "4de4e58b-e326-4639-d0c0-08a40456ee04"}, "outputs": [], "source": [
   "print('lr_model_3 MSE: {}'.format(mean_squared_error(eval_y, lr_model_3_preds)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 0}, "colab_type": "code", "id": "Bx8QmaJduADL", "outputId": "1832ef7c-f3a3-42f7-aba9-d4a4d640135b"}, "outputs": [], "source": [
   "print(len(lr_model_3[-1].coef_))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 0}, "colab_type": "code", "id": "omV4-ydJuADT", "outputId": "4ffe3e60-4949-4a96-8ad8-042bb5551ea0"}, "outputs": [], "source": [
   "# Only the first 35 coefficients are shown to keep the output short.\n",
   "print(lr_model_3[-1].coef_[:35])"
  ]},
  {"cell_type": "markdown", "metadata": {"colab_type": "text", "id": "R3KTPZd5uADY"}, "source": [
   "## Implement Lasso on the same pipeline\n",
   "\n",
   "The scaler and the degree-10 polynomial features are the same as for `lr_model_3`; only the final estimator changes to `Lasso(alpha=0.01)`."
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "VKkWpShFuADi"}, "outputs": [], "source": [
   "lasso_model = make_poly_pipeline(degree=10, regressor=Lasso(alpha=0.01))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 212}, "colab_type": "code", "id": "CFUKWqGSuAD9", "outputId": "304b16fe-a245-4278-8219-4d0f466e21ea"}, "outputs": [], "source": [
   "lasso_model.fit(train_X, train_y)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "_bOVRGGeuAEh", "outputId": "6ff11eaa-45ec-4dab-b807-dbdc27d57093"}, "outputs": [], "source": [
   "print('lasso_model R2 Score: {}'.format(lasso_model.score(eval_X, eval_y)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {}, "colab_type": "code", "id": "B0sCTyzxuAE1"}, "outputs": [], "source": [
   "lasso_preds = lasso_model.predict(eval_X)"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "terkLrI6uAE7", "outputId": "5804a278-f495-4a16-a703-2621378d370d"}, "outputs": [], "source": [
   "print('lasso_model MSE: {}'.format(mean_squared_error(eval_y, lasso_preds)))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 35}, "colab_type": "code", "id": "2y24Ljr_uAFG", "outputId": "759e6c34-cee9-4f1e-9791-408aad4e4b20"}, "outputs": [], "source": [
   "print(len(lasso_model[-1].coef_))"
  ]},
  {"cell_type": "code", "execution_count": null, "metadata": {"colab": {"base_uri": "https://localhost:8080/", "height": 141}, "colab_type": "code", "id": "9fPTFs6CuAFV", "outputId": "d15394eb-8ff7-4f36-af46-b61e12f49b97"}, "outputs": [], "source": [
   "# Same slice as printed for lr_model_3, so the two sets of coefficients can be compared.\n",
   "print(lasso_model[-1].coef_[:35])"
  ]}
 ],
 "metadata": {
  "colab": {"name": "Exercise7.09.ipynb", "provenance": []},
  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
  "language_info": {
   "codemirror_mode": {"name": "ipython", "version": 3},
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}