{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "UB0JdwYh4TXP"
   },
   "source": [
    "# Ridge Regression\n",
    "\n",
    "Fits four regressors on `ccpp.csv` (target column `PE`) and scores each one on a held-out 20% split:\n",
    "\n",
    "1. plain `LinearRegression`\n",
    "2. `LinearRegression` on degree-3 polynomial features\n",
    "3. `LinearRegression` on degree-10 polynomial features\n",
    "4. `Ridge` on the same degree-10 features\n",
    "\n",
    "Compare the R2 / MSE lines and the printed coefficients of models 3 and 4 to see what the L2 penalty changes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "C7YFRZD74TXQ"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression, Ridge\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: every value a reader might want to tune lives here.\n",
    "DATA_PATH = '../Dataset/ccpp.csv'\n",
    "TARGET_COL = 'PE'\n",
    "\n",
    "TRAIN_SIZE = 0.8  # fraction of rows used for fitting; the rest is the evaluation split\n",
    "SEED = 0          # random_state for train_test_split, so the split is reproducible\n",
    "\n",
    "CUBIC_DEGREE = 3\n",
    "HIGH_DEGREE = 10\n",
    "RIDGE_ALPHA = 0.9\n",
    "\n",
    "# How many leading coefficients of the degree-10 models to print.\n",
    "# NOTE(review): 35 presumably equals the coefficient count of the cubic model,\n",
    "# so the printed slices line up - confirm against the len(...) output in the cubic section.\n",
    "N_COEFS_SHOWN = 35"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and split the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "o_LETRC34TXW"
   },
   "outputs": [],
   "source": [
    "_df = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 230
    },
    "colab_type": "code",
    "id": "AJruh2jv4TXb",
    "outputId": "c992073a-0b55-4818-f789-5fbb0b2c66a1"
   },
   "outputs": [],
   "source": [
    "_df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "ncMDnCFH4TXi"
   },
   "outputs": [],
   "source": [
    "# Every column except the target is used as a feature.\n",
    "X = _df.drop([TARGET_COL], axis=1).values\n",
    "y = _df[TARGET_COL].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "MWeCBwia4TXs"
   },
   "outputs": [],
   "source": [
    "train_X, eval_X, train_y, eval_y = train_test_split(\n",
    "    X, y, train_size=TRAIN_SIZE, random_state=SEED\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers\n",
    "\n",
    "Every polynomial model below uses the same scale -> expand -> regress pipeline and the same evaluation, so both are defined once here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_poly_pipeline(degree: int, estimator) -> Pipeline:\n",
    "    \"\"\"Build an unfitted scale -> polynomial-expand -> regress pipeline.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    degree : int\n",
    "        Value passed as ``degree`` to ``PolynomialFeatures``.\n",
    "    estimator : scikit-learn regressor\n",
    "        Final pipeline step, e.g. ``LinearRegression()`` or ``Ridge(alpha=...)``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    sklearn.pipeline.Pipeline\n",
    "        New pipeline with steps ``scaler``, ``poly`` and ``regressor``.\n",
    "        The scaler and polynomial steps are created fresh on every call.\n",
    "    \"\"\"\n",
    "    return Pipeline([\n",
    "        ('scaler', MinMaxScaler()),\n",
    "        ('poly', PolynomialFeatures(degree=degree)),\n",
    "        ('regressor', estimator),\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def report_metrics(name: str, model, X, y):\n",
    "    \"\"\"Print the R2 score and MSE of a fitted model and return its predictions.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Label used as the prefix of both printed lines.\n",
    "    model : fitted estimator or pipeline\n",
    "        Must provide ``predict(X)`` and ``score(X, y)``.\n",
    "    X, y : array-like\n",
    "        Evaluation features and true targets.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The value of ``model.predict(X)``.\n",
    "    \"\"\"\n",
    "    preds = model.predict(X)\n",
    "    print('{} R2 Score: {}'.format(name, model.score(X, y)))\n",
    "    print('{} MSE: {}'.format(name, mean_squared_error(y, preds)))\n",
    "    return preds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "tM3c9EIN4TXw"
   },
   "source": [
    "## Implement a LinearRegression model\n",
    "\n",
    "Baseline: `LinearRegression` fitted directly on the unscaled features."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "jtxCFguq4TX0",
    "outputId": "6baaa6a0-1ee2-4081-b807-943ef0dacf4a"
   },
   "outputs": [],
   "source": [
    "lr_model_1 = LinearRegression()\n",
    "lr_model_1.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "2tjzBNFY4TX-",
    "outputId": "af7ceef5-e293-4c03-9da9-3ca1d8449793"
   },
   "outputs": [],
   "source": [
    "lr_model_1_preds = report_metrics('lr_model_1', lr_model_1, eval_X, eval_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "KdDWbtcH4TYM"
   },
   "source": [
    "## Engineer cubic features\n",
    "\n",
    "Features are min-max scaled, expanded with `PolynomialFeatures(degree=3)`, then fed to `LinearRegression`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 177
    },
    "colab_type": "code",
    "id": "a8es2V-O4TYX",
    "outputId": "8be3b435-f59d-405c-f63e-2dd78dbb87c2"
   },
   "outputs": [],
   "source": [
    "lr_model_2 = make_poly_pipeline(CUBIC_DEGREE, LinearRegression())\n",
    "lr_model_2.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "ctQ5S3hC4TYb",
    "outputId": "5740cb06-5003-4466-f2d7-eb5e9f55b728"
   },
   "outputs": [],
   "source": [
    "lr_model_2_preds = report_metrics('lr_model_2', lr_model_2, eval_X, eval_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 177
    },
    "colab_type": "code",
    "id": "hbbvnP8H4TYp",
    "outputId": "8f23907b-0bf0-484d-907c-39c88d117645"
   },
   "outputs": [],
   "source": [
    "# [-1] selects the final pipeline step, i.e. the fitted regressor.\n",
    "print(lr_model_2[-1].coef_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "0HLfk7zh4TYu",
    "outputId": "86414a27-a260-4d7e-dc99-710d60ebd800"
   },
   "outputs": [],
   "source": [
    "print(len(lr_model_2[-1].coef_))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "IurzrMU74TYz"
   },
   "source": [
    "## Engineer degree-10 polynomial features\n",
    "\n",
    "Same pipeline with a much higher polynomial degree. The coefficient count and the leading coefficients are printed so they can be compared with the Ridge model in the next section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 177
    },
    "colab_type": "code",
    "id": "PEvqJh5V4TY9",
    "outputId": "08bd757d-1293-4d03-ee73-791a7ac114b9"
   },
   "outputs": [],
   "source": [
    "lr_model_3 = make_poly_pipeline(HIGH_DEGREE, LinearRegression())\n",
    "lr_model_3.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "Ux-fhkRp4TZE",
    "outputId": "744336a7-c29c-48b2-9ca8-a77b7a2644e5"
   },
   "outputs": [],
   "source": [
    "lr_model_3_preds = report_metrics('lr_model_3', lr_model_3, eval_X, eval_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "nioc06jt4TZO",
    "outputId": "525ac3aa-b84c-4fe6-f6f9-6c2bb325cd0e"
   },
   "outputs": [],
   "source": [
    "print(len(lr_model_3[-1].coef_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 177
    },
    "colab_type": "code",
    "id": "QkgaxPCR4TZT",
    "outputId": "37d463b6-187b-490c-b65f-e1f180d7acdd"
   },
   "outputs": [],
   "source": [
    "print(lr_model_3[-1].coef_[:N_COEFS_SHOWN])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "mW6S5WTh4TZY"
   },
   "source": [
    "## Implement Ridge on the same pipeline\n",
    "\n",
    "Identical degree-10 features, but the final step is `Ridge(alpha=RIDGE_ALPHA)`, which adds an L2 penalty on the coefficients."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 195
    },
    "colab_type": "code",
    "id": "ENQhe2T84TZi",
    "outputId": "45d16816-b8fd-4d90-b7d1-53990669b268"
   },
   "outputs": [],
   "source": [
    "ridge_model = make_poly_pipeline(HIGH_DEGREE, Ridge(alpha=RIDGE_ALPHA))\n",
    "ridge_model.fit(train_X, train_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "8mIo0z5-4TZl",
    "outputId": "1800a63e-8a8d-4393-bc2f-0b5431d4c8e1"
   },
   "outputs": [],
   "source": [
    "ridge_model_preds = report_metrics('ridge_model', ridge_model, eval_X, eval_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "HosEKfHd4TZy",
    "outputId": "821e57b2-929f-4284-85bc-cd4b911aa0b2"
   },
   "outputs": [],
   "source": [
    "print(len(ridge_model[-1].coef_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 141
    },
    "colab_type": "code",
    "id": "52c0ZM1a4TZ2",
    "outputId": "2c9d310b-6dab-4357-fda1-f388fa4f6881"
   },
   "outputs": [],
   "source": [
    "print(ridge_model[-1].coef_[:N_COEFS_SHOWN])"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise7.10.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}