Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

627 lines
12 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "UB0JdwYh4TXP"
},
"source": [
"# Ridge Regression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "C7YFRZD74TXQ"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression, Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "o_LETRC34TXW"
},
"outputs": [],
"source": [
"_df = pd.read_csv('../Dataset/ccpp.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 230
},
"colab_type": "code",
"id": "AJruh2jv4TXb",
"outputId": "c992073a-0b55-4818-f789-5fbb0b2c66a1"
},
"outputs": [],
"source": [
"_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ncMDnCFH4TXi"
},
"outputs": [],
"source": [
"# Feature matrix: every column except 'PE'; target vector: the 'PE' column.\n",
"X = _df.drop(columns=['PE']).values\n",
"y = _df.loc[:, 'PE'].values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "MWeCBwia4TXs"
},
"outputs": [],
"source": [
"# 80% of the rows go to training; the fixed random_state keeps the split repeatable.\n",
"train_X, eval_X, train_y, eval_y = train_test_split(\n",
"    X, y, train_size=0.8, random_state=0\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "tM3c9EIN4TXw"
},
"source": [
"# Implement a LinearRegression model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "4T2HmvOL4TXx"
},
"outputs": [],
"source": [
"lr_model_1 = LinearRegression()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "jtxCFguq4TX0",
"outputId": "6baaa6a0-1ee2-4081-b807-943ef0dacf4a"
},
"outputs": [],
"source": [
"lr_model_1.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "DmjxSQFx4TX4"
},
"outputs": [],
"source": [
"lr_model_1_preds = lr_model_1.predict(eval_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "2tjzBNFY4TX-",
"outputId": "af7ceef5-e293-4c03-9da9-3ca1d8449793"
},
"outputs": [],
"source": [
"# R2 of the baseline linear model on the evaluation split.\n",
"print(f'lr_model_1 R2 Score: {lr_model_1.score(eval_X, eval_y)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "7s2xf7ZJ4TYG",
"outputId": "f51ebf29-81bf-4950-faf4-d145a83bb85a"
},
"outputs": [],
"source": [
"# Keyword arguments make the (truth, prediction) order explicit.\n",
"print(\n",
"    'lr_model_1 MSE: {}'.format(\n",
"        mean_squared_error(y_true=eval_y, y_pred=lr_model_1_preds)\n",
"    )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "KdDWbtcH4TYM"
},
"source": [
"# Engineer cubic features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "NgKhTb974TYN"
},
"outputs": [],
"source": [
"# Section-specific name so this step list cannot be mistaken for the step\n",
"# lists of the other pipelines in this notebook when cells are re-run.\n",
"# Order: MinMaxScaler -> PolynomialFeatures(degree=3) -> LinearRegression.\n",
"cubic_steps = [\n",
"    ('scaler', MinMaxScaler()),\n",
"    ('poly', PolynomialFeatures(degree=3)),\n",
"    ('lr', LinearRegression())\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "qXAuIVa_4TYS"
},
"outputs": [],
"source": [
"lr_model_2 = Pipeline(cubic_steps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 177
},
"colab_type": "code",
"id": "a8es2V-O4TYX",
"outputId": "8be3b435-f59d-405c-f63e-2dd78dbb87c2"
},
"outputs": [],
"source": [
"lr_model_2.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "ctQ5S3hC4TYb",
"outputId": "5740cb06-5003-4466-f2d7-eb5e9f55b728"
},
"outputs": [],
"source": [
"# R2 of the degree-3 pipeline on the evaluation split.\n",
"print(f'lr_model_2 R2 Score: {lr_model_2.score(eval_X, eval_y)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "SuXPIB-k4TYj"
},
"outputs": [],
"source": [
"lr_model_2_preds = lr_model_2.predict(eval_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "ITfCmnHP4TYm",
"outputId": "65ca8ff8-08da-4d7d-c4a1-7a85fe5ddda5"
},
"outputs": [],
"source": [
"# Keyword arguments make the (truth, prediction) order explicit.\n",
"print(\n",
"    'lr_model_2 MSE: {}'.format(\n",
"        mean_squared_error(y_true=eval_y, y_pred=lr_model_2_preds)\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 177
},
"colab_type": "code",
"id": "hbbvnP8H4TYp",
"outputId": "8f23907b-0bf0-484d-907c-39c88d117645"
},
"outputs": [],
"source": [
"# Coefficients of the pipeline's final step, looked up by its 'lr' name.\n",
"print(lr_model_2.named_steps['lr'].coef_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "0HLfk7zh4TYu",
"outputId": "86414a27-a260-4d7e-dc99-710d60ebd800"
},
"outputs": [],
"source": [
"# Number of coefficients held by the pipeline's 'lr' step.\n",
"print(len(lr_model_2.named_steps['lr'].coef_))"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "IurzrMU74TYz"
},
"source": [
"# Engineer degree-10 polynomial features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "UPaMX0Os4TY0"
},
"outputs": [],
"source": [
"# Section-specific name so this step list cannot be mistaken for the step\n",
"# lists of the other pipelines in this notebook when cells are re-run.\n",
"# Order: MinMaxScaler -> PolynomialFeatures(degree=10) -> LinearRegression.\n",
"degree10_steps = [\n",
"    ('scaler', MinMaxScaler()),\n",
"    ('poly', PolynomialFeatures(degree=10)),\n",
"    ('lr', LinearRegression())\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "gNa1_R4F4TY5"
},
"outputs": [],
"source": [
"lr_model_3 = Pipeline(degree10_steps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 177
},
"colab_type": "code",
"id": "PEvqJh5V4TY9",
"outputId": "08bd757d-1293-4d03-ee73-791a7ac114b9"
},
"outputs": [],
"source": [
"lr_model_3.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "Ux-fhkRp4TZE",
"outputId": "744336a7-c29c-48b2-9ca8-a77b7a2644e5"
},
"outputs": [],
"source": [
"# R2 of the degree-10 pipeline on the evaluation split.\n",
"print(f'lr_model_3 R2 Score: {lr_model_3.score(eval_X, eval_y)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "6TCmu8ON4TZH"
},
"outputs": [],
"source": [
"lr_model_3_preds = lr_model_3.predict(eval_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "Z2AVT7xh4TZK",
"outputId": "0a0b22cc-a49f-4b3a-a4a0-ee48a2891ca7"
},
"outputs": [],
"source": [
"# Keyword arguments make the (truth, prediction) order explicit.\n",
"print(\n",
"    'lr_model_3 MSE: {}'.format(\n",
"        mean_squared_error(y_true=eval_y, y_pred=lr_model_3_preds)\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "nioc06jt4TZO",
"outputId": "525ac3aa-b84c-4fe6-f6f9-6c2bb325cd0e"
},
"outputs": [],
"source": [
"# Number of coefficients held by the pipeline's 'lr' step.\n",
"print(len(lr_model_3.named_steps['lr'].coef_))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 177
},
"colab_type": "code",
"id": "QkgaxPCR4TZT",
"outputId": "37d463b6-187b-490c-b65f-e1f180d7acdd"
},
"outputs": [],
"source": [
"# Only the first 35 coefficients are shown to keep the output short.\n",
"print(lr_model_3.named_steps['lr'].coef_[:35])"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "mW6S5WTh4TZY"
},
"source": [
"# Fit Ridge regression on the same degree-10 polynomial pipeline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "__LdiSB84TZY"
},
"outputs": [],
"source": [
"# Section-specific name so this step list cannot be mistaken for the step\n",
"# lists of the other pipelines in this notebook when cells are re-run.\n",
"# Order: MinMaxScaler -> PolynomialFeatures(degree=10) -> Ridge(alpha=0.9).\n",
"# The final step keeps the key 'lr' so lookups by step name still work.\n",
"ridge_steps = [\n",
"    ('scaler', MinMaxScaler()),\n",
"    ('poly', PolynomialFeatures(degree=10)),\n",
"    ('lr', Ridge(alpha=0.9))\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "p0a-4cxH4TZd"
},
"outputs": [],
"source": [
"ridge_model = Pipeline(ridge_steps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 195
},
"colab_type": "code",
"id": "ENQhe2T84TZi",
"outputId": "45d16816-b8fd-4d90-b7d1-53990669b268"
},
"outputs": [],
"source": [
"ridge_model.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "8mIo0z5-4TZl",
"outputId": "1800a63e-8a8d-4393-bc2f-0b5431d4c8e1"
},
"outputs": [],
"source": [
"# R2 of the Ridge pipeline on the evaluation split.\n",
"print(f'ridge_model R2 Score: {ridge_model.score(eval_X, eval_y)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "bRz7syxm4TZr"
},
"outputs": [],
"source": [
"ridge_model_preds = ridge_model.predict(eval_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "Ho4NgDe_4TZu",
"outputId": "d3c54a15-ff04-4db6-add5-be5c354ec894"
},
"outputs": [],
"source": [
"# Keyword arguments make the (truth, prediction) order explicit.\n",
"print(\n",
"    'ridge_model MSE: {}'.format(\n",
"        mean_squared_error(y_true=eval_y, y_pred=ridge_model_preds)\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "HosEKfHd4TZy",
"outputId": "821e57b2-929f-4284-85bc-cd4b911aa0b2"
},
"outputs": [],
"source": [
"# Number of coefficients held by the pipeline's 'lr' step (a Ridge estimator here).\n",
"print(len(ridge_model.named_steps['lr'].coef_))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 141
},
"colab_type": "code",
"id": "52c0ZM1a4TZ2",
"outputId": "2c9d310b-6dab-4357-fda1-f388fa4f6881"
},
"outputs": [],
"source": [
"# Only the first 35 coefficients are shown to keep the output short.\n",
"print(ridge_model.named_steps['lr'].coef_[:35])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ier6eRNz4TZ5"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"name": "Exercise7.10.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}