mirror of
https://github.com/fenago/data-science.git
synced 2026-05-08 02:21:04 +00:00
627 lines
12 KiB
Plaintext
627 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "UB0JdwYh4TXP"
|
|
},
|
|
"source": [
|
|
"# Ridge Regression"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "C7YFRZD74TXQ"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LinearRegression, Ridge\n",
|
|
"from sklearn.metrics import mean_squared_error\n",
|
|
"from sklearn.pipeline import Pipeline\n",
|
|
"from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "o_LETRC34TXW"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"_df = pd.read_csv('../Dataset/ccpp.csv')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 230
|
|
},
|
|
"colab_type": "code",
|
|
"id": "AJruh2jv4TXb",
|
|
"outputId": "c992073a-0b55-4818-f789-5fbb0b2c66a1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"_df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "ncMDnCFH4TXi"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = _df.drop(['PE'], axis=1).values\n",
|
|
"y = _df['PE'].values"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "MWeCBwia4TXs"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_X, eval_X, train_y, eval_y = train_test_split(X, y, train_size=0.8, random_state=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "tM3c9EIN4TXw"
|
|
},
|
|
"source": [
|
|
"# Implement a LinearRegression model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "4T2HmvOL4TXx"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_1 = LinearRegression()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "jtxCFguq4TX0",
|
|
"outputId": "6baaa6a0-1ee2-4081-b807-943ef0dacf4a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_1.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "DmjxSQFx4TX4"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_1_preds = lr_model_1.predict(eval_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "2tjzBNFY4TX-",
|
|
"outputId": "af7ceef5-e293-4c03-9da9-3ca1d8449793"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('lr_model_1 R2 Score: {}'.format(lr_model_1.score(eval_X, eval_y)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "7s2xf7ZJ4TYG",
|
|
"outputId": "f51ebf29-81bf-4950-faf4-d145a83bb85a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('lr_model_1 MSE: {}'.format(mean_squared_error(eval_y, lr_model_1_preds)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "KdDWbtcH4TYM"
|
|
},
|
|
"source": [
|
|
"# Engineer cubic features"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "NgKhTb974TYN"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"steps = [\n",
|
|
" ('scaler', MinMaxScaler()),\n",
|
|
" ('poly', PolynomialFeatures(degree=3)),\n",
|
|
" ('lr', LinearRegression())\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "qXAuIVa_4TYS"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_2 = Pipeline(steps)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 177
|
|
},
|
|
"colab_type": "code",
|
|
"id": "a8es2V-O4TYX",
|
|
"outputId": "8be3b435-f59d-405c-f63e-2dd78dbb87c2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_2.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "ctQ5S3hC4TYb",
|
|
"outputId": "5740cb06-5003-4466-f2d7-eb5e9f55b728"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('lr_model_2 R2 Score: {}'.format(lr_model_2.score(eval_X, eval_y)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "SuXPIB-k4TYj"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_2_preds = lr_model_2.predict(eval_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "ITfCmnHP4TYm",
|
|
"outputId": "65ca8ff8-08da-4d7d-c4a1-7a85fe5ddda5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('lr_model_2 MSE: {}'.format(mean_squared_error(eval_y, lr_model_2_preds)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 177
|
|
},
|
|
"colab_type": "code",
|
|
"id": "hbbvnP8H4TYp",
|
|
"outputId": "8f23907b-0bf0-484d-907c-39c88d117645"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(lr_model_2[-1].coef_)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "0HLfk7zh4TYu",
|
|
"outputId": "86414a27-a260-4d7e-dc99-710d60ebd800"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(len(lr_model_2[-1].coef_))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "IurzrMU74TYz"
|
|
},
|
|
"source": [
|
|
"# Engineer degree-10 polynomial features"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "UPaMX0Os4TY0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"steps = [\n",
|
|
" ('scaler', MinMaxScaler()),\n",
|
|
" ('poly', PolynomialFeatures(degree=10)),\n",
|
|
" ('lr', LinearRegression())\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "gNa1_R4F4TY5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_3 = Pipeline(steps)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 177
|
|
},
|
|
"colab_type": "code",
|
|
"id": "PEvqJh5V4TY9",
|
|
"outputId": "08bd757d-1293-4d03-ee73-791a7ac114b9"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_3.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Ux-fhkRp4TZE",
|
|
"outputId": "744336a7-c29c-48b2-9ca8-a77b7a2644e5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('lr_model_3 R2 Score: {}'.format(lr_model_3.score(eval_X, eval_y)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "6TCmu8ON4TZH"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"lr_model_3_preds = lr_model_3.predict(eval_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Z2AVT7xh4TZK",
|
|
"outputId": "0a0b22cc-a49f-4b3a-a4a0-ee48a2891ca7"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('lr_model_3 MSE: {}'.format(mean_squared_error(eval_y, lr_model_3_preds)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "nioc06jt4TZO",
|
|
"outputId": "525ac3aa-b84c-4fe6-f6f9-6c2bb325cd0e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(len(lr_model_3[-1].coef_))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 177
|
|
},
|
|
"colab_type": "code",
|
|
"id": "QkgaxPCR4TZT",
|
|
"outputId": "37d463b6-187b-490c-b65f-e1f180d7acdd"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(lr_model_3[-1].coef_[:35])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "mW6S5WTh4TZY"
|
|
},
|
|
"source": [
|
|
"# Implement Ridge regression on the same degree-10 polynomial pipeline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "__LdiSB84TZY"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"steps = [\n",
|
|
" ('scaler', MinMaxScaler()),\n",
|
|
" ('poly', PolynomialFeatures(degree=10)),\n",
|
|
" ('lr', Ridge(alpha=0.9))\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "p0a-4cxH4TZd"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ridge_model = Pipeline(steps)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 195
|
|
},
|
|
"colab_type": "code",
|
|
"id": "ENQhe2T84TZi",
|
|
"outputId": "45d16816-b8fd-4d90-b7d1-53990669b268"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ridge_model.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "8mIo0z5-4TZl",
|
|
"outputId": "1800a63e-8a8d-4393-bc2f-0b5431d4c8e1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('ridge_model R2 Score: {}'.format(ridge_model.score(eval_X, eval_y)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "bRz7syxm4TZr"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ridge_model_preds = ridge_model.predict(eval_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Ho4NgDe_4TZu",
|
|
"outputId": "d3c54a15-ff04-4db6-add5-be5c354ec894"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print('ridge_model MSE: {}'.format(mean_squared_error(eval_y, ridge_model_preds)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "HosEKfHd4TZy",
|
|
"outputId": "821e57b2-929f-4284-85bc-cd4b911aa0b2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(len(ridge_model[-1].coef_))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 141
|
|
},
|
|
"colab_type": "code",
|
|
"id": "52c0ZM1a4TZ2",
|
|
"outputId": "2c9d310b-6dab-4357-fda1-f388fa4f6881"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(ridge_model[-1].coef_[:35])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "ier6eRNz4TZ5"
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "Exercise7.10.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|