Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

300 lines
5.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Superconductivity"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"_df = pd.read_csv('../Dataset/superconduct/train.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X = _df.drop(['critical_temp'], axis=1).values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y = _df['critical_temp'].values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_X, eval_X, train_y, eval_y = train_test_split(X, y, test_size=0.8, random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_1 = LinearRegression()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_1.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Model 1 R2 Score: {}'.format(model_1.score(eval_X, eval_y)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(model_1.coef_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"preds_1 = model_1.predict(eval_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Model 1 MSE: {}'.format(mean_squared_error(eval_y, preds_1)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"steps = [\n",
" ('scaler', MinMaxScaler()),\n",
" ('poly', PolynomialFeatures(degree=3, interaction_only=True)),\n",
" ('model', LinearRegression())\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_2 = Pipeline(steps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_2.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Model 2 R2 Score: {}'.format(model_2.score(eval_X, eval_y)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Number of coefficients: {}'.format(len(model_2[-1].coef_)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"steps = [\n",
" ('scaler', MinMaxScaler()),\n",
" ('poly', PolynomialFeatures(degree=3, interaction_only=True)),\n",
" ('model', Lasso(alpha=0.001, max_iter=2000))\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lasso_model = Pipeline(steps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lasso_model.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Lasso Model R2 Score: {}'.format(lasso_model.score(eval_X, eval_y)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(lasso_model[-1].coef_[:30])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"steps = [\n",
" ('scaler', MinMaxScaler()),\n",
" ('poly', PolynomialFeatures(degree=3, interaction_only=True)),\n",
" ('model', Ridge(alpha=0.9))\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ridge_model = Pipeline(steps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ridge_model.fit(train_X, train_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Ridge Model R2 score: {}'.format(ridge_model.score(eval_X, eval_y)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(ridge_model[-1].coef_[:30])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}