mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 00:22:32 +00:00
349 lines
7.3 KiB
Plaintext
349 lines
7.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "uGkVk0AaLY6l"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.metrics import classification_report\n",
|
|
"from sklearn.tree import DecisionTreeClassifier\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "dRMP8E1FLpWI"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the bank-additional-full dataset; the file is semicolon-delimited, hence sep=';'.\n",
|
|
"df = pd.read_csv('../Dataset/bank-additional-full.csv', sep=';')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 459
|
|
},
|
|
"colab_type": "code",
|
|
"id": "r7XYKYARLqt4",
|
|
"outputId": "e66dec91-0d8e-45db-fa6f-008de8cdb3a8"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Inspect dtypes and non-null counts to spot the text columns that need encoding.\n",
|
|
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"id": "MtOHZhE7LuIu",
|
|
"outputId": "892b49b5-9a15-44e3-b0c0-51ccb5760383"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Preview the first rows of the raw frame.\n",
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 1000
|
|
},
|
|
"colab_type": "code",
|
|
"id": "kA7IlrPiMOGF",
|
|
"outputId": "aed6bf89-aa0f-417a-da20-ad7b21a15923"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Columns that get one-hot encoded.\n",
|
|
"cat_cols = [\n",
|
|
"    'job', 'marital', 'education', 'default', 'housing',\n",
|
|
"    'loan', 'contact', 'month', 'day_of_week', 'poutcome',\n",
|
|
"]\n",
|
|
"# drop_first=True leaves out the first level of every encoded column.\n",
|
|
"_df = pd.get_dummies(\n",
|
|
"    df,\n",
|
|
"    columns=cat_cols,\n",
|
|
"    prefix=cat_cols,\n",
|
|
"    drop_first=True,\n",
|
|
")\n",
|
|
"_df.info()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 241
|
|
},
|
|
"colab_type": "code",
|
|
"id": "sEJc8NZeMTZm",
|
|
"outputId": "2f6b481c-ef34-445c-effd-379ecfc21515"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Preview the first rows of the one-hot encoded frame.\n",
|
|
"_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "q8WPkabYMVXy"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Feature matrix: every column of the encoded frame except the target.\n",
|
|
"X = _df.drop(columns=['y']).to_numpy()\n",
|
|
"# Binary target: 'no' -> 0, any other label -> 1.\n",
|
|
"y = (df['y'] != 'no').astype('int64').to_numpy()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "9PX4ava1MXiu"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Hold out 30% of the rows for evaluation; the remaining 70% is the training split.\n",
|
|
"train_X, eval_X, train_y, eval_y = train_test_split(\n",
|
|
"    X, y, test_size=0.3, random_state=0\n",
|
|
")\n",
|
|
"# Split the held-out rows 75/25 into validation and test sets\n",
|
|
"# (0.25 is train_test_split's default test_size, spelled out here).\n",
|
|
"val_X, test_X, val_y, test_y = train_test_split(\n",
|
|
"    eval_X, eval_y, test_size=0.25, random_state=0\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "lqdsD7YzMZ-2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Logistic regression with scikit-learn's default settings.\n",
|
|
"# NOTE(review): the features are not scaled before fitting -- confirm the solver converges.\n",
|
|
"lr_model = LogisticRegression()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 258
|
|
},
|
|
"colab_type": "code",
|
|
"id": "-rdzBySdMcH2",
|
|
"outputId": "9c4611af-6eeb-4822-cc08-70da5cc69684"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fit on the training split only; the validation and test rows are not used here.\n",
|
|
"lr_model.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "YKNwsA8FMdw2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicted class labels for the validation split.\n",
|
|
"lr_preds = lr_model.predict(val_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "7L_lyv5uMgkU",
|
|
"outputId": "ef706117-e639-4b05-9c94-e77b06cb72f3"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Text summary of the logistic regression predictions against the validation labels.\n",
|
|
"lr_report = classification_report(y_true=val_y, y_pred=lr_preds)\n",
|
|
"print(lr_report)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "EusQF-6MMidI"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Decision tree with depth capped at 6. random_state pins the feature permutation\n",
|
|
"# used when choosing splits, so a rerun builds the same tree.\n",
|
|
"dt_model = DecisionTreeClassifier(max_depth=6, random_state=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"id": "HvYPMNDGMkIf",
|
|
"outputId": "96850069-5b13-45b7-f772-80c734f1120b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fit on the training split only; the validation and test rows are not used here.\n",
|
|
"dt_model.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "aSBt8RvDMm_m"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicted class labels for the validation split.\n",
|
|
"dt_preds = dt_model.predict(val_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "9CNLWLklMo5_",
|
|
"outputId": "95017766-faae-4c17-81a0-0398adb19027"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Text summary of the decision tree predictions against the validation labels.\n",
|
|
"dt_report = classification_report(y_true=val_y, y_pred=dt_preds)\n",
|
|
"print(dt_report)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 153
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Q60nt2b0Mq0X",
|
|
"outputId": "e6408ad0-722c-4d53-a801-75f985784c9b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# 1000-tree random forest. random_state makes the bootstrap samples and feature\n",
|
|
"# subsets repeatable across runs; n_jobs=-1 builds the trees on all available cores.\n",
|
|
"rf_model = RandomForestClassifier(n_estimators=1000, random_state=0, n_jobs=-1)\n",
|
|
"rf_model.fit(train_X, train_y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "T37wZC1lMy7-"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicted class labels for the validation split.\n",
|
|
"rf_preds = rf_model.predict(val_X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "rfyAqubjNF5h",
|
|
"outputId": "3454b27f-91be-4157-8e7b-9dfd20a77137"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Text summary of the random forest predictions against the validation labels.\n",
|
|
"rf_report = classification_report(y_true=val_y, y_pred=rf_preds)\n",
|
|
"print(rf_report)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "HkSlaECTNH1X",
|
|
"outputId": "d2a068a4-25b1-4767-b782-df1133d03cba"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Score each fitted model on the validation split and print them side by side.\n",
|
|
"score_template = 'Linear Score: {}, DecisionTree Score: {}, RandomForest Score: {}'\n",
|
|
"val_scores = [model.score(val_X, val_y) for model in (lr_model, dt_model, rf_model)]\n",
|
|
"print(score_template.format(*val_scores))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "Activity6.01.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|