From f8d95e8d58582f19c27cba4767d23605e952f01f Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 7 Feb 2021 12:27:22 +0000 Subject: [PATCH] added --- Lab06/Old/Exercise6_07/Exercise6_07.ipynb | 367 +-------------- Lab06/Old/Exercise6_08/Exercise6_08.ipynb | 367 +-------------- Lab06/Old/Exercise6_09/Exercise6_09.ipynb | 367 +-------------- Lab06/Old/Exercise6_10/Exercise6_10.ipynb | 355 +------------- Lab06/Old/Exercise6_11/Exercise6_11.ipynb | 191 +------- Lab06/Old/Exercise6_12/Exercise6_12.ipynb | 434 ++++++------------ ...Chapter_13_Unbalanced_Data_sets_v1.0.ipynb | 68 ++- 7 files changed, 257 insertions(+), 1892 deletions(-) diff --git a/Lab06/Old/Exercise6_07/Exercise6_07.ipynb b/Lab06/Old/Exercise6_07/Exercise6_07.ipynb index 102f6e7..3b95a14 100644 --- a/Lab06/Old/Exercise6_07/Exercise6_07.ipynb +++ b/Lab06/Old/Exercise6_07/Exercise6_07.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -38,106 +38,7 @@ "id": "eMkdycnKAC3k", "outputId": "98d8e715-c55f-47e3-a26a-2baad7f4139e" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
buyingmaintdoorspersonslug_bootsafetycar
0vhighvhigh22smalllowunacc
1vhighvhigh22smallmedunacc
2vhighvhigh22smallhighunacc
3vhighvhigh22medlowunacc
4vhighvhigh22medmedunacc
\n", - "
" - ], - "text/plain": [ - " buying maint doors persons lug_boot safety car\n", - "0 vhigh vhigh 2 2 small low unacc\n", - "1 vhigh vhigh 2 2 small med unacc\n", - "2 vhigh vhigh 2 2 small high unacc\n", - "3 vhigh vhigh 2 2 med low unacc\n", - "4 vhigh vhigh 2 2 med med unacc" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# data doesn't have headers, so let's create headers\n", "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n", @@ -150,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -160,214 +61,7 @@ "id": "EQvotJcIAC3o", "outputId": "aa12d7dd-dfaf-47c7-8e52-bd019034f4f8" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
carbuying_highbuying_lowbuying_medbuying_vhighmaint_highmaint_lowmaint_medmaint_vhighdoors_2...doors_5morepersons_2persons_4persons_morelug_boot_biglug_boot_medlug_boot_smallsafety_highsafety_lowsafety_med
0unacc000100011...0100001010
1unacc000100011...0100001001
2unacc000100011...0100001100
3unacc000100011...0100010010
4unacc000100011...0100010001
\n", - "

5 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " car buying_high buying_low buying_med buying_vhigh maint_high \\\n", - "0 unacc 0 0 0 1 0 \n", - "1 unacc 0 0 0 1 0 \n", - "2 unacc 0 0 0 1 0 \n", - "3 unacc 0 0 0 1 0 \n", - "4 unacc 0 0 0 1 0 \n", - "\n", - " maint_low maint_med maint_vhigh doors_2 ... doors_5more persons_2 \\\n", - "0 0 0 1 1 ... 0 1 \n", - "1 0 0 1 1 ... 0 1 \n", - "2 0 0 1 1 ... 0 1 \n", - "3 0 0 1 1 ... 0 1 \n", - "4 0 0 1 1 ... 0 1 \n", - "\n", - " persons_4 persons_more lug_boot_big lug_boot_med lug_boot_small \\\n", - "0 0 0 0 0 1 \n", - "1 0 0 0 0 1 \n", - "2 0 0 0 0 1 \n", - "3 0 0 0 1 0 \n", - "4 0 0 0 1 0 \n", - "\n", - " safety_high safety_low safety_med \n", - "0 0 1 0 \n", - "1 0 0 1 \n", - "2 1 0 0 \n", - "3 0 1 0 \n", - "4 0 0 1 \n", - "\n", - "[5 rows x 22 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# encode categorical variables\n", "_df = pd.get_dummies(df, columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])\n", @@ -376,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -398,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -408,34 +102,7 @@ "id": "-4nPRPmXAC3s", "outputId": "1d259a9b-b588-45e6-e80f-26f58cd6cd0c" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/utils/validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", - " \"this warning.\", FutureWarning)\n" - ] - }, - { - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='warn', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# train a Logistic Regression model\n", "model = LogisticRegression()\n", @@ -444,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -458,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -472,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -482,15 +149,7 @@ "id": "phVn4D7cAC3z", "outputId": "fde5fd3f-a262-4414-ca2d-d7cf3ff6223f" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.46172597937303816\n" - ] - } - ], + "outputs": [], "source": [ "recall_score = recall_score(y_val, y_pred, average='macro')\n", "print(recall_score)" @@ -529,7 +188,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.6" }, "mimetype": "text/x-python", "name": "python", diff --git a/Lab06/Old/Exercise6_08/Exercise6_08.ipynb b/Lab06/Old/Exercise6_08/Exercise6_08.ipynb index c50b4a5..b3418e4 100644 --- a/Lab06/Old/Exercise6_08/Exercise6_08.ipynb +++ b/Lab06/Old/Exercise6_08/Exercise6_08.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -21,108 +21,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
buyingmaintdoorspersonslug_bootsafetycar
0vhighvhigh22smalllowunacc
1vhighvhigh22smallmedunacc
2vhighvhigh22smallhighunacc
3vhighvhigh22medlowunacc
4vhighvhigh22medmedunacc
\n", - "
" - ], - "text/plain": [ - " buying maint doors persons lug_boot safety car\n", - "0 vhigh vhigh 2 2 small low unacc\n", - "1 vhigh vhigh 2 2 small med unacc\n", - "2 vhigh vhigh 2 2 small high unacc\n", - "3 vhigh vhigh 2 2 med low unacc\n", - "4 vhigh vhigh 2 2 med med unacc" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# data doesn't have headers, so let's create headers\n", "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n", @@ -135,216 +36,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
carbuying_highbuying_lowbuying_medbuying_vhighmaint_highmaint_lowmaint_medmaint_vhighdoors_2...doors_5morepersons_2persons_4persons_morelug_boot_biglug_boot_medlug_boot_smallsafety_highsafety_lowsafety_med
0unacc000100011...0100001010
1unacc000100011...0100001001
2unacc000100011...0100001100
3unacc000100011...0100010010
4unacc000100011...0100010001
\n", - "

5 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " car buying_high buying_low buying_med buying_vhigh maint_high \\\n", - "0 unacc 0 0 0 1 0 \n", - "1 unacc 0 0 0 1 0 \n", - "2 unacc 0 0 0 1 0 \n", - "3 unacc 0 0 0 1 0 \n", - "4 unacc 0 0 0 1 0 \n", - "\n", - " maint_low maint_med maint_vhigh doors_2 ... doors_5more persons_2 \\\n", - "0 0 0 1 1 ... 0 1 \n", - "1 0 0 1 1 ... 0 1 \n", - "2 0 0 1 1 ... 0 1 \n", - "3 0 0 1 1 ... 0 1 \n", - "4 0 0 1 1 ... 0 1 \n", - "\n", - " persons_4 persons_more lug_boot_big lug_boot_med lug_boot_small \\\n", - "0 0 0 0 0 1 \n", - "1 0 0 0 0 1 \n", - "2 0 0 0 0 1 \n", - "3 0 0 0 1 0 \n", - "4 0 0 0 1 0 \n", - "\n", - " safety_high safety_low safety_med \n", - "0 0 1 0 \n", - "1 0 0 1 \n", - "2 1 0 0 \n", - "3 0 1 0 \n", - "4 0 0 1 \n", - "\n", - "[5 rows x 22 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# encode categorical variables\n", "_df = pd.get_dummies(df, columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])\n", @@ -353,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -371,36 +65,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/utils/validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", - " \"this warning.\", FutureWarning)\n" - ] - }, - { - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='warn', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# train a Logistic Regression model\n", "model = LogisticRegression()\n", @@ -409,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -419,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -429,17 +96,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.4649367623880367\n" - ] - } - ], + "outputs": [], "source": [ "f1_score = f1_score(y_val, y_pred, average='macro')\n", "print(f1_score)" @@ -470,7 +129,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.6" }, "mimetype": "text/x-python", "name": "python", diff --git a/Lab06/Old/Exercise6_09/Exercise6_09.ipynb b/Lab06/Old/Exercise6_09/Exercise6_09.ipynb index 06a4a6d..1a62374 100644 --- a/Lab06/Old/Exercise6_09/Exercise6_09.ipynb +++ b/Lab06/Old/Exercise6_09/Exercise6_09.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -21,108 +21,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
buyingmaintdoorspersonslug_bootsafetycar
0vhighvhigh22smalllowunacc
1vhighvhigh22smallmedunacc
2vhighvhigh22smallhighunacc
3vhighvhigh22medlowunacc
4vhighvhigh22medmedunacc
\n", - "
" - ], - "text/plain": [ - " buying maint doors persons lug_boot safety car\n", - "0 vhigh vhigh 2 2 small low unacc\n", - "1 vhigh vhigh 2 2 small med unacc\n", - "2 vhigh vhigh 2 2 small high unacc\n", - "3 vhigh vhigh 2 2 med low unacc\n", - "4 vhigh vhigh 2 2 med med unacc" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# data doesn't have headers, so let's create headers\n", "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n", @@ -135,216 +36,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
carbuying_highbuying_lowbuying_medbuying_vhighmaint_highmaint_lowmaint_medmaint_vhighdoors_2...doors_5morepersons_2persons_4persons_morelug_boot_biglug_boot_medlug_boot_smallsafety_highsafety_lowsafety_med
0unacc000100011...0100001010
1unacc000100011...0100001001
2unacc000100011...0100001100
3unacc000100011...0100010010
4unacc000100011...0100010001
\n", - "

5 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " car buying_high buying_low buying_med buying_vhigh maint_high \\\n", - "0 unacc 0 0 0 1 0 \n", - "1 unacc 0 0 0 1 0 \n", - "2 unacc 0 0 0 1 0 \n", - "3 unacc 0 0 0 1 0 \n", - "4 unacc 0 0 0 1 0 \n", - "\n", - " maint_low maint_med maint_vhigh doors_2 ... doors_5more persons_2 \\\n", - "0 0 0 1 1 ... 0 1 \n", - "1 0 0 1 1 ... 0 1 \n", - "2 0 0 1 1 ... 0 1 \n", - "3 0 0 1 1 ... 0 1 \n", - "4 0 0 1 1 ... 0 1 \n", - "\n", - " persons_4 persons_more lug_boot_big lug_boot_med lug_boot_small \\\n", - "0 0 0 0 0 1 \n", - "1 0 0 0 0 1 \n", - "2 0 0 0 0 1 \n", - "3 0 0 0 1 0 \n", - "4 0 0 0 1 0 \n", - "\n", - " safety_high safety_low safety_med \n", - "0 0 1 0 \n", - "1 0 0 1 \n", - "2 1 0 0 \n", - "3 0 1 0 \n", - "4 0 0 1 \n", - "\n", - "[5 rows x 22 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# encode categorical variables\n", "_df = pd.get_dummies(df, columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])\n", @@ -353,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -371,36 +65,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/utils/validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", - " \"this warning.\", FutureWarning)\n" - ] - }, - { - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='warn', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# train a Logistic Regression model\n", "model = LogisticRegression()\n", @@ -409,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -419,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -429,17 +96,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8764478764478765\n" - ] - } - ], + "outputs": [], "source": [ "_accuracy = accuracy_score(y_val, y_pred)\n", "print(_accuracy)" @@ -463,7 +122,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.6" }, "mimetype": "text/x-python", "name": "python", diff --git a/Lab06/Old/Exercise6_10/Exercise6_10.ipynb b/Lab06/Old/Exercise6_10/Exercise6_10.ipynb index d5c9880..9b6f917 100644 --- a/Lab06/Old/Exercise6_10/Exercise6_10.ipynb +++ b/Lab06/Old/Exercise6_10/Exercise6_10.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -28,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -38,108 +38,7 @@ "id": "eMkdycnKAC3k", "outputId": "e4f4f1e3-9b87-4cd5-9539-4692a32407b6" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
buyingmaintdoorspersonslug_bootsafetycar
0vhighvhigh22smalllowunacc
1vhighvhigh22smallmedunacc
2vhighvhigh22smallhighunacc
3vhighvhigh22medlowunacc
4vhighvhigh22medmedunacc
\n", - "
" - ], - "text/plain": [ - " buying maint doors persons lug_boot safety car\n", - "0 vhigh vhigh 2 2 small low unacc\n", - "1 vhigh vhigh 2 2 small med unacc\n", - "2 vhigh vhigh 2 2 small high unacc\n", - "3 vhigh vhigh 2 2 med low unacc\n", - "4 vhigh vhigh 2 2 med med unacc" - ] - }, - "execution_count": 11, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# data doesn't have headers, so let's create headers\n", "_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']\n", @@ -152,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -162,200 +61,7 @@ "id": "EQvotJcIAC3o", "outputId": "ac75fe26-8ead-45a5-c7e2-7820f589f454" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
carbuying_highbuying_lowbuying_medbuying_vhighmaint_highmaint_lowmaint_medmaint_vhighdoors_2doors_3doors_4doors_5morepersons_2persons_4persons_morelug_boot_biglug_boot_medlug_boot_smallsafety_highsafety_lowsafety_med
0unacc000100011000100001010
1unacc000100011000100001001
2unacc000100011000100001100
3unacc000100011000100010010
4unacc000100011000100010001
\n", - "
" - ], - "text/plain": [ - " car buying_high buying_low ... safety_high safety_low safety_med\n", - "0 unacc 0 0 ... 0 1 0\n", - "1 unacc 0 0 ... 0 0 1\n", - "2 unacc 0 0 ... 1 0 0\n", - "3 unacc 0 0 ... 0 1 0\n", - "4 unacc 0 0 ... 0 0 1\n", - "\n", - "[5 rows x 22 columns]" - ] - }, - "execution_count": 12, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# encode categorical variables\n", "_df = pd.get_dummies(df, columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])\n", @@ -364,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -386,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -396,32 +102,7 @@ "id": "-4nPRPmXAC3s", "outputId": "429ffe12-5d71-4048-c55d-5cb56162c398" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n" - ] - }, - { - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='auto', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "execution_count": 14, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# train a Logistic Regression model\n", "model = LogisticRegression()\n", @@ -430,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -444,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -458,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -468,15 +149,7 @@ "id": "phVn4D7cAC3z", "outputId": "0f9a4e44-5e96-4ce0-8b78-08c5a91040a7" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.22578836752298448\n" - ] - } - ], + "outputs": [], "source": [ "_loss = log_loss(y_val, model.predict_proba(X_val))\n", "print(_loss)" @@ -484,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -515,7 +188,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.6" }, "mimetype": "text/x-python", "name": "python", diff --git a/Lab06/Old/Exercise6_11/Exercise6_11.ipynb b/Lab06/Old/Exercise6_11/Exercise6_11.ipynb index 4acdcda..3b457b1 100644 --- a/Lab06/Old/Exercise6_11/Exercise6_11.ipynb +++ b/Lab06/Old/Exercise6_11/Exercise6_11.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -31,100 +31,7 @@ "id": "mxGIoM_5ilQF", "outputId": "e3bdf790-f9c5-4b87-e72e-9c68bb869517" }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AgeDelivery_NbrDelivery_TimeBlood_PressureHeart_ProblemCaesarian
02210200
12620101
22621100
32810200
42220101
\n", - "
" - ], - "text/plain": [ - " Age Delivery_Nbr Delivery_Time Blood_Pressure Heart_Problem Caesarian\n", - "0 22 1 0 2 0 0\n", - "1 26 2 0 1 0 1\n", - "2 26 2 1 1 0 0\n", - "3 28 1 0 2 0 0\n", - "4 22 2 0 1 0 1" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# data doesn't have headers, so let's create headers \n", "\n", @@ -141,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -170,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -180,32 +87,7 @@ "id": "Ej6uIbezirCR", "outputId": "b0e201d2-d55f-4dbc-c21d-e05c2cf6935b" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n", - "/Users/robert/anaconda3/envs/tensorflow/lib/python3.7/site-packages/sklearn/utils/validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n" - ] - }, - { - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='warn', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model = LogisticRegression() \n", "\n", @@ -214,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -227,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -241,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -254,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -264,22 +146,14 @@ "id": "laUL4vKgjSNa", "outputId": "dcece431-2930-4cb7-fcbb-627f234cef94" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0. 0. 0. 0.5 0.5 1. 1. ]\n" - ] - } - ], + "outputs": [], "source": [ "print(_false_positive) " ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -289,23 +163,14 @@ "id": "CWVZIRPFjlDN", "outputId": "40253e1c-782a-451d-ca2f-a1fe75bed8ea" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0. 0.16666667 0.33333333 0.33333333 0.83333333 0.83333333\n", - " 1. ]\n" - ] - } - ], + "outputs": [], "source": [ "print(_true_positive) " ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -315,23 +180,14 @@ "id": "87z9brVnjmht", "outputId": "7c8b8e45-ad81-411d-e7dc-316bd65c6d9d" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1.555085 0.555085 0.55005002 0.48012821 0.32088069 0.22067501\n", - " 0.19652383]\n" - ] - } - ], + "outputs": [], "source": [ "print(_thresholds) " ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -341,18 +197,7 @@ "id": "6XgGwPzqjoV-", "outputId": "bf98a239-c612-4a66-ddc2-0a019307a4f6" }, - "outputs": [ - { - "data": { - "text/plain": [ - "\"import matplotlib.pyplot as plt \\n\\n%matplotlib inline \\n\\n \\n\\nplt.plot(_false_positive, _true_positive, lw=2, label='Receiver Operating Characteristic') \\n\\nplt.xlim(0.0, 1.2) \\n\\nplt.ylim(0.0, 1.2) \\n\\nplt.xlabel('False Positive Rate') \\n\\nplt.ylabel('True Positive Rate') \\n\\nplt.title('Receiver Operating Characteristic') \\n\\nplt.show() \"" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Plot the RoC \n", "# Uncomment the following block of code to see the plot\n", @@ -399,7 +244,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.6" } }, "nbformat": 4, diff --git a/Lab06/Old/Exercise6_12/Exercise6_12.ipynb b/Lab06/Old/Exercise6_12/Exercise6_12.ipynb index 251ddb6..3b8d62c 100644 --- a/Lab06/Old/Exercise6_12/Exercise6_12.ipynb +++ b/Lab06/Old/Exercise6_12/Exercise6_12.ipynb @@ -1,286 +1,158 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "file_extension": ".py", - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - }, - "mimetype": "text/x-python", - "name": "python", - "npconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 3, - "colab": { - "name": "Exercise6.12.ipynb", - "provenance": [] - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "QWyZQ1u8DuI_" + }, + "source": [ + "# Computing the ROC AUC for the Caesarian Dataset" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "QWyZQ1u8DuI_", - "colab_type": "text" - }, - "source": [ - "# Computing the ROC AUC for the Caesarian Dataset" - ] + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "p89bSWY_DuJD" + }, + "outputs": [], + "source": [ + "# import libraries\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 }, - { - "cell_type": "code", - "metadata": { - "id": "p89bSWY_DuJD", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# import libraries\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "vW0hhGg3DuJI", + "outputId": "a79179c0-dedd-41dc-f13e-93c1e2120617" + }, + "outputs": [], + "source": [ + "# data doesn't have headers, so let's create headers\n", + "_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n", + "# read in cars dataset\n", + "df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n", + "df.head()\n", + "\n", + "# target column is 'Caesarian'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "MGk_0UmkDuJN" + }, + "outputs": [], + "source": [ + "# target column is 'Caesarian'\n", + "\n", + "features = df.drop(['Caesarian'], axis=1).values\n", + "labels = df[['Caesarian']].values\n", + "\n", + "# split 80% for training and 20% into an evaluation set\n", + "X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n", + "\n", + "# further split the evaluation set into validation and test sets of 10% each\n", + "X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 }, - { - "cell_type": "code", - "metadata": { - "id": "vW0hhGg3DuJI", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 204 - }, - "outputId": "a79179c0-dedd-41dc-f13e-93c1e2120617" - }, - "source": [ - "# data doesn't have headers, so let's create headers\n", - "_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n", - "# read in cars dataset\n", - "df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n", - "df.head()\n", - "\n", - "# target column is 'Caesarian'" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AgeDelivery_NbrDelivery_TimeBlood_PressureHeart_ProblemCaesarian
02210200
12620101
22621100
32810200
42220101
\n", - "
" - ], - "text/plain": [ - " Age Delivery_Nbr Delivery_Time Blood_Pressure Heart_Problem Caesarian\n", - "0 22 1 0 2 0 0\n", - "1 26 2 0 1 0 1\n", - "2 26 2 1 1 0 0\n", - "3 28 1 0 2 0 0\n", - "4 22 2 0 1 0 1" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 2 - } - ] + "colab_type": "code", + "id": "3qwJP9sEDuJQ", + "outputId": "de02ebb1-fb71-4283-f90c-1623e14af1f4" + }, + "outputs": [], + "source": [ + "# train a Logistic Regression model\n", + "model = LogisticRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "v3vFYHI7DuJU" + }, + "outputs": [], + "source": [ + "# make predictions for the validation dataset\n", + "y_proba = model.predict_proba(X_val)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 }, - { - "cell_type": "code", - "metadata": { - "id": "MGk_0UmkDuJN", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# target column is 'Caesarian'\n", - "\n", - "features = df.drop(['Caesarian'], axis=1).values\n", - "labels = df[['Caesarian']].values\n", - "\n", - "# split 80% for training and 20% into an evaluation set\n", - "X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n", - "\n", - "# further split the evaluation set into validation and test sets of 10% each\n", - "X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "3qwJP9sEDuJQ", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 136 - }, - "outputId": "de02ebb1-fb71-4283-f90c-1623e14af1f4" - }, - "source": [ - "# train a Logistic Regression model\n", - "model = LogisticRegression()\n", - "model.fit(X_train, y_train)" - ], - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n" - ], - "name": "stderr" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='auto', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", - " warm_start=False)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 4 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "v3vFYHI7DuJU", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# make predictions for the validation dataset\n", - "y_proba = model.predict_proba(X_val)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "s2mk5XKxDuJY", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - }, - "outputId": "28e3e3db-3e8f-42ea-da67-5febd5cae99b" - }, - "source": [ - "from sklearn.metrics import roc_auc_score\n", - "_auc = roc_auc_score(y_val, y_proba[:, 0])\n", - "print(_auc)" - ], - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "text": [ - "0.5833333333333334\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file + "colab_type": "code", + "id": "s2mk5XKxDuJY", + "outputId": "28e3e3db-3e8f-42ea-da67-5febd5cae99b" + }, + "outputs": [], + "source": [ + "from sklearn.metrics import roc_auc_score\n", + "_auc = roc_auc_score(y_val, y_proba[:, 0])\n", + "print(_auc)" + ] + } + ], + "metadata": { + "colab": { + "name": "Exercise6.12.ipynb", + "provenance": [] + }, + "file_extension": ".py", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + }, + "mimetype": "text/x-python", + "name": "python", + "npconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Lab13/Chapter_13_Unbalanced_Data_sets_v1.0.ipynb b/Lab13/Chapter_13_Unbalanced_Data_sets_v1.0.ipynb index da5c17f..c80560f 100644 --- a/Lab13/Chapter_13_Unbalanced_Data_sets_v1.0.ipynb +++ b/Lab13/Chapter_13_Unbalanced_Data_sets_v1.0.ipynb @@ -1,23 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 122 - }, - "colab_type": "code", - "id": "yswSlRdlXiWW", - "outputId": "5f83a1c3-9e2c-48ea-9adf-5506b8b27da4" - }, - "outputs": [], - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ] - }, { "cell_type": "code", "execution_count": null, @@ -29,7 +11,10 @@ "outputs": [], "source": [ "# Loading the necessary library files\n", - "import pandas as pd" + "import pandas as pd\n", + "\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)" ] }, { @@ -46,10 +31,8 @@ }, "outputs": [], "source": [ - "# Loading data from the drive\n", - "\n", "# Please change the filename as per the location where the file is stored\n", - "filename = '/content/drive/My Drive/Packt_Colab/bank-full.csv'\n", + "filename = './Dataset/bank-full.csv'\n", "# Loading the data u'sing pandas\n", "\n", "bankData = pd.read_csv(filename,sep=\";\")\n", @@ -461,19 +444,6 @@ "Let us now try the over sampling method and find what effect it has on the results" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4NrPQWkA9Eyf" - }, - "outputs": [], - "source": [ - "!pip install smote-variants" - ] - }, { "cell_type": "code", "execution_count": null, @@ -667,6 +637,34 @@ "from sklearn.metrics import classification_report\n", "print(classification_report(y_test, pred))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {