{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "wMcwTFr3IfLF"
   },
   "source": [
    "**Plotting ROC AUC**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "pMbqUnILG4CX"
   },
   "outputs": [],
   "source": [
    "# import libraries (all imports live in this one cell so a fresh kernel can Run All)\n",
    "import warnings\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import auc\n",
    "from sklearn.metrics import roc_auc_score\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# NOTE(review): this silences every warning for the rest of the notebook\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "UUYqPslNHC5F",
    "outputId": "358d41d7-826f-4a39-e6dd-b66e9778f60d"
   },
   "outputs": [],
   "source": [
    "# data doesn't have headers, so let's create headers\n",
    "_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n",
    "# read in the caesarian dataset; skiprows=15 drops the first 15 lines of the file\n",
    "# (presumably the ARFF header block - confirm against the file)\n",
    "df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n",
    "df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "NnYpB2dkHHNa"
   },
   "outputs": [],
   "source": [
    "# target column is 'Caesarian'; every other column is a feature\n",
    "features = df.drop(['Caesarian'], axis=1).values\n",
    "# select the target as a 1-D array of shape (n_samples,): a single-column 2-D array\n",
    "# makes scikit-learn raise a DataConversionWarning when fitting\n",
    "labels = df['Caesarian'].values\n",
    "\n",
    "# split 80% for training and 20% into an evaluation set\n",
    "X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
    "\n",
    "# further split the evaluation set into validation and test sets of 10% each\n",
    "X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 161
    },
    "colab_type": "code",
    "id": "A8E1Hc5RHI_5",
    "outputId": "2cc3b9cd-4a47-42b0-c79c-ec9c9b5f26d3"
   },
   "outputs": [],
   "source": [
    "# fit a logistic regression classifier with default hyperparameters on the training split\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "75B0f5oBHLVh"
   },
   "outputs": [],
   "source": [
    "# class probabilities for the validation set: one column per class, in model.classes_ order\n",
    "y_proba = model.predict_proba(X_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "KbtEbYtcI7cZ",
    "outputId": "3d2dc2ae-a660-4e48-b946-3b67dfb2066e"
   },
   "outputs": [],
   "source": [
    "# roc_auc_score expects the score of the positive class, i.e. the class with the greater\n",
    "# label. predict_proba orders its columns like model.classes_, so that is column 1\n",
    "# (assumed to be Caesarian == 1 - confirm against model.classes_). Scoring with column 0\n",
    "# ranks the samples in reverse and reports 1 - AUC.\n",
    "_auc = roc_auc_score(y_val, y_proba[:, 1])\n",
    "print(_auc)\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise6_13.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}