{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "wMcwTFr3IfLF"
   },
   "source": [
    "**Plotting ROC**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "pMbqUnILG4CX"
   },
   "outputs": [],
   "source": [
    "# import libraries (all imports live in this one cell so a fresh kernel can Run All)\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 194
    },
    "colab_type": "code",
    "id": "UUYqPslNHC5F",
    "outputId": "db56cc50-f1a7-403f-f43d-79751f378f3c"
   },
   "outputs": [],
   "source": [
    "# data doesn't have headers, so let's create headers\n",
    "_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n",
    "# read in the caesarian dataset; the first 15 lines of the file are skipped\n",
    "# (presumably the ARFF header block -- confirm against the file)\n",
    "df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n",
    "df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "NnYpB2dkHHNa"
   },
   "outputs": [],
   "source": [
    "# target column is 'Caesarian'\n",
    "features = df.drop(['Caesarian'], axis=1).values\n",
    "# select the column as a Series (single brackets) so that labels is 1-D;\n",
    "# scikit-learn expects a 1-D y and warns when handed an (n, 1) column vector\n",
    "labels = df['Caesarian'].values\n",
    "\n",
    "# split 80% for training and 20% into an evaluation set\n",
    "X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
    "\n",
    "# further split the evaluation set into validation and test sets of 10% each\n",
    "X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 161
    },
    "colab_type": "code",
    "id": "A8E1Hc5RHI_5",
    "outputId": "d4ea2a9b-ef8b-4ad6-9e84-9d59ae23030b"
   },
   "outputs": [],
   "source": [
    "# fit a logistic regression classifier on the training split\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "75B0f5oBHLVh"
   },
   "outputs": [],
   "source": [
    "# class probabilities for the validation split; one column per class,\n",
    "# ordered as in model.classes_\n",
    "y_proba = model.predict_proba(X_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "ALurTbneHNQp"
   },
   "outputs": [],
   "source": [
    "# roc_curve expects the score of the POSITIVE class. model.classes_ is sorted,\n",
    "# so for a binary target column 1 holds the probability of the larger label\n",
    "# (assumes 'Caesarian' is coded 0/1 with 1 as the positive class).\n",
    "# Passing column 0 would mirror the curve and yield 1 - AUC.\n",
    "_false_positive, _true_positive, _thresholds = roc_curve(y_val, y_proba[:, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 35
    },
    "colab_type": "code",
    "id": "S1GrOAc-Hw4A",
    "outputId": "0977453a-c6dd-4eab-8210-9518147a64b5"
   },
   "outputs": [],
   "source": [
    "print(_false_positive)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 52
    },
    "colab_type": "code",
    "id": "mTn94iOLH07c",
    "outputId": "6f7b1728-4be5-4176-def1-1a5f6e51c6e0"
   },
   "outputs": [],
   "source": [
    "print(_true_positive)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 52
    },
    "colab_type": "code",
    "id": "OhQAm1sTH2Cp",
    "outputId": "4cc45cde-36d8-4850-f22b-c15e740b8d2e"
   },
   "outputs": [],
   "source": [
    "print(_thresholds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 313
    },
    "colab_type": "code",
    "id": "gdRsiwEqH5sM",
    "outputId": "26108706-1324-4229-dbd1-e80a40b21340"
   },
   "outputs": [],
   "source": [
    "# Plot the RoC: true positive rate against false positive rate\n",
    "plt.plot(_false_positive, _true_positive, lw=2, label='Receiver Operating Characteristic')\n",
    "plt.xlim(0.0, 1.2)\n",
    "plt.ylim(0.0, 1.2)\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.title('Receiver Operating Characteristic')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code"
   },
   "outputs": [],
   "source": [
    "# Area under the ROC curve on the validation split, scored with the\n",
    "# positive-class probabilities (column 1) so it matches the curve above.\n",
    "# y_proba is reused from the earlier predict_proba cell.\n",
    "_auc = roc_auc_score(y_val, y_proba[:, 1])\n",
    "print(_auc)\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "Exercise6_12.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}