Files
mlessentials/Lab06/Exercise6.13/Exercise6_13.ipynb
T
Your Name 54ccb1423f added
2021-02-08 11:17:02 +00:00

152 lines
3.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "wMcwTFr3IfLF"
},
"source": [
"**Computing ROC AUC**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "pMbqUnILG4CX"
},
"outputs": [],
"source": [
"# import libraries\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import roc_curve\n",
"from sklearn.metrics import auc\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 194
},
"colab_type": "code",
"id": "UUYqPslNHC5F",
"outputId": "358d41d7-826f-4a39-e6dd-b66e9778f60d"
},
"outputs": [],
"source": [
"# data doesn't have headers, so let's create headers\n",
"_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n",
"# read in cars dataset\n",
"df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "NnYpB2dkHHNa"
},
"outputs": [],
"source": [
"# target column is 'Caesarian'; every other column is a feature\n",
"features = df.drop(columns=['Caesarian']).values\n",
"# select the target as a Series so that labels is 1-D, shape (n_samples,);\n",
"# df[['Caesarian']] would produce an (n_samples, 1) column vector, which\n",
"# scikit-learn estimators only accept after raising a DataConversionWarning\n",
"labels = df['Caesarian'].values\n",
"\n",
"# split 80% for training and 20% into an evaluation set\n",
"X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
"\n",
"# further split the evaluation set in half: validation and test sets of 10% each\n",
"X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 161
},
"colab_type": "code",
"id": "A8E1Hc5RHI_5",
"outputId": "2cc3b9cd-4a47-42b0-c79c-ec9c9b5f26d3"
},
"outputs": [],
"source": [
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "75B0f5oBHLVh"
},
"outputs": [],
"source": [
"y_proba = model.predict_proba(X_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"colab_type": "code",
"id": "KbtEbYtcI7cZ",
"outputId": "3d2dc2ae-a660-4e48-b946-3b67dfb2066e"
},
"outputs": [],
"source": [
"from sklearn.metrics import roc_auc_score\n",
"\n",
"# roc_auc_score expects the score of the positive class (the class with the\n",
"# greater label, i.e. the column matching model.classes_[1]). Column 0 holds\n",
"# the negative-class probability and would report 1 - AUC instead.\n",
"_auc = roc_auc_score(y_val, y_proba[:, 1])\n",
"print(_auc)\n"
]
}
],
"metadata": {
"colab": {
"name": "Exercise6_13.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}