mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 00:22:32 +00:00
152 lines
3.5 KiB
Plaintext
152 lines
3.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "wMcwTFr3IfLF"
|
|
},
|
|
"source": [
|
|
"**Plotting ROC AUC**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "pMbqUnILG4CX"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# import libraries\n",
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.metrics import roc_curve\n",
|
|
"from sklearn.metrics import auc\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 194
|
|
},
|
|
"colab_type": "code",
|
|
"id": "UUYqPslNHC5F",
|
|
"outputId": "358d41d7-826f-4a39-e6dd-b66e9778f60d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# data doesn't have headers, so let's create headers\n",
|
|
"_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n",
|
|
"# read in the caesarian dataset\n",
|
|
"df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n",
|
|
"df.head()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "NnYpB2dkHHNa"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# target column is 'Caesarian'\n",
|
|
"features = df.drop(['Caesarian'], axis=1).values\n",
|
|
"# use a 1-D label vector: scikit-learn estimators and metrics expect y of shape (n_samples,)\n",
"labels = df['Caesarian'].values\n",
|
|
"\n",
|
|
"# split 80% for training and 20% into an evaluation set\n",
|
|
"X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
|
|
"\n",
|
|
"# further split the evaluation set into validation and test sets of 10% each\n",
|
|
"X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 161
|
|
},
|
|
"colab_type": "code",
|
|
"id": "A8E1Hc5RHI_5",
|
|
"outputId": "2cc3b9cd-4a47-42b0-c79c-ec9c9b5f26d3"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"model = LogisticRegression()\n",
|
|
"model.fit(X_train, y_train)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "75B0f5oBHLVh"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y_proba = model.predict_proba(X_val)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "KbtEbYtcI7cZ",
|
|
"outputId": "3d2dc2ae-a660-4e48-b946-3b67dfb2066e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import roc_auc_score\n",
|
|
"# roc_auc_score expects the score of the positive class (model.classes_[1]), i.e. column 1 of predict_proba\n",
"_auc = roc_auc_score(y_val, y_proba[:, 1])\n",
|
|
"print(_auc)\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "Exercise6_13.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|