mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 00:22:32 +00:00
229 lines
5.2 KiB
Plaintext
229 lines
5.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "wMcwTFr3IfLF"
|
|
},
|
|
"source": [
|
|
"**Plotting ROC**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "pMbqUnILG4CX"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# import libraries\n",
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.metrics import roc_curve\n",
|
|
"from sklearn.metrics import auc\n",
|
|
"\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 194
|
|
},
|
|
"colab_type": "code",
|
|
"id": "UUYqPslNHC5F",
|
|
"outputId": "db56cc50-f1a7-403f-f43d-79751f378f3c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# data doesn't have headers, so let's create headers\n",
|
|
"_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']\n",
|
|
"# read in the caesarian dataset (the first 15 rows of the .arff file are skipped)\n",
|
|
"df = pd.read_csv('../Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)\n",
|
|
"df.head()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "NnYpB2dkHHNa"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# target column is 'Caesarian'\n",
|
|
"features = df.drop(['Caesarian'], axis=1).values\n",
|
|
"# select the target as a 1-D array: scikit-learn estimators expect y of shape (n_samples,)\n",
|
|
"labels = df['Caesarian'].values\n",
|
|
"\n",
|
|
"# split 80% for training and 20% into an evaluation set\n",
|
|
"X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.2, random_state=0)\n",
|
|
"\n",
|
|
"# further split the evaluation set into validation and test sets of 10% each\n",
|
|
"X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 161
|
|
},
|
|
"colab_type": "code",
|
|
"id": "A8E1Hc5RHI_5",
|
|
"outputId": "d4ea2a9b-ef8b-4ad6-9e84-9d59ae23030b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"model = LogisticRegression()\n",
|
|
"model.fit(X_train, y_train)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "75B0f5oBHLVh"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y_proba = model.predict_proba(X_val)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "ALurTbneHNQp"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# roc_curve expects the score of the positive class; predict_proba orders its columns by model.classes_,\n",
|
|
"# so column 1 is P(Caesarian == 1) (assumes the labels are encoded 0/1 -- confirm against the data)\n",
|
|
"_false_positive, _true_positive, _thresholds = roc_curve(y_val, y_proba[:, 1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"id": "S1GrOAc-Hw4A",
|
|
"outputId": "0977453a-c6dd-4eab-8210-9518147a64b5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(_false_positive)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 52
|
|
},
|
|
"colab_type": "code",
|
|
"id": "mTn94iOLH07c",
|
|
"outputId": "6f7b1728-4be5-4176-def1-1a5f6e51c6e0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(_true_positive)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 52
|
|
},
|
|
"colab_type": "code",
|
|
"id": "OhQAm1sTH2Cp",
|
|
"outputId": "4cc45cde-36d8-4850-f22b-c15e740b8d2e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(_thresholds)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 313
|
|
},
|
|
"colab_type": "code",
|
|
"id": "gdRsiwEqH5sM",
|
|
"outputId": "26108706-1324-4229-dbd1-e80a40b21340"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Plot the ROC curve\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"%matplotlib inline\n",
|
|
"\n",
|
|
"plt.plot(_false_positive, _true_positive, lw=2, label='Receiver Operating Characteristic')\n",
|
|
"plt.xlim(0.0, 1.2)\n",
|
|
"plt.ylim(0.0, 1.2)\n",
|
|
"plt.xlabel('False Positive Rate')\n",
|
|
"plt.ylabel('True Positive Rate')\n",
|
|
"plt.title('Receiver Operating Characteristic')\n",
|
|
"plt.show()\n",
|
|
"y_proba = model.predict_proba(X_val)\n",
|
|
"from sklearn.metrics import roc_auc_score\n",
|
|
"# score with column 1 of predict_proba, i.e. the positive-class probability (assumes 0/1 labels);\n",
|
|
"# using column 0 would report 1 - AUC\n",
|
|
"_auc = roc_auc_score(y_val, y_proba[:, 1])\n",
|
|
"print(_auc)\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "Exercise6_12.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|