Files
Your Name 54ccb1423f added
2021-02-08 11:17:02 +00:00

256 lines
5.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "V8UoTLWkigol"
},
"outputs": [],
"source": [
"# import libraries \n",
"\n",
"import pandas as pd \n",
"\n",
"from sklearn.model_selection import train_test_split \n",
"\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 203
},
"colab_type": "code",
"id": "mxGIoM_5ilQF",
"outputId": "e3bdf790-f9c5-4b87-e72e-9c68bb869517"
},
"outputs": [],
"source": [
"# The raw file carries no header row, so the column names are supplied here.\n",
"_headers = [\n",
"    'Age',\n",
"    'Delivery_Nbr',\n",
"    'Delivery_Time',\n",
"    'Blood_Pressure',\n",
"    'Heart_Problem',\n",
"    'Caesarian',\n",
"]\n",
"\n",
"# Read the caesarian dataset. skiprows=15 skips the leading lines of the file\n",
"# (presumably the ARFF metadata section -- confirm against the data file).\n",
"df = pd.read_csv(\n",
"    '../Dataset/caesarian.csv.arff',\n",
"    names=_headers,\n",
"    index_col=None,\n",
"    skiprows=15,\n",
")\n",
"\n",
"# Preview the first rows; 'Caesarian' is the target column.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "PxEvuaTJiojZ"
},
"outputs": [],
"source": [
"# Separate the predictor columns from the target column 'Caesarian'.\n",
"features = df.drop(['Caesarian'], axis=1).values\n",
"\n",
"# Select the target as a 1-D array of shape (n_samples,). Double brackets would\n",
"# produce an (n_samples, 1) column vector, which scikit-learn estimators only\n",
"# accept after emitting a DataConversionWarning.\n",
"labels = df['Caesarian'].values\n",
"\n",
"# Hold out 20% of the rows for evaluation; the remaining 80% is used for training.\n",
"X_train, X_eval, y_train, y_eval = train_test_split(\n",
"    features, labels, test_size=0.2, random_state=0\n",
")\n",
"\n",
"# Halve the held-out rows into validation and test sets (10% of the data each).\n",
"X_val, X_test, y_val, y_test = train_test_split(\n",
"    X_eval, y_eval, test_size=0.5, random_state=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 157
},
"colab_type": "code",
"id": "Ej6uIbezirCR",
"outputId": "b0e201d2-d55f-4dbc-c21d-e05c2cf6935b"
},
"outputs": [],
"source": [
"# Fit a logistic regression classifier (default settings) on the training split.\n",
"# fit() is the last expression, so the fitted estimator is shown as the cell output.\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "b7PyDlTRitfk"
},
"outputs": [],
"source": [
"# Predicted class probabilities for the validation rows (2-D: one row per sample).\n",
"y_proba = model.predict_proba(X_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "oaztmV1QjTYF"
},
"outputs": [],
"source": [
"from sklearn.metrics import roc_curve\n",
"from sklearn.metrics import auc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "DxAsnKfRiu0h"
},
"outputs": [],
"source": [
"# roc_curve expects the score of the positive class. predict_proba orders its\n",
"# columns like model.classes_, so with 0/1 labels column 1 holds P(Caesarian = 1).\n",
"# Passing column 0 would score the negative class and mirror the curve.\n",
"_false_positive, _true_positive, _thresholds = roc_curve(y_val, y_proba[:, 1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "laUL4vKgjSNa",
"outputId": "dcece431-2930-4cb7-fcbb-627f234cef94"
},
"outputs": [],
"source": [
"# False positive rates returned by roc_curve, one per threshold.\n",
"print(_false_positive)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "CWVZIRPFjlDN",
"outputId": "40253e1c-782a-451d-ca2f-a1fe75bed8ea"
},
"outputs": [],
"source": [
"# True positive rates returned by roc_curve, one per threshold.\n",
"print(_true_positive)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "87z9brVnjmht",
"outputId": "7c8b8e45-ad81-411d-e7dc-316bd65c6d9d"
},
"outputs": [],
"source": [
"# Decision thresholds at which the rates above were computed.\n",
"print(_thresholds)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"colab_type": "code",
"id": "6XgGwPzqjoV-",
"outputId": "bf98a239-c612-4a66-ddc2-0a019307a4f6"
},
"outputs": [],
"source": [
"# Plot the ROC curve: true positive rate against false positive rate.\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"%matplotlib inline\n",
"\n",
"plt.plot(_false_positive, _true_positive, lw=2, label='Receiver Operating Characteristic')\n",
"\n",
"# Both rates lie in [0, 1]; the limits extend to 1.2 to leave headroom around the curve.\n",
"plt.xlim(0.0, 1.2)\n",
"plt.ylim(0.0, 1.2)\n",
"\n",
"plt.xlabel('False Positive Rate')\n",
"plt.ylabel('True Positive Rate')\n",
"plt.title('Receiver Operating Characteristic')\n",
"\n",
"# Draw the legend so the label passed to plt.plot is actually displayed.\n",
"plt.legend(loc='lower right')\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"colab": {
"name": "Exercise6.11.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}