Files
mlessentials/Lab03/Exercise3.06/Exercise3_06.ipynb
T
Your Name 54ccb1423f added
2021-02-08 11:17:02 +00:00

345 lines
8.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "8UMFqsCD0xyF"
},
"outputs": [],
"source": [
"# Importing necessary packages\n",
"import pandas as pd\n",
"import altair as alt\n",
"\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HSXgY0ze09cY"
},
"outputs": [],
"source": [
"# Read the semicolon-separated CSV located at file_url into a DataFrame\n",
"file_url = '../bank-full.csv'\n",
"bankData = pd.read_csv(filepath_or_buffer=file_url, sep=';')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "5SqiAWuMhase"
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 323
},
"colab_type": "code",
"executionInfo": {
"elapsed": 2512,
"status": "ok",
"timestamp": 1573002258890,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "0va4ykVzhj2P",
"outputId": "bc1d55b2-c36b-4bc7-e08c-2619e5c22014"
},
"outputs": [],
"source": [
"# Display the dtype of every column in the loaded DataFrame\n",
"bankData.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "y4LHG0PWh-yr"
},
"outputs": [],
"source": [
"# One-hot encode the categorical columns into dummy (indicator) variables\n",
"categoricalCols = ['job', 'marital', 'education', 'default', 'housing',\n",
"                   'loan', 'contact', 'month', 'poutcome']\n",
"bankCat = pd.get_dummies(bankData[categoricalCols])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1094,
"status": "ok",
"timestamp": 1573002260021,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "XvvWurd4iCe1",
"outputId": "e53521c2-2076-477c-f273-83578c2a7793"
},
"outputs": [],
"source": [
"# (rows, columns) of the dummy-encoded frame\n",
"bankCat.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 544,
"status": "ok",
"timestamp": 1573002260300,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "deGzAPEDiUNk",
"outputId": "70bb6f3f-3a7f-45a3-c0d1-93658a900106"
},
"outputs": [],
"source": [
"# Separating the numerical variables\n",
"numericCols = ['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']\n",
"bankNum = bankData[numericCols]\n",
"# Show (rows, columns) of the numeric subset\n",
"bankNum.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 275
},
"colab_type": "code",
"executionInfo": {
"elapsed": 772,
"status": "ok",
"timestamp": 1573002262056,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "-y6f9S1-icOZ",
"outputId": "62f057c2-2275-4653-ebae-8b1e0edf51c9"
},
"outputs": [],
"source": [
"# Feature matrix: dummy-encoded columns placed side by side with the numeric columns\n",
"X = pd.concat([bankCat, bankNum], axis='columns')\n",
"# Target: the 'y' column of the loaded data\n",
"Y = bankData['y']\n",
"\n",
"# Report both shapes, then preview the first rows of the features\n",
"print(X.shape)\n",
"print(Y.shape)\n",
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "OwD0UzFnijwu"
},
"outputs": [],
"source": [
"# Hold out 30% of the rows as a test set; random_state pins the shuffle so the split is repeatable\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, Y, test_size=0.3, random_state=123\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 156
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1265,
"status": "ok",
"timestamp": 1573002268433,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "qazFufwEiqYF",
"outputId": "0b434c4b-d3e8-4847-e384-99815105658e"
},
"outputs": [],
"source": [
"# Create a logistic regression estimator with its default settings and fit it on the training split\n",
"# NOTE(review): this notebook silences all warnings, so a solver ConvergenceWarning would be hidden here - confirm the fit converges\n",
"bankModel = LogisticRegression()\n",
"bankModel.fit(X=X_train, y=y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"executionInfo": {
"elapsed": 854,
"status": "ok",
"timestamp": 1573002270297,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "ccQefqFNi0dy",
"outputId": "58c94307-8654-4e27-c9cd-65b5c339ab10"
},
"outputs": [],
"source": [
"# Predict labels for the held-out rows; `pred` is reused by the evaluation cells that follow\n",
"pred = bankModel.predict(X_test)\n",
"# score() evaluates the fitted model on the test features against the true labels\n",
"testAccuracy = bankModel.score(X_test, y_test)\n",
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(testAccuracy))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1051,
"status": "ok",
"timestamp": 1573002272393,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "yrIBeRfyi9-L",
"outputId": "5521a69d-272c-4106-c557-1539065fee1a"
},
"outputs": [],
"source": [
"# Tabulate true test labels against the model's predictions\n",
"from sklearn.metrics import confusion_matrix\n",
"\n",
"confusionMatrix = confusion_matrix(y_true=y_test, y_pred=pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"executionInfo": {
"elapsed": 1048,
"status": "ok",
"timestamp": 1573002275023,
"user": {
"displayName": "Anthony So",
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
"userId": "11809607246124237079"
},
"user_tz": -660
},
"id": "9vLNwnEcjLOi",
"outputId": "955e1709-2c0b-47cc-e8d5-a62704050f43"
},
"outputs": [],
"source": [
"# Build the text classification report for the test predictions and print it\n",
"from sklearn.metrics import classification_report\n",
"\n",
"report = classification_report(y_test, pred)\n",
"print(report)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "Exercise3.06_updated.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}