{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 3.06: Logistic regression on the bank data\n",
    "\n",
    "This notebook loads `bank-full.csv`, converts the categorical columns to dummy variables, combines them with the numerical columns, fits a `LogisticRegression` model on a 70/30 train/test split, and reports the accuracy, confusion matrix and classification report on the test set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "8UMFqsCD0xyF"
   },
   "outputs": [],
   "source": [
    "# Importing necessary packages (all imports live in this one cell)\n",
    "import warnings\n",
    "\n",
    "import altair as alt\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Silences every warning for the rest of the notebook\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "HSXgY0ze09cY"
   },
   "outputs": [],
   "source": [
    "# The file is read with ';' as the field separator\n",
    "file_url = '../bank-full.csv'\n",
    "bankData = pd.read_csv(file_url, sep=\";\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 323
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 2512,
     "status": "ok",
     "timestamp": 1573002258890,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "0va4ykVzhj2P",
    "outputId": "bc1d55b2-c36b-4bc7-e08c-2619e5c22014"
   },
   "outputs": [],
   "source": [
    "# Finding the data types\n",
    "bankData.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare features and target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "y4LHG0PWh-yr"
   },
   "outputs": [],
   "source": [
    "# Converting all the categorical variables to dummy variables\n",
    "bankCat = pd.get_dummies(bankData[['job','marital','education','default','housing','loan','contact','month','poutcome']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1094,
     "status": "ok",
     "timestamp": 1573002260021,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "XvvWurd4iCe1",
    "outputId": "e53521c2-2076-477c-f273-83578c2a7793"
   },
   "outputs": [],
   "source": [
    "bankCat.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 544,
     "status": "ok",
     "timestamp": 1573002260300,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "deGzAPEDiUNk",
    "outputId": "70bb6f3f-3a7f-45a3-c0d1-93658a900106"
   },
   "outputs": [],
   "source": [
    "# Separating the numerical variables\n",
    "bankNum = bankData[['age','balance','day','duration','campaign','pdays','previous']]\n",
    "bankNum.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 275
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 772,
     "status": "ok",
     "timestamp": 1573002262056,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "-y6f9S1-icOZ",
    "outputId": "62f057c2-2275-4653-ebae-8b1e0edf51c9"
   },
   "outputs": [],
   "source": [
    "# Preparing the X variables: dummy columns and numerical columns side by side\n",
    "X = pd.concat([bankCat, bankNum], axis=1)\n",
    "print(X.shape)\n",
    "# Preparing the Y variable: the 'y' column of the raw data\n",
    "Y = bankData['y']\n",
    "print(Y.shape)\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/test split and model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "OwD0UzFnijwu"
   },
   "outputs": [],
   "source": [
    "# Splitting the data into train and test sets (30% held out, fixed seed)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 156
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1265,
     "status": "ok",
     "timestamp": 1573002268433,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "qazFufwEiqYF",
    "outputId": "0b434c4b-d3e8-4847-e384-99815105658e"
   },
   "outputs": [],
   "source": [
    "# Defining the LogisticRegression model (default settings) and fitting it on the training set\n",
    "# NOTE(review): warnings are silenced globally in the import cell, so any warning raised\n",
    "# by fit() (e.g. a convergence warning) would not be displayed here - confirm the fit converges.\n",
    "bankModel = LogisticRegression()\n",
    "bankModel.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 854,
     "status": "ok",
     "timestamp": 1573002270297,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "ccQefqFNi0dy",
    "outputId": "58c94307-8654-4e27-c9cd-65b5c339ab10"
   },
   "outputs": [],
   "source": [
    "# Predictions on the test set; `pred` is reused by the confusion matrix and report cells\n",
    "pred = bankModel.predict(X_test)\n",
    "print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(bankModel.score(X_test, y_test)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1051,
     "status": "ok",
     "timestamp": 1573002272393,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "yrIBeRfyi9-L",
    "outputId": "5521a69d-272c-4106-c557-1539065fee1a"
   },
   "outputs": [],
   "source": [
    "# Confusion Matrix for the model (true labels first, predictions second)\n",
    "confusionMatrix = confusion_matrix(y_test, pred)\n",
    "print(confusionMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1048,
     "status": "ok",
     "timestamp": 1573002275023,
     "user": {
      "displayName": "Anthony So",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
      "userId": "11809607246124237079"
     },
     "user_tz": -660
    },
    "id": "9vLNwnEcjLOi",
    "outputId": "955e1709-2c0b-47cc-e8d5-a62704050f43"
   },
   "outputs": [],
   "source": [
    "# Classification report for the test-set predictions\n",
    "print(classification_report(y_test, pred))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Exercise3.06_updated.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}