mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
345 lines
8.2 KiB
Plaintext
345 lines
8.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "8UMFqsCD0xyF"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Importing necessary packages\n",
|
|
"import pandas as pd\n",
|
|
"import altair as alt\n",
|
|
"\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HSXgY0ze09cY"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../bank-full.csv'\n",
|
|
"bankData = pd.read_csv(file_url, sep=\";\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "5SqiAWuMhase"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 323
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2512,
|
|
"status": "ok",
|
|
"timestamp": 1573002258890,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "0va4ykVzhj2P",
|
|
"outputId": "bc1d55b2-c36b-4bc7-e08c-2619e5c22014"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Finding the data types\n",
|
|
"bankData.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "y4LHG0PWh-yr"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Converting all the categorical variables to dummy variables\n",
|
|
"bankCat = pd.get_dummies(bankData[['job','marital','education','default','housing','loan','contact','month','poutcome']])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1094,
|
|
"status": "ok",
|
|
"timestamp": 1573002260021,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "XvvWurd4iCe1",
|
|
"outputId": "e53521c2-2076-477c-f273-83578c2a7793"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"bankCat.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 544,
|
|
"status": "ok",
|
|
"timestamp": 1573002260300,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "deGzAPEDiUNk",
|
|
"outputId": "70bb6f3f-3a7f-45a3-c0d1-93658a900106"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Separating the numerical variables\n",
|
|
"bankNum = bankData[['age','balance','day','duration','campaign','pdays','previous']]\n",
|
|
"bankNum.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 275
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 772,
|
|
"status": "ok",
|
|
"timestamp": 1573002262056,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "-y6f9S1-icOZ",
|
|
"outputId": "62f057c2-2275-4653-ebae-8b1e0edf51c9"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Preparing the X variables\n",
|
|
"X = pd.concat([bankCat, bankNum], axis=1)\n",
|
|
"print(X.shape)\n",
|
|
"# Preparing the Y variable\n",
|
|
"Y = bankData['y']\n",
|
|
"print(Y.shape)\n",
|
|
"X.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "OwD0UzFnijwu"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Splitting the data into train and test sets\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=123)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 156
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1265,
|
|
"status": "ok",
|
|
"timestamp": 1573002268433,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "qazFufwEiqYF",
|
|
"outputId": "0b434c4b-d3e8-4847-e384-99815105658e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Instantiating the LogisticRegression model and fitting it on the training set\n",
|
|
"bankModel = LogisticRegression()\n",
|
|
"bankModel.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 854,
|
|
"status": "ok",
|
|
"timestamp": 1573002270297,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "ccQefqFNi0dy",
|
|
"outputId": "58c94307-8654-4e27-c9cd-65b5c339ab10"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"pred = bankModel.predict(X_test)\n",
|
|
"print('Accuracy of Logisticr regression model prediction on test set: {:.2f}'.format(bankModel.score(X_test, y_test)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1051,
|
|
"status": "ok",
|
|
"timestamp": 1573002272393,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "yrIBeRfyi9-L",
|
|
"outputId": "5521a69d-272c-4106-c557-1539065fee1a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Confusion Matrix for the model\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"confusionMatrix = confusion_matrix(y_test, pred)\n",
|
|
"print(confusionMatrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1048,
|
|
"status": "ok",
|
|
"timestamp": 1573002275023,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "9vLNwnEcjLOi",
|
|
"outputId": "955e1709-2c0b-47cc-e8d5-a62704050f43"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import classification_report\n",
|
|
"print(classification_report(y_test, pred))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise3.06_updated.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|