mlessentials/Lab14/Activity14.02/Activity14.02.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "TsT6r6c61-tx"
   },
   "source": [
    "**Initial Steps**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "m20vY9Vprhr1"
   },
   "outputs": [],
   "source": [
    "# Defining the file name from github\n",
    "filename = '../Dataset/ad.data'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 287
    },
    "colab_type": "code",
    "id": "6ovSTOXI-eAK",
    "outputId": "0a5f80e7-bcbb-4c6e-e332-8ac891543c47"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# Loading the data using pandas\n",
    "\n",
    "adData = pd.read_csv(filename,sep=\",\",header = None,error_bad_lines=False)\n",
    "adData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "LsS_Bou6_zVr",
    "outputId": "2bd20c3a-c6aa-4a54-ce79-efa4aa22b785"
   },
   "outputs": [],
   "source": [
    "# Seperating the dependent and independent variables\n",
    "# Preparing the X variables\n",
    "X = adData.loc[:,0:1557]\n",
    "print(X.shape)\n",
    "# Preparing the Y variable\n",
    "Y = adData[1558]\n",
    "print(Y.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "8QTLt_bb5eIM"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Replacing special characters in first 3 columns which are of type object\n",
    "for i in range(0,3):\n",
    "  X[i] = X[i].str.replace(\"?\", 'NaN').values.astype(float)\n",
    "# Replacing special characters in the remaining columns which are of type integer\n",
    "for i in range(3,1557):\n",
    "  X[i] = X[i].replace(\"?\", 'NaN').values.astype(float)  \n",
    "# Imputing the 'nan'  with mean of the values\n",
    "for i in range(0,1557):\n",
    "  X[i] = X[i].fillna(X[i].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 253
    },
    "colab_type": "code",
    "id": "nEBGUA6yBRhN",
    "outputId": "9d9cc6f9-fa45-4a1e-9c52-550650861538"
   },
   "outputs": [],
   "source": [
    "# Normalising the data sets\n",
    "# Normalising data\n",
    "from sklearn import preprocessing\n",
    "# Creating the scaling function\n",
    "minmaxScaler = preprocessing.MinMaxScaler()\n",
    "X_tran = pd.DataFrame(minmaxScaler.fit_transform(X))\n",
    "X_tran.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "VohvJNDR5qLy",
    "outputId": "4508df58-e9e0-49b1-88bf-2b78a58dbc3a"
   },
   "outputs": [],
   "source": [
    "# Creating a high dimension data set\n",
    "X_hd = pd.DataFrame(pd.np.tile(X_tran, (1, 2)))\n",
    "\n",
    "print(X_hd.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "R6PxDB_Fh6tq"
   },
   "source": [
    "**Adding noise to the dataset**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "zcoP5eZ6h4di"
   },
   "outputs": [],
   "source": [
    "# Defining the mean and standard deviation\n",
    "mu, sigma = 0, 0.1 \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "o5REOfHoh4N_",
    "outputId": "f68afb40-eb8a-4862-a7a9-b74f5211f2e2"
   },
   "outputs": [],
   "source": [
    "# Generating samples from the distribution\n",
    "noise = np.random.normal(mu, sigma, [3279,3116]) \n",
    "noise.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "fADnPg7Lh33J",
    "outputId": "113a8ff1-9cea-4d5e-87ff-d703251753a5"
   },
   "outputs": [],
   "source": [
    "# Creating a new data set by adding noise\n",
    "X_new = X_hd + noise\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "gDJhKo9PkKDL",
    "outputId": "48112000-993b-4e2b-edc2-f681de21b649"
   },
   "outputs": [],
   "source": [
    "# Splitting data set into train and test sets\n",
    "from sklearn.model_selection import train_test_split\n",
    "# Splitting the data into train and test sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X_new, Y, test_size=0.3, random_state=123)\n",
    "\n",
    "print('Training set shape',X_train.shape)\n",
    "\n",
    "print('Test set shape',X_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "gtBvWL3Wjwfw"
   },
   "source": [
    "**Backward Elimination Method**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Y6yj35AbLiOz"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.feature_selection import RFE\n",
    "\n",
    "# Defining the Classification function\n",
    "backModel = LogisticRegression()\n",
    "# Reducing dimensionality to 300 features for backward elimination model\n",
    "rfe = RFE(backModel, 300)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "colab_type": "code",
    "id": "CizOWQH8LxSh",
    "outputId": "982e8fdd-165f-4605-ad92-d1c5e953e9fe"
   },
   "outputs": [],
   "source": [
    "# Fitting the rfe for selecting the top 300 features\n",
    "import time\n",
    "t0 = time.time()\n",
    "rfe = rfe.fit(X_train, y_train)\n",
    "t1 = time.time()\n",
    "print(\"Backward Elimination time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "E10U-DmxNcyv",
    "outputId": "0d6f6a6f-796e-4997-df95-c928c12deef5"
   },
   "outputs": [],
   "source": [
    "# Transforming both train and test sets\n",
    "\n",
    "X_train_tran = rfe.transform(X_train)\n",
    "\n",
    "X_test_tran = rfe.transform(X_test)\n",
    "\n",
    "print(\"Training set shape\",X_train_tran.shape)\n",
    "\n",
    "print(\"Test set shape\",X_test_tran.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 88
    },
    "colab_type": "code",
    "id": "2gPEoZ-UPL2q",
    "outputId": "339fd114-88da-4fd3-95c7-424699fa393e"
   },
   "outputs": [],
   "source": [
    "# Fitting the logistic regression model \n",
    "import time\n",
    "# Defining the LogisticRegression function\n",
    "RfeModel = LogisticRegression()\n",
    "# Starting a timing function\n",
    "t0=time.time()\n",
    "# Fitting the model\n",
    "RfeModel.fit(X_train_tran, y_train)\n",
    "# Finding the end time \n",
    "\n",
    "print(\"Total training time:\", round(time.time()-t0, 3), \"s\")\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "m50cIU3hRYRk",
    "outputId": "f443bea0-da41-4a61-ad1b-220c0164005a"
   },
   "outputs": [],
   "source": [
    "# Predicting on the test set and getting the accuracy\n",
    "pred = RfeModel.predict(X_test_tran)\n",
    "\n",
    "print('Accuracy of Logistic regression model after backward elimination: {:.2f}'.format(RfeModel.score(X_test_tran, y_test)))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "YQc-inXWJvJJ",
    "outputId": "212ce777-2bc5-4362-bd6c-170bce6e3415"
   },
   "outputs": [],
   "source": [
    "# Printing the Confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "confusionMatrix = confusion_matrix(y_test, pred)\n",
    "print(confusionMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "id": "8VbpT7kjSpEs",
    "outputId": "1e6b7d25-2feb-4014-8d01-13daa375d33e"
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# Getting the Classification_report\n",
    "print(classification_report(y_test, pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "Mny5_VF2wKWi"
   },
   "source": [
    "**Forward Selection Method**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "nqy3pVu2wPRR"
   },
   "outputs": [],
   "source": [
    "from sklearn.feature_selection import SelectKBest\n",
    "\n",
    "# feature extraction\n",
    "feats = SelectKBest(k=300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "-lG6SFbcwP--",
    "outputId": "e7e3e70f-604b-4966-f056-b2fd94fb3200"
   },
   "outputs": [],
   "source": [
    " # Fitting the features for training set\n",
    "import time\n",
    "t0 = time.time()\n",
    "fit = feats.fit(X_train, y_train)\n",
    "t1 = time.time()\n",
    "print(\"Forward selection fitting time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "wgidexHjwP-G"
   },
   "outputs": [],
   "source": [
    "# Creating new training set and test sets \n",
    "\n",
    "features_train = fit.transform(X_train)\n",
    "features_test = fit.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 85
    },
    "colab_type": "code",
    "id": "qKoEhuOWwP5N",
    "outputId": "48bd98dc-a835-4ef0-e532-2dfc964936da"
   },
   "outputs": [],
   "source": [
    "# Printing the shape of train and test sets before transformation\n",
    "print('Train shape before transformation',X_train.shape)\n",
    "print('Test shape before transformation',X_test.shape)\n",
    "\n",
    "# Printing the shape of train and test sets after transformation\n",
    "print('Train shape after transformation',features_train.shape)\n",
    "print('Test shape after transformation',features_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 88
    },
    "colab_type": "code",
    "id": "-eksV6tAwP4T",
    "outputId": "005727d7-eea7-4f51-e170-5f69fa867461"
   },
   "outputs": [],
   "source": [
    "# Fitting a Logistic Regression Model\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "import time\n",
    "\n",
    "t0 = time.time()\n",
    "\n",
    "forwardModel = LogisticRegression()\n",
    "forwardModel.fit(features_train, y_train)\n",
    "\n",
    "t1 = time.time()\n",
    "print(\"Total training time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "7ei2GemzwP1F",
    "outputId": "42e095c6-9808-4e8f-b59b-bdc81563339d"
   },
   "outputs": [],
   "source": [
    "# Predicting with the forward model\n",
    "pred = forwardModel.predict(features_test)\n",
    "print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(forwardModel.score(features_test, y_test)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "Pp_9Z-hBwPyJ",
    "outputId": "3f9983ce-d018-48ed-95eb-8dc3aed801d6"
   },
   "outputs": [],
   "source": [
    "# Generating confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusionMatrix = confusion_matrix(y_test, pred)\n",
    "print(confusionMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "id": "MpnmnsHnwPst",
    "outputId": "f4f7eb3f-19be-44e4-b2be-d5e81d1668f0"
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# Getting the Classification_report\n",
    "print(classification_report(y_test, pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "I19d1bzHxXHm"
   },
   "source": [
    "**Principal Component Analysis**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "--N4wlunwPrj",
    "outputId": "63d63a58-fc15-4336-da76-c9dc0cdf7643"
   },
   "outputs": [],
   "source": [
    "from sklearn.decomposition import PCA\n",
    "import time\n",
    "t0 = time.time()\n",
    "pca = PCA(n_components=300)\n",
    "# Fitting the PCA on the training set\n",
    "pca.fit(X_train)\n",
    "t1 = time.time()\n",
    "print(\"PCA fitting time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "8iQFpNhZyEGm"
   },
   "outputs": [],
   "source": [
    "# Transforming training set and test set\n",
    "X_pca = pca.transform(X_train)\n",
    "X_test_pca = pca.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 85
    },
    "colab_type": "code",
    "id": "h_KA0QE_yNAc",
    "outputId": "e252de52-28c6-4a51-d52f-2c4cda7bc2b6"
   },
   "outputs": [],
   "source": [
    "print(\"original shape of Training set:   \", X_train.shape)\n",
    "print(\"original shape of Test set:   \", X_test.shape)\n",
    "print(\"Transformed shape of training set:\", X_pca.shape)\n",
    "print(\"Transformed shape of test set:\", X_test_pca.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 88
    },
    "colab_type": "code",
    "id": "ire6jJX7yPjM",
    "outputId": "f602a430-9ff1-4041-aee4-6f5f8bfd0924"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "import time\n",
    "\n",
    "pcaModel = LogisticRegression()\n",
    "\n",
    "t0 = time.time()\n",
    "pcaModel.fit(X_pca, y_train)\n",
    "t1 = time.time()\n",
    "\n",
    "print(\"Total training time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "qzoSCt3eybug",
    "outputId": "c4724962-1f7b-4d26-d19a-17e91c084000"
   },
   "outputs": [],
   "source": [
    "# Predicting with the pca model\n",
    "pred = pcaModel.predict(X_test_pca)\n",
    "print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(pcaModel.score(X_test_pca, y_test)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 214
    },
    "colab_type": "code",
    "id": "3jKn81vTyq66",
    "outputId": "d9927472-7699-495b-fbe5-0e0eee49715d"
   },
   "outputs": [],
   "source": [
    "# Generating confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusionMatrix = confusion_matrix(y_test, pred)\n",
    "print(confusionMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "id": "_Ji6L-Q5ytgU",
    "outputId": "7c3addc0-faa4-4374-dd57-4e4c40a6031f"
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# Getting the Classification_report\n",
    "print(classification_report(y_test, pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "rwBH-CB0y8nF"
   },
   "source": [
    "**Independent Component Analysis**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "meNHCSVkzFCz"
   },
   "outputs": [],
   "source": [
    "# Defining the ICA with number of components\n",
    "from sklearn.decomposition import FastICA \n",
    "ICA = FastICA(n_components=300, random_state=123) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "FMP1cVMazSDy",
    "outputId": "9bb60f07-25a2-4e3b-f36a-198ef89d781f"
   },
   "outputs": [],
   "source": [
    "# Fitting the ICA method and transforming the training set and noting the time\n",
    "import time\n",
    "t0 = time.time()\n",
    "X_ica=ICA.fit_transform(X_train)\n",
    "t1 = time.time()\n",
    "print(\"ICA fitting time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "LRbOB4nOzhGU"
   },
   "outputs": [],
   "source": [
    "# Transfroming the test set \n",
    "X_test_ica=ICA.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 85
    },
    "colab_type": "code",
    "id": "aw1POJKxzspB",
    "outputId": "0ba4038e-1a5d-464f-9e19-04be4b45f395"
   },
   "outputs": [],
   "source": [
    "print(\"original shape of Training set:   \", X_train.shape)\n",
    "print(\"original shape of Test set:   \", X_test.shape)\n",
    "print(\"Transformed shape of training set:\", X_ica.shape)\n",
    "print(\"Transformed shape of test set:\", X_test_ica.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 88
    },
    "colab_type": "code",
    "id": "J7gsribsz2Dq",
    "outputId": "7cf65053-42be-4c13-aa45-9083397af71c"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "import time\n",
    "\n",
    "icaModel = LogisticRegression()\n",
    "\n",
    "t0 = time.time()\n",
    "icaModel.fit(X_ica, y_train)\n",
    "t1 = time.time()\n",
    "\n",
    "print(\"Total training time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "5EKY388k0Frc",
    "outputId": "16a62c07-3fb3-41de-c4cc-cd5c573c6c6c"
   },
   "outputs": [],
   "source": [
    "# Predicting with the ica model\n",
    "pred = icaModel.predict(X_test_ica)\n",
    "print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(icaModel.score(X_test_ica, y_test)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "ENKhyVBv0S9o",
    "outputId": "88630bcc-9dd3-49ed-a6e1-a722a2c42f4a"
   },
   "outputs": [],
   "source": [
    "# Generating confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusionMatrix = confusion_matrix(y_test, pred)\n",
    "print(confusionMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 224
    },
    "colab_type": "code",
    "id": "UJFRJXe10XMN",
    "outputId": "6c86f5d2-9421-485f-e1d8-8484d3f81c55"
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# Getting the Classification_report\n",
    "print(classification_report(y_test, pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "B7Zuah4K0iup"
   },
   "source": [
    "**Factor Analysis**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "o2QE4uN90sNH"
   },
   "outputs": [],
   "source": [
    "# Defining the number of factors\n",
    "from sklearn.decomposition import FactorAnalysis\n",
    "fa = FactorAnalysis(n_components = 30,random_state=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "2u6INrCr04aU",
    "outputId": "573e5c1e-3698-49ca-ad09-00f064638b88"
   },
   "outputs": [],
   "source": [
    "# Fitting the Factor analysis method and transforming the training set\n",
    "import time\n",
    "t0 = time.time()\n",
    "X_fac=fa.fit_transform(X_train)\n",
    "t1 = time.time()\n",
    "print(\"Factor analysis fitting time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "_pey-VJM1DZL"
   },
   "outputs": [],
   "source": [
    "# Transfroming the test set \n",
    "X_test_fac=fa.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 85
    },
    "colab_type": "code",
    "id": "4ssq_gws1L12",
    "outputId": "0b445092-aa30-4584-f356-f0f89f73ca02"
   },
   "outputs": [],
   "source": [
    "print(\"original shape of Training set:   \", X_train.shape)\n",
    "print(\"original shape of Test set:   \", X_test.shape)\n",
    "print(\"Transformed shape of training set:\", X_fac.shape)\n",
    "print(\"Transformed shape of test set:\", X_test_fac.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 88
    },
    "colab_type": "code",
    "id": "I5y56Qtc1UgQ",
    "outputId": "2f93933e-27dd-472b-f535-ea7de58d2472"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "import time\n",
    "\n",
    "facModel = LogisticRegression()\n",
    "\n",
    "t0 = time.time()\n",
    "facModel.fit(X_fac, y_train)\n",
    "t1 = time.time()\n",
    "\n",
    "print(\"Total training time:\", round(t1-t0, 3), \"s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "id": "YrE0HvR01g4k",
    "outputId": "5c0fcad2-d599-4f9a-c208-a3e1612c5b9a"
   },
   "outputs": [],
   "source": [
    "# Predicting with the factor analysis model\n",
    "pred = facModel.predict(X_test_fac)\n",
    "print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(facModel.score(X_test_fac, y_test)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 51
    },
    "colab_type": "code",
    "id": "741BziZa1n9m",
    "outputId": "38ff2afb-47e7-4fb3-8d01-9cc2ebd758a2"
   },
   "outputs": [],
   "source": [
    "# Generating confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusionMatrix = confusion_matrix(y_test, pred)\n",
    "print(confusionMatrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "id": "VFfwspdG1tuS",
    "outputId": "0a206a37-1907-459a-b408-847303cc8005"
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# Getting the Classification_report\n",
    "print(classification_report(y_test, pred))"
   ]
  }
 ],
 "metadata": {
  "accelerator": "TPU",
  "colab": {
   "name": "Chapter 14: Activity 14.02 Comparison of different methods",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}