Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

1074 lines
26 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "TsT6r6c61-tx"
},
"source": [
"**Initial Steps**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "m20vY9Vprhr1"
},
"outputs": [],
"source": [
"# Relative path of the advertisement data set that this notebook loads\n",
"filename = '../Dataset/ad.data'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 287
},
"colab_type": "code",
"id": "6ovSTOXI-eAK",
"outputId": "0a5f80e7-bcbb-4c6e-e332-8ac891543c47"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Loading the comma separated data, which has no header row, using pandas.\n",
"# Malformed rows are skipped with a warning. `on_bad_lines` replaces `error_bad_lines`,\n",
"# which was deprecated in pandas 1.3 and removed in pandas 2.0. Older pandas versions\n",
"# reject the new keyword with a TypeError, so the legacy spelling is used as a fallback.\n",
"try:\n",
"    adData = pd.read_csv(filename, sep=\",\", header=None, on_bad_lines='warn')\n",
"except TypeError:\n",
"    adData = pd.read_csv(filename, sep=\",\", header=None, error_bad_lines=False)\n",
"adData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "LsS_Bou6_zVr",
"outputId": "2bd20c3a-c6aa-4a54-ce79-efa4aa22b785"
},
"outputs": [],
"source": [
"# Separating the dependent and independent variables\n",
"# Preparing the X variables: .loc slices include both end points, so this keeps columns 0 to 1557.\n",
"# An explicit copy is taken so that cleaning X later never writes into a view of adData\n",
"X = adData.loc[:, 0:1557].copy()\n",
"print(X.shape)\n",
"# Preparing the Y variable (column 1558)\n",
"Y = adData[1558]\n",
"print(Y.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "8QTLt_bb5eIM"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Missing values are written as '?' (possibly padded with spaces). They are replaced with NaN\n",
"# and every feature column, including the last one (1557), is converted to float.\n",
"# The pattern is anchored so that only cells consisting of a lone '?' are treated as missing\n",
"X = X.replace(r'^\\s*\\?\\s*$', np.nan, regex=True).astype(float)\n",
"# Imputing the NaN values with the mean of their own column\n",
"X = X.fillna(X.mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 253
},
"colab_type": "code",
"id": "nEBGUA6yBRhN",
"outputId": "9d9cc6f9-fa45-4a1e-9c52-550650861538"
},
"outputs": [],
"source": [
"# Rescaling every feature with a min-max scaler\n",
"from sklearn import preprocessing\n",
"\n",
"minmaxScaler = preprocessing.MinMaxScaler()\n",
"# NOTE(review): the scaler is fitted on all rows, before the train/test split that is\n",
"# performed later in this notebook, so test rows influence the scaling\n",
"scaledValues = minmaxScaler.fit_transform(X)\n",
"X_tran = pd.DataFrame(scaledValues)\n",
"X_tran.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "VohvJNDR5qLy",
"outputId": "4508df58-e9e0-49b1-88bf-2b78a58dbc3a"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Creating a high dimension data set by placing two copies of the scaled features side by side.\n",
"# np.tile is called directly: the pd.np alias was deprecated in pandas 1.0 and removed in 2.0\n",
"X_hd = pd.DataFrame(np.tile(X_tran, (1, 2)))\n",
"\n",
"print(X_hd.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "R6PxDB_Fh6tq"
},
"source": [
"**Adding noise to the dataset**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "zcoP5eZ6h4di"
},
"outputs": [],
"source": [
"# Parameters of the noise distribution: mean and standard deviation\n",
"mu = 0\n",
"sigma = 0.1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "o5REOfHoh4N_",
"outputId": "f68afb40-eb8a-4862-a7a9-b74f5211f2e2"
},
"outputs": [],
"source": [
"# Generating one noise sample per entry of the high dimension data set.\n",
"# The shape is taken from X_hd instead of being hard-coded, so it stays correct if rows are\n",
"# skipped while loading. The generator is seeded with the same value used for random_state\n",
"# elsewhere in this notebook, so that reruns produce the same noise\n",
"rng = np.random.default_rng(123)\n",
"noise = rng.normal(mu, sigma, X_hd.shape)\n",
"noise.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "fADnPg7Lh33J",
"outputId": "113a8ff1-9cea-4d5e-87ff-d703251753a5"
},
"outputs": [],
"source": [
"# Perturbing every entry of the high dimension data set with its noise sample\n",
"X_new = X_hd + noise\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "gDJhKo9PkKDL",
"outputId": "48112000-993b-4e2b-edc2-f681de21b649"
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Holding out 30% of the rows as the test set; the fixed random_state keeps the split repeatable\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X_new, Y, test_size=0.3, random_state=123\n",
")\n",
"\n",
"print('Training set shape', X_train.shape)\n",
"print('Test set shape', X_test.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "gtBvWL3Wjwfw"
},
"source": [
"**Backward Elimination Method**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Y6yj35AbLiOz"
},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.feature_selection import RFE\n",
"\n",
"# Defining the classifier used inside the elimination procedure\n",
"backModel = LogisticRegression()\n",
"# Reducing dimensionality to 300 features for backward elimination model.\n",
"# The feature count is passed by keyword: recent scikit-learn releases reject it as a positional argument\n",
"rfe = RFE(backModel, n_features_to_select=300)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"colab_type": "code",
"id": "CizOWQH8LxSh",
"outputId": "982e8fdd-165f-4605-ad92-d1c5e953e9fe"
},
"outputs": [],
"source": [
"# Running the feature elimination on the training set and timing the fit\n",
"import time\n",
"\n",
"t0 = time.time()\n",
"rfe = rfe.fit(X_train, y_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Backward Elimination time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "E10U-DmxNcyv",
"outputId": "0d6f6a6f-796e-4997-df95-c928c12deef5"
},
"outputs": [],
"source": [
"# Keeping only the selected features in both the train and test sets\n",
"X_train_tran = rfe.transform(X_train)\n",
"X_test_tran = rfe.transform(X_test)\n",
"\n",
"print(\"Training set shape\", X_train_tran.shape)\n",
"print(\"Test set shape\", X_test_tran.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"colab_type": "code",
"id": "2gPEoZ-UPL2q",
"outputId": "339fd114-88da-4fd3-95c7-424699fa393e"
},
"outputs": [],
"source": [
"# Training a logistic regression model on the reduced training set and timing the fit\n",
"import time\n",
"\n",
"RfeModel = LogisticRegression()\n",
"\n",
"t0 = time.time()\n",
"RfeModel.fit(X_train_tran, y_train)\n",
"t1 = time.time()\n",
"\n",
"print(\"Total training time:\", round(t1 - t0, 3), \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "m50cIU3hRYRk",
"outputId": "f443bea0-da41-4a61-ad1b-220c0164005a"
},
"outputs": [],
"source": [
"# Predicting on the reduced test set and reporting the accuracy\n",
"pred = RfeModel.predict(X_test_tran)\n",
"\n",
"accuracy = RfeModel.score(X_test_tran, y_test)\n",
"print('Accuracy of Logistic regression model after backward elimination: {:.2f}'.format(accuracy))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "YQc-inXWJvJJ",
"outputId": "212ce777-2bc5-4362-bd6c-170bce6e3415"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Confusion matrix of the test labels against the backward elimination predictions\n",
"confusionMatrix = confusion_matrix(y_test, pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"id": "8VbpT7kjSpEs",
"outputId": "1e6b7d25-2feb-4014-8d01-13daa375d33e"
},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Classification report for the backward elimination predictions\n",
"report = classification_report(y_test, pred)\n",
"print(report)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Mny5_VF2wKWi"
},
"source": [
"**Forward Selection Method (univariate selection with SelectKBest)**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "nqy3pVu2wPRR"
},
"outputs": [],
"source": [
"from sklearn.feature_selection import SelectKBest\n",
"\n",
"# Selector that keeps the 300 best scoring features\n",
"feats = SelectKBest(k=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "-lG6SFbcwP--",
"outputId": "e7e3e70f-604b-4966-f056-b2fd94fb3200"
},
"outputs": [],
"source": [
"# Scoring the features on the training set and timing the fit\n",
"import time\n",
"\n",
"t0 = time.time()\n",
"fit = feats.fit(X_train, y_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Forward selection fitting time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "wgidexHjwP-G"
},
"outputs": [],
"source": [
"# Reducing the train and test sets to the selected features,\n",
"# using the selector that was fitted on the training set\n",
"features_train = fit.transform(X_train)\n",
"features_test = fit.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"colab_type": "code",
"id": "qKoEhuOWwP5N",
"outputId": "48bd98dc-a835-4ef0-e532-2dfc964936da"
},
"outputs": [],
"source": [
"# Comparing the shapes of the train and test sets before and after the transformation\n",
"shapeReport = [\n",
"    ('Train shape before transformation', X_train),\n",
"    ('Test shape before transformation', X_test),\n",
"    ('Train shape after transformation', features_train),\n",
"    ('Test shape after transformation', features_test),\n",
"]\n",
"for label, data in shapeReport:\n",
"    print(label, data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"colab_type": "code",
"id": "-eksV6tAwP4T",
"outputId": "005727d7-eea7-4f51-e170-5f69fa867461"
},
"outputs": [],
"source": [
"# Training a logistic regression model on the selected features and timing it\n",
"from sklearn.linear_model import LogisticRegression\n",
"import time\n",
"\n",
"t0 = time.time()\n",
"forwardModel = LogisticRegression()\n",
"forwardModel.fit(features_train, y_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Total training time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "7ei2GemzwP1F",
"outputId": "42e095c6-9808-4e8f-b59b-bdc81563339d"
},
"outputs": [],
"source": [
"# Predicting on the test set with the forward model and reporting the accuracy\n",
"pred = forwardModel.predict(features_test)\n",
"\n",
"accuracy = forwardModel.score(features_test, y_test)\n",
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(accuracy))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "Pp_9Z-hBwPyJ",
"outputId": "3f9983ce-d018-48ed-95eb-8dc3aed801d6"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Confusion matrix of the test labels against the forward model predictions\n",
"confusionMatrix = confusion_matrix(y_test, pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"id": "MpnmnsHnwPst",
"outputId": "f4f7eb3f-19be-44e4-b2be-d5e81d1668f0"
},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Classification report for the forward model predictions\n",
"report = classification_report(y_test, pred)\n",
"print(report)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "I19d1bzHxXHm"
},
"source": [
"**Principal Component Analysis**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "--N4wlunwPrj",
"outputId": "63d63a58-fc15-4336-da76-c9dc0cdf7643"
},
"outputs": [],
"source": [
"from sklearn.decomposition import PCA\n",
"import time\n",
"\n",
"t0 = time.time()\n",
"# Keeping 300 principal components. random_state is fixed, as it is for the ICA and factor\n",
"# analysis steps of this notebook, so that a randomized SVD solver returns the same\n",
"# components on every run\n",
"pca = PCA(n_components=300, random_state=123)\n",
"# Fitting the PCA on the training set\n",
"pca.fit(X_train)\n",
"t1 = time.time()\n",
"print(\"PCA fitting time:\", round(t1-t0, 3), \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "8iQFpNhZyEGm"
},
"outputs": [],
"source": [
"# Projecting the train and test sets with the PCA fitted on the training set\n",
"X_pca = pca.transform(X_train)\n",
"X_test_pca = pca.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"colab_type": "code",
"id": "h_KA0QE_yNAc",
"outputId": "e252de52-28c6-4a51-d52f-2c4cda7bc2b6"
},
"outputs": [],
"source": [
"# Comparing the shapes of the train and test sets before and after the PCA transformation\n",
"shapeReport = [\n",
"    (\"original shape of Training set: \", X_train),\n",
"    (\"original shape of Test set: \", X_test),\n",
"    (\"Transformed shape of training set:\", X_pca),\n",
"    (\"Transformed shape of test set:\", X_test_pca),\n",
"]\n",
"for label, data in shapeReport:\n",
"    print(label, data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"colab_type": "code",
"id": "ire6jJX7yPjM",
"outputId": "f602a430-9ff1-4041-aee4-6f5f8bfd0924"
},
"outputs": [],
"source": [
"# Training a logistic regression model on the PCA components and timing the fit\n",
"from sklearn.linear_model import LogisticRegression\n",
"import time\n",
"\n",
"pcaModel = LogisticRegression()\n",
"\n",
"t0 = time.time()\n",
"pcaModel.fit(X_pca, y_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Total training time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "qzoSCt3eybug",
"outputId": "c4724962-1f7b-4d26-d19a-17e91c084000"
},
"outputs": [],
"source": [
"# Predicting on the test set with the pca model and reporting the accuracy\n",
"pred = pcaModel.predict(X_test_pca)\n",
"\n",
"accuracy = pcaModel.score(X_test_pca, y_test)\n",
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(accuracy))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 214
},
"colab_type": "code",
"id": "3jKn81vTyq66",
"outputId": "d9927472-7699-495b-fbe5-0e0eee49715d"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Confusion matrix of the test labels against the pca model predictions\n",
"confusionMatrix = confusion_matrix(y_test, pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"id": "_Ji6L-Q5ytgU",
"outputId": "7c3addc0-faa4-4374-dd57-4e4c40a6031f"
},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Classification report for the pca model predictions\n",
"report = classification_report(y_test, pred)\n",
"print(report)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "rwBH-CB0y8nF"
},
"source": [
"**Independent Component Analysis**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "meNHCSVkzFCz"
},
"outputs": [],
"source": [
"from sklearn.decomposition import FastICA\n",
"\n",
"# ICA estimator with 300 components and a fixed random_state\n",
"ICA = FastICA(n_components=300, random_state=123)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "FMP1cVMazSDy",
"outputId": "9bb60f07-25a2-4e3b-f36a-198ef89d781f"
},
"outputs": [],
"source": [
"# Fitting the ICA on the training set, transforming it in the same step, and timing the call\n",
"import time\n",
"\n",
"t0 = time.time()\n",
"X_ica = ICA.fit_transform(X_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"ICA fitting time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "LRbOB4nOzhGU"
},
"outputs": [],
"source": [
"# Transforming the test set with the ICA fitted on the training set\n",
"X_test_ica = ICA.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"colab_type": "code",
"id": "aw1POJKxzspB",
"outputId": "0ba4038e-1a5d-464f-9e19-04be4b45f395"
},
"outputs": [],
"source": [
"# Comparing the shapes of the train and test sets before and after the ICA transformation\n",
"shapeReport = [\n",
"    (\"original shape of Training set: \", X_train),\n",
"    (\"original shape of Test set: \", X_test),\n",
"    (\"Transformed shape of training set:\", X_ica),\n",
"    (\"Transformed shape of test set:\", X_test_ica),\n",
"]\n",
"for label, data in shapeReport:\n",
"    print(label, data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"colab_type": "code",
"id": "J7gsribsz2Dq",
"outputId": "7cf65053-42be-4c13-aa45-9083397af71c"
},
"outputs": [],
"source": [
"# Training a logistic regression model on the ICA components and timing the fit\n",
"from sklearn.linear_model import LogisticRegression\n",
"import time\n",
"\n",
"icaModel = LogisticRegression()\n",
"\n",
"t0 = time.time()\n",
"icaModel.fit(X_ica, y_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Total training time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "5EKY388k0Frc",
"outputId": "16a62c07-3fb3-41de-c4cc-cd5c573c6c6c"
},
"outputs": [],
"source": [
"# Predicting on the test set with the ica model and reporting the accuracy\n",
"pred = icaModel.predict(X_test_ica)\n",
"\n",
"accuracy = icaModel.score(X_test_ica, y_test)\n",
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(accuracy))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "ENKhyVBv0S9o",
"outputId": "88630bcc-9dd3-49ed-a6e1-a722a2c42f4a"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Confusion matrix of the test labels against the ica model predictions\n",
"confusionMatrix = confusion_matrix(y_test, pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"id": "UJFRJXe10XMN",
"outputId": "6c86f5d2-9421-485f-e1d8-8484d3f81c55"
},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Classification report for the ica model predictions\n",
"report = classification_report(y_test, pred)\n",
"print(report)"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "B7Zuah4K0iup"
},
"source": [
"**Factor Analysis**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "o2QE4uN90sNH"
},
"outputs": [],
"source": [
"from sklearn.decomposition import FactorAnalysis\n",
"\n",
"# Factor analysis estimator with 30 factors and a fixed random_state\n",
"# NOTE(review): 30 factors are used here, while the other methods in this notebook keep\n",
"# 300 features/components - confirm that this difference is intentional\n",
"fa = FactorAnalysis(n_components=30, random_state=123)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "2u6INrCr04aU",
"outputId": "573e5c1e-3698-49ca-ad09-00f064638b88"
},
"outputs": [],
"source": [
"# Fitting the factor analysis on the training set, transforming it in the same step, and timing the call\n",
"import time\n",
"\n",
"t0 = time.time()\n",
"X_fac = fa.fit_transform(X_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Factor analysis fitting time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "_pey-VJM1DZL"
},
"outputs": [],
"source": [
"# Transforming the test set with the factor analysis fitted on the training set\n",
"X_test_fac = fa.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"colab_type": "code",
"id": "4ssq_gws1L12",
"outputId": "0b445092-aa30-4584-f356-f0f89f73ca02"
},
"outputs": [],
"source": [
"# Comparing the shapes of the train and test sets before and after the factor analysis transformation\n",
"shapeReport = [\n",
"    (\"original shape of Training set: \", X_train),\n",
"    (\"original shape of Test set: \", X_test),\n",
"    (\"Transformed shape of training set:\", X_fac),\n",
"    (\"Transformed shape of test set:\", X_test_fac),\n",
"]\n",
"for label, data in shapeReport:\n",
"    print(label, data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"colab_type": "code",
"id": "I5y56Qtc1UgQ",
"outputId": "2f93933e-27dd-472b-f535-ea7de58d2472"
},
"outputs": [],
"source": [
"# Training a logistic regression model on the factors and timing the fit\n",
"from sklearn.linear_model import LogisticRegression\n",
"import time\n",
"\n",
"facModel = LogisticRegression()\n",
"\n",
"t0 = time.time()\n",
"facModel.fit(X_fac, y_train)\n",
"t1 = time.time()\n",
"\n",
"elapsed = round(t1 - t0, 3)\n",
"print(\"Total training time:\", elapsed, \"s\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "YrE0HvR01g4k",
"outputId": "5c0fcad2-d599-4f9a-c208-a3e1612c5b9a"
},
"outputs": [],
"source": [
"# Predicting on the test set with the factor analysis model and reporting the accuracy\n",
"pred = facModel.predict(X_test_fac)\n",
"\n",
"accuracy = facModel.score(X_test_fac, y_test)\n",
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(accuracy))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "741BziZa1n9m",
"outputId": "38ff2afb-47e7-4fb3-8d01-9cc2ebd758a2"
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"\n",
"# Confusion matrix of the test labels against the factor analysis model predictions\n",
"confusionMatrix = confusion_matrix(y_test, pred)\n",
"print(confusionMatrix)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"id": "VFfwspdG1tuS",
"outputId": "0a206a37-1907-459a-b408-847303cc8005"
},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Classification report for the factor analysis model predictions\n",
"report = classification_report(y_test, pred)\n",
"print(report)"
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"name": "Chapter 14: Activity 14.02 Comparison of different methods",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}