mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 16:41:05 +00:00
1074 lines
26 KiB
Plaintext
1074 lines
26 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "TsT6r6c61-tx"
|
|
},
|
|
"source": [
|
|
"**Initial Steps**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "m20vY9Vprhr1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Relative path to the ad dataset file (ad.data)\n",
|
|
"filename = '../Dataset/ad.data'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 287
|
|
},
|
|
"colab_type": "code",
|
|
"id": "6ovSTOXI-eAK",
|
|
"outputId": "0a5f80e7-bcbb-4c6e-e332-8ac891543c47"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"# Loading the data using pandas\n",
|
|
"\n",
|
|
"adData = pd.read_csv(filename,sep=\",\",header = None,error_bad_lines=False)\n",
|
|
"adData.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "LsS_Bou6_zVr",
|
|
"outputId": "2bd20c3a-c6aa-4a54-ce79-efa4aa22b785"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Separating the dependent and independent variables\n",
|
|
"# Preparing the X variables\n",
|
|
"X = adData.loc[:,0:1557]\n",
|
|
"print(X.shape)\n",
|
|
"# Preparing the Y variable\n",
|
|
"Y = adData[1558]\n",
|
|
"print(Y.shape)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "8QTLt_bb5eIM"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"# Replacing special characters in first 3 columns which are of type object\n",
|
|
"for i in range(0,3):\n",
|
|
" X[i] = X[i].str.replace(\"?\", 'NaN').values.astype(float)\n",
|
|
"# Replacing special characters in the remaining columns which are of type integer\n",
|
|
"for i in range(3,1557):\n",
|
|
" X[i] = X[i].replace(\"?\", 'NaN').values.astype(float) \n",
|
|
"# Imputing the 'nan' with mean of the values\n",
|
|
"for i in range(0,1557):\n",
|
|
" X[i] = X[i].fillna(X[i].mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 253
|
|
},
|
|
"colab_type": "code",
|
|
"id": "nEBGUA6yBRhN",
|
|
"outputId": "9d9cc6f9-fa45-4a1e-9c52-550650861538"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Normalising the data sets\n",
|
|
"# Scaling each feature to the [0, 1] range (MinMaxScaler default)\n",
|
|
"from sklearn import preprocessing\n",
|
|
"# Creating the scaling function\n",
|
|
"minmaxScaler = preprocessing.MinMaxScaler()\n",
|
|
"X_tran = pd.DataFrame(minmaxScaler.fit_transform(X))\n",
|
|
"X_tran.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "VohvJNDR5qLy",
|
|
"outputId": "4508df58-e9e0-49b1-88bf-2b78a58dbc3a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Creating a high dimension data set\n",
|
|
"X_hd = pd.DataFrame(pd.np.tile(X_tran, (1, 2)))\n",
|
|
"\n",
|
|
"print(X_hd.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "R6PxDB_Fh6tq"
|
|
},
|
|
"source": [
|
|
"**Adding noise to the dataset**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "zcoP5eZ6h4di"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Defining the mean and standard deviation\n",
|
|
"mu, sigma = 0, 0.1 \n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "o5REOfHoh4N_",
|
|
"outputId": "f68afb40-eb8a-4862-a7a9-b74f5211f2e2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Generating samples from the distribution\n",
|
|
"noise = np.random.normal(mu, sigma, [3279,3116]) \n",
|
|
"noise.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "fADnPg7Lh33J",
|
|
"outputId": "113a8ff1-9cea-4d5e-87ff-d703251753a5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Creating a new data set by adding noise\n",
|
|
"X_new = X_hd + noise\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "gDJhKo9PkKDL",
|
|
"outputId": "48112000-993b-4e2b-edc2-f681de21b649"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Splitting data set into train and test sets\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"# Splitting the data into train and test sets\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X_new, Y, test_size=0.3, random_state=123)\n",
|
|
"\n",
|
|
"print('Training set shape',X_train.shape)\n",
|
|
"\n",
|
|
"print('Test set shape',X_test.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "gtBvWL3Wjwfw"
|
|
},
|
|
"source": [
|
|
"**Backward Elimination Method**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "Y6yj35AbLiOz"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.feature_selection import RFE\n",
|
|
"\n",
|
|
"# Defining the Classification function\n",
|
|
"backModel = LogisticRegression()\n",
|
|
"# Reducing dimensionality to 300 features for backward elimination model\n",
|
|
"rfe = RFE(backModel, 300)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 1000
|
|
},
|
|
"colab_type": "code",
|
|
"id": "CizOWQH8LxSh",
|
|
"outputId": "982e8fdd-165f-4605-ad92-d1c5e953e9fe"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fitting the rfe for selecting the top 300 features\n",
|
|
"import time\n",
|
|
"t0 = time.time()\n",
|
|
"rfe = rfe.fit(X_train, y_train)\n",
|
|
"t1 = time.time()\n",
|
|
"print(\"Backward Elimination time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "E10U-DmxNcyv",
|
|
"outputId": "0d6f6a6f-796e-4997-df95-c928c12deef5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transforming both train and test sets\n",
|
|
"\n",
|
|
"X_train_tran = rfe.transform(X_train)\n",
|
|
"\n",
|
|
"X_test_tran = rfe.transform(X_test)\n",
|
|
"\n",
|
|
"print(\"Training set shape\",X_train_tran.shape)\n",
|
|
"\n",
|
|
"print(\"Test set shape\",X_test_tran.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 88
|
|
},
|
|
"colab_type": "code",
|
|
"id": "2gPEoZ-UPL2q",
|
|
"outputId": "339fd114-88da-4fd3-95c7-424699fa393e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fitting the logistic regression model \n",
|
|
"import time\n",
|
|
"# Defining the LogisticRegression function\n",
|
|
"RfeModel = LogisticRegression()\n",
|
|
"# Starting a timing function\n",
|
|
"t0=time.time()\n",
|
|
"# Fitting the model\n",
|
|
"RfeModel.fit(X_train_tran, y_train)\n",
|
|
"# Finding the end time \n",
|
|
"\n",
|
|
"print(\"Total training time:\", round(time.time()-t0, 3), \"s\")\n",
|
|
"\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "m50cIU3hRYRk",
|
|
"outputId": "f443bea0-da41-4a61-ad1b-220c0164005a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicting on the test set and getting the accuracy\n",
|
|
"pred = RfeModel.predict(X_test_tran)\n",
|
|
"\n",
|
|
"print('Accuracy of Logistic regression model after backward elimination: {:.2f}'.format(RfeModel.score(X_test_tran, y_test)))\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "YQc-inXWJvJJ",
|
|
"outputId": "212ce777-2bc5-4362-bd6c-170bce6e3415"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Printing the Confusion matrix\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"confusionMatrix = confusion_matrix(y_test, pred)\n",
|
|
"print(confusionMatrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "8VbpT7kjSpEs",
|
|
"outputId": "1e6b7d25-2feb-4014-8d01-13daa375d33e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import classification_report\n",
|
|
"# Getting the Classification_report\n",
|
|
"print(classification_report(y_test, pred))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "Mny5_VF2wKWi"
|
|
},
|
|
"source": [
|
|
"**Forward Selection Method**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "nqy3pVu2wPRR"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.feature_selection import SelectKBest\n",
|
|
"\n",
|
|
"# Univariate feature selection: keep the 300 highest-scoring features\n",
|
|
"feats = SelectKBest(k=300)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "-lG6SFbcwP--",
|
|
"outputId": "e7e3e70f-604b-4966-f056-b2fd94fb3200"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fitting the feature selector on the training set\n",
|
|
"import time\n",
|
|
"t0 = time.time()\n",
|
|
"fit = feats.fit(X_train, y_train)\n",
|
|
"t1 = time.time()\n",
|
|
"print(\"Forward selection fitting time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "wgidexHjwP-G"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Creating new training set and test sets \n",
|
|
"\n",
|
|
"features_train = fit.transform(X_train)\n",
|
|
"features_test = fit.transform(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 85
|
|
},
|
|
"colab_type": "code",
|
|
"id": "qKoEhuOWwP5N",
|
|
"outputId": "48bd98dc-a835-4ef0-e532-2dfc964936da"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Printing the shape of train and test sets before transformation\n",
|
|
"print('Train shape before transformation',X_train.shape)\n",
|
|
"print('Test shape before transformation',X_test.shape)\n",
|
|
"\n",
|
|
"# Printing the shape of train and test sets after transformation\n",
|
|
"print('Train shape after transformation',features_train.shape)\n",
|
|
"print('Test shape after transformation',features_test.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 88
|
|
},
|
|
"colab_type": "code",
|
|
"id": "-eksV6tAwP4T",
|
|
"outputId": "005727d7-eea7-4f51-e170-5f69fa867461"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fitting a Logistic Regression Model\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"import time\n",
|
|
"\n",
|
|
"t0 = time.time()\n",
|
|
"\n",
|
|
"forwardModel = LogisticRegression()\n",
|
|
"forwardModel.fit(features_train, y_train)\n",
|
|
"\n",
|
|
"t1 = time.time()\n",
|
|
"print(\"Total training time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "7ei2GemzwP1F",
|
|
"outputId": "42e095c6-9808-4e8f-b59b-bdc81563339d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicting with the forward model\n",
|
|
"pred = forwardModel.predict(features_test)\n",
|
|
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(forwardModel.score(features_test, y_test)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Pp_9Z-hBwPyJ",
|
|
"outputId": "3f9983ce-d018-48ed-95eb-8dc3aed801d6"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Generating confusion matrix\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"\n",
|
|
"confusionMatrix = confusion_matrix(y_test, pred)\n",
|
|
"print(confusionMatrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "MpnmnsHnwPst",
|
|
"outputId": "f4f7eb3f-19be-44e4-b2be-d5e81d1668f0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import classification_report\n",
|
|
"# Getting the Classification_report\n",
|
|
"print(classification_report(y_test, pred))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "I19d1bzHxXHm"
|
|
},
|
|
"source": [
|
|
"**Principal Component Analysis**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "--N4wlunwPrj",
|
|
"outputId": "63d63a58-fc15-4336-da76-c9dc0cdf7643"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.decomposition import PCA\n",
|
|
"import time\n",
|
|
"t0 = time.time()\n",
|
|
"pca = PCA(n_components=300)\n",
|
|
"# Fitting the PCA on the training set\n",
|
|
"pca.fit(X_train)\n",
|
|
"t1 = time.time()\n",
|
|
"print(\"PCA fitting time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "8iQFpNhZyEGm"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transforming training set and test set\n",
|
|
"X_pca = pca.transform(X_train)\n",
|
|
"X_test_pca = pca.transform(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 85
|
|
},
|
|
"colab_type": "code",
|
|
"id": "h_KA0QE_yNAc",
|
|
"outputId": "e252de52-28c6-4a51-d52f-2c4cda7bc2b6"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(\"original shape of Training set: \", X_train.shape)\n",
|
|
"print(\"original shape of Test set: \", X_test.shape)\n",
|
|
"print(\"Transformed shape of training set:\", X_pca.shape)\n",
|
|
"print(\"Transformed shape of test set:\", X_test_pca.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 88
|
|
},
|
|
"colab_type": "code",
|
|
"id": "ire6jJX7yPjM",
|
|
"outputId": "f602a430-9ff1-4041-aee4-6f5f8bfd0924"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"import time\n",
|
|
"\n",
|
|
"pcaModel = LogisticRegression()\n",
|
|
"\n",
|
|
"t0 = time.time()\n",
|
|
"pcaModel.fit(X_pca, y_train)\n",
|
|
"t1 = time.time()\n",
|
|
"\n",
|
|
"print(\"Total training time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "qzoSCt3eybug",
|
|
"outputId": "c4724962-1f7b-4d26-d19a-17e91c084000"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicting with the pca model\n",
|
|
"pred = pcaModel.predict(X_test_pca)\n",
|
|
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(pcaModel.score(X_test_pca, y_test)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 214
|
|
},
|
|
"colab_type": "code",
|
|
"id": "3jKn81vTyq66",
|
|
"outputId": "d9927472-7699-495b-fbe5-0e0eee49715d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Generating confusion matrix\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"\n",
|
|
"confusionMatrix = confusion_matrix(y_test, pred)\n",
|
|
"print(confusionMatrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "_Ji6L-Q5ytgU",
|
|
"outputId": "7c3addc0-faa4-4374-dd57-4e4c40a6031f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import classification_report\n",
|
|
"# Getting the Classification_report\n",
|
|
"print(classification_report(y_test, pred))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "rwBH-CB0y8nF"
|
|
},
|
|
"source": [
|
|
"**Independent Component Analysis**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "meNHCSVkzFCz"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Defining the ICA with number of components\n",
|
|
"from sklearn.decomposition import FastICA \n",
|
|
"ICA = FastICA(n_components=300, random_state=123) "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "FMP1cVMazSDy",
|
|
"outputId": "9bb60f07-25a2-4e3b-f36a-198ef89d781f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fitting the ICA method and transforming the training set and noting the time\n",
|
|
"import time\n",
|
|
"t0 = time.time()\n",
|
|
"X_ica=ICA.fit_transform(X_train)\n",
|
|
"t1 = time.time()\n",
|
|
"print(\"ICA fitting time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "LRbOB4nOzhGU"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transforming the test set\n",
|
|
"X_test_ica=ICA.transform(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 85
|
|
},
|
|
"colab_type": "code",
|
|
"id": "aw1POJKxzspB",
|
|
"outputId": "0ba4038e-1a5d-464f-9e19-04be4b45f395"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(\"original shape of Training set: \", X_train.shape)\n",
|
|
"print(\"original shape of Test set: \", X_test.shape)\n",
|
|
"print(\"Transformed shape of training set:\", X_ica.shape)\n",
|
|
"print(\"Transformed shape of test set:\", X_test_ica.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 88
|
|
},
|
|
"colab_type": "code",
|
|
"id": "J7gsribsz2Dq",
|
|
"outputId": "7cf65053-42be-4c13-aa45-9083397af71c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"import time\n",
|
|
"\n",
|
|
"icaModel = LogisticRegression()\n",
|
|
"\n",
|
|
"t0 = time.time()\n",
|
|
"icaModel.fit(X_ica, y_train)\n",
|
|
"t1 = time.time()\n",
|
|
"\n",
|
|
"print(\"Total training time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "5EKY388k0Frc",
|
|
"outputId": "16a62c07-3fb3-41de-c4cc-cd5c573c6c6c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicting with the ica model\n",
|
|
"pred = icaModel.predict(X_test_ica)\n",
|
|
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(icaModel.score(X_test_ica, y_test)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "ENKhyVBv0S9o",
|
|
"outputId": "88630bcc-9dd3-49ed-a6e1-a722a2c42f4a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Generating confusion matrix\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"\n",
|
|
"confusionMatrix = confusion_matrix(y_test, pred)\n",
|
|
"print(confusionMatrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"id": "UJFRJXe10XMN",
|
|
"outputId": "6c86f5d2-9421-485f-e1d8-8484d3f81c55"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import classification_report\n",
|
|
"# Getting the Classification_report\n",
|
|
"print(classification_report(y_test, pred))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "B7Zuah4K0iup"
|
|
},
|
|
"source": [
|
|
"**Factor Analysis**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "o2QE4uN90sNH"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Defining the number of factors\n",
|
|
"from sklearn.decomposition import FactorAnalysis\n",
|
|
"fa = FactorAnalysis(n_components = 30,random_state=123)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "2u6INrCr04aU",
|
|
"outputId": "573e5c1e-3698-49ca-ad09-00f064638b88"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fitting the Factor analysis method and transforming the training set\n",
|
|
"import time\n",
|
|
"t0 = time.time()\n",
|
|
"X_fac=fa.fit_transform(X_train)\n",
|
|
"t1 = time.time()\n",
|
|
"print(\"Factor analysis fitting time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "_pey-VJM1DZL"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transforming the test set\n",
|
|
"X_test_fac=fa.transform(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 85
|
|
},
|
|
"colab_type": "code",
|
|
"id": "4ssq_gws1L12",
|
|
"outputId": "0b445092-aa30-4584-f356-f0f89f73ca02"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(\"original shape of Training set: \", X_train.shape)\n",
|
|
"print(\"original shape of Test set: \", X_test.shape)\n",
|
|
"print(\"Transformed shape of training set:\", X_fac.shape)\n",
|
|
"print(\"Transformed shape of test set:\", X_test_fac.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 88
|
|
},
|
|
"colab_type": "code",
|
|
"id": "I5y56Qtc1UgQ",
|
|
"outputId": "2f93933e-27dd-472b-f535-ea7de58d2472"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"import time\n",
|
|
"\n",
|
|
"facModel = LogisticRegression()\n",
|
|
"\n",
|
|
"t0 = time.time()\n",
|
|
"facModel.fit(X_fac, y_train)\n",
|
|
"t1 = time.time()\n",
|
|
"\n",
|
|
"print(\"Total training time:\", round(t1-t0, 3), \"s\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "YrE0HvR01g4k",
|
|
"outputId": "5c0fcad2-d599-4f9a-c208-a3e1612c5b9a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Predicting with the factor analysis model\n",
|
|
"pred = facModel.predict(X_test_fac)\n",
|
|
"print('Accuracy of Logistic regression model prediction on test set: {:.2f}'.format(facModel.score(X_test_fac, y_test)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"id": "741BziZa1n9m",
|
|
"outputId": "38ff2afb-47e7-4fb3-8d01-9cc2ebd758a2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Generating confusion matrix\n",
|
|
"from sklearn.metrics import confusion_matrix\n",
|
|
"\n",
|
|
"confusionMatrix = confusion_matrix(y_test, pred)\n",
|
|
"print(confusionMatrix)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "VFfwspdG1tuS",
|
|
"outputId": "0a206a37-1907-459a-b408-847303cc8005"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import classification_report\n",
|
|
"# Getting the Classification_report\n",
|
|
"print(classification_report(y_test, pred))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"accelerator": "TPU",
|
|
"colab": {
|
|
"name": "Chapter 14: Activity 14.02 Comparison of different methods",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|