Files
mlessentials/Lab15/Exercise15.07/Exercise_15_07_Ensemble_learning_Stacking.ipynb
Your Name 54ccb1423f added
2021-02-08 11:17:02 +00:00

336 lines
7.6 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ktFt6IzKJkSn"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "_JoUdRaiIwo4"
},
"outputs": [],
"source": [
"# Relative path to the crx.data file that is read with pandas in the next cell\n",
"filename = \"../Dataset/crx.data\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"id": "0ZmzTR-CJra-",
"outputId": "d3930e63-2ba8-4ed5-b587-cd7b4dc209e7"
},
"outputs": [],
"source": [
"# Read the comma-separated file with pandas: it has no header row, and\n",
"# \"?\" entries are parsed as missing values\n",
"credData = pd.read_csv(filename, sep=\",\", header=None, na_values=\"?\")\n",
"credData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"colab_type": "code",
"id": "rXYA47JRKVz-",
"outputId": "09439927-ebd1-4a6f-adb4-de3a7cb2a55d"
},
"outputs": [],
"source": [
"# Recode the class label stored in column 15: '+' becomes 1 and '-' becomes 0\n",
"credData.loc[credData[15] == \"+\", 15] = 1\n",
"credData.loc[credData[15] == \"-\", 15] = 0\n",
"credData.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"colab_type": "code",
"id": "R9-NFhigmokr",
"outputId": "502dc65e-de17-423a-b2af-7778fd4e76ba"
},
"outputs": [],
"source": [
"# Keep only the rows without any missing value, then show the remaining size\n",
"newcred = credData.dropna(axis=\"index\")\n",
"newcred.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "vxzQZpXMZZN6"
},
"outputs": [],
"source": [
"# Separating the categorical variables and turning them into dummy variables\n",
"credCat = pd.get_dummies(newcred[[0, 3, 4, 5, 6, 8, 9, 11, 12]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "__Gup8InbTmf"
},
"outputs": [],
"source": [
"# Separating the numerical variables\n",
"credNum = newcred[[1, 2, 7, 10, 13, 14]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "5S_Icyu1r8YJ",
"outputId": "4ae45242-019c-46f7-e79f-37451cea8ad1"
},
"outputs": [],
"source": [
"# Making the X variable which is a concatenation of categorical and numerical data\n",
"X = pd.concat([credCat, credNum], axis=1)\n",
"# pd.get_dummies names its columns with strings while the numerical columns\n",
"# keep their integer labels. scikit-learn 1.2+ raises a TypeError for\n",
"# mixed-type feature names, so convert every column name to a string.\n",
"X.columns = X.columns.astype(str)\n",
"print(X.shape)\n",
"\n",
"# Separating the label (column 15) as the y variable, cast to integers\n",
"y = pd.Series(newcred[15], dtype=\"int\")\n",
"print(y.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
},
"colab_type": "code",
"id": "ZflX7J-5GtY_",
"outputId": "235118e8-e90c-4b51-c073-ba12614510f9"
},
"outputs": [],
"source": [
"# Normalising the data sets\n",
"# Import library function\n",
"from sklearn import preprocessing\n",
"# Creating the scaling function\n",
"minmaxScaler = preprocessing.MinMaxScaler()\n",
"# Transforming with the scaler function\n",
"X_tran = pd.DataFrame(minmaxScaler.fit_transform(X))\n",
"# Printing the output\n",
"X_tran.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "iJiHJ6zWJ9y_"
},
"outputs": [],
"source": [
"# Importing the helper used to split the data\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 30% of the rows as the test set; the fixed seed keeps the split reproducible\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X_tran, y, test_size=0.3, random_state=123\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "n4I7ujXBxnNj"
},
"source": [
"**Stacking**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "TsNS-IAN3urW"
},
"outputs": [],
"source": [
"# Importing the meta learner and base learners\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"bl1 = KNeighborsClassifier(n_neighbors=5)\n",
"bl2 = RandomForestClassifier(random_state=123)\n",
"ml = LogisticRegression(random_state=123)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 71
},
"colab_type": "code",
"id": "Z6BvZ7Jzxmph",
"outputId": "22028791-1461-406f-9e4d-8fb820e19e11"
},
"outputs": [],
"source": [
"# Creating the stacking classifier\n",
"from mlxtend.classifier import StackingClassifier\n",
"stackclf = StackingClassifier(classifiers=[bl1, bl2], \n",
" meta_classifier=ml)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 105
},
"colab_type": "code",
"id": "LVq5ljcdx4R4",
"outputId": "b5a6170f-a284-411b-ddbf-bf8ab163f974"
},
"outputs": [],
"source": [
"# Fitting the model on the training set\n",
"model = stackclf.fit(X_train, y_train)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "R9kr6DBeyKuX"
},
"outputs": [],
"source": [
"# Generating predictions on test set\n",
"pred = model.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 170
},
"colab_type": "code",
"id": "BMFA_SivyVRv",
"outputId": "68f9115b-28b0-4d9a-bf35-fc72df2e6dda"
},
"outputs": [],
"source": [
"# Printing the classification report\n",
"from sklearn.metrics import classification_report\n",
"print(classification_report(y_test, pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"colab_type": "code",
"id": "zTwx5_3GyeoR",
"outputId": "da4d0c06-b93c-4509-9fdd-3a20fea54323"
},
"outputs": [],
"source": [
"# Printing the confusion matrix\n",
"from sklearn.metrics import confusion_matrix\n",
"print(confusion_matrix(y_test, pred))"
]
}
],
"metadata": {
"colab": {
"name": "Exercise 15.07 : Ensemble learning - Stacking",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}