Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

139 lines
7.4 KiB
Python

import unittest
import import_ipynb
import pandas as pd
import pandas.testing as pd_testing
import numpy.testing as np_testing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
class Test(unittest.TestCase):
    """Compare the ``Activity4_1`` notebook's variables with reference values.

    ``setUp`` reloads the dataset, repeats the train/test split and refits
    every random forest configuration. Each test method then compares one
    group of the notebook's module-level variables with the values computed
    here.
    """

    # Names, in return order, of the values produced by ``fit_predict_rf``.
    # ``setUp`` stores each of them on the instance as ``<name>_<index>``.
    _RESULT_NAMES = ('rf_model', 'trn_preds', 'tst_preds', 'trn_acc', 'tst_acc')

    # Hyper-parameters of the numbered forests as (attribute index, kwargs).
    _RF_CONFIGS = (
        (1, dict(n_estimators=20, max_depth=None, min_samples_leaf=1, max_features='sqrt')),
        (2, dict(n_estimators=50, max_depth=None, min_samples_leaf=1, max_features='sqrt')),
        (3, dict(n_estimators=50, max_depth=5, min_samples_leaf=1, max_features='sqrt')),
        (4, dict(n_estimators=50, max_depth=10, min_samples_leaf=1, max_features='sqrt')),
        (5, dict(n_estimators=50, max_depth=10, min_samples_leaf=10, max_features='sqrt')),
        (6, dict(n_estimators=50, max_depth=10, min_samples_leaf=50, max_features='sqrt')),
        (7, dict(n_estimators=50, max_depth=10, min_samples_leaf=50, max_features=0.5)),
        (8, dict(n_estimators=50, max_depth=10, min_samples_leaf=50, max_features=0.3)),
    )

    def setUp(self):
        """Import the notebook and rebuild every reference value."""
        # Imported inside setUp, as in the original. Presumably resolved
        # through the module-level ``import_ipynb`` import -- TODO confirm.
        import Activity4_1
        self.exercises = Activity4_1

        self.file_url = '../Dataset/test/phpB0xrNj.csv'
        self.df = pd.read_csv(self.file_url)
        # pop() removes the 'class' column from self.df, so self.df holds
        # only the remaining columns from here on.
        self.y = self.df.pop('class')
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.df, self.y, test_size=0.3, random_state=888)

        # Baseline forest built with train_rf's default hyper-parameters.
        self.rf_1 = self.train_rf(self.X_train, self.y_train)
        self.trn_preds, self.tst_preds = self.get_preds(
            self.rf_1, self.X_train, self.X_test)
        # The second value returned is the test accuracy. It is stored in
        # tst_acc so that the test-set predictions kept in tst_preds are not
        # overwritten by a float.
        self.trn_acc, self.tst_acc = self.print_accuracy(
            self.y_train, self.y_test, self.trn_preds, self.tst_preds)

        # Numbered forests: sets rf_model_N, trn_preds_N, tst_preds_N,
        # trn_acc_N and tst_acc_N for N = 1..8.
        for index, params in self._RF_CONFIGS:
            results = self.fit_predict_rf(
                self.X_train, self.X_test, self.y_train, self.y_test,
                random_state=888, **params)
            for name, value in zip(self._RESULT_NAMES, results):
                setattr(self, '{}_{}'.format(name, index), value)

    def train_rf(self, X_train, y_train, random_state=888, n_estimators=10,
                 max_depth=None, min_samples_leaf=1, max_features='sqrt'):
        """Fit a ``RandomForestClassifier`` on the training data and return it.

        The keyword arguments are forwarded unchanged to the classifier's
        constructor.
        """
        rf_model = RandomForestClassifier(
            random_state=random_state, n_estimators=n_estimators,
            max_depth=max_depth, min_samples_leaf=min_samples_leaf,
            max_features=max_features)
        rf_model.fit(X_train, y_train)
        return rf_model

    def get_preds(self, rf_model, X_train, X_test):
        """Return ``(train_preds, test_preds)`` predicted by ``rf_model``."""
        train_preds = rf_model.predict(X_train)
        test_preds = rf_model.predict(X_test)
        return train_preds, test_preds

    def print_accuracy(self, y_train, y_test, train_preds, test_preds):
        """Return ``(train_acc, test_acc)`` computed with ``accuracy_score``.

        Despite the name, nothing is printed here; the accuracies are only
        returned.
        """
        train_acc = accuracy_score(y_train, train_preds)
        test_acc = accuracy_score(y_test, test_preds)
        return train_acc, test_acc

    def fit_predict_rf(self, X_train, X_test, y_train, y_test, random_state=888,
                       n_estimators=10, max_depth=None, min_samples_leaf=1,
                       max_features='sqrt'):
        """Train a forest, predict on both splits and score the predictions.

        Returns the tuple
        ``(rf_model, train_preds, test_preds, train_acc, test_acc)``.
        """
        rf_model = self.train_rf(
            X_train, y_train, random_state=random_state,
            n_estimators=n_estimators, max_depth=max_depth,
            min_samples_leaf=min_samples_leaf, max_features=max_features)
        train_preds, test_preds = self.get_preds(rf_model, X_train, X_test)
        train_acc, test_acc = self.print_accuracy(
            y_train, y_test, train_preds, test_preds)
        return rf_model, train_preds, test_preds, train_acc, test_acc

    def _assert_rf_results(self, suffix=''):
        """Check one forest's predictions and accuracies against the notebook.

        ``suffix`` selects the attribute group: ``''`` for the baseline
        forest, ``'_<n>'`` for the n-th numbered configuration.
        """
        for name in ('trn_preds', 'tst_preds'):
            np_testing.assert_array_equal(
                getattr(self.exercises, name + suffix),
                getattr(self, name + suffix))
        for name in ('trn_acc', 'tst_acc'):
            self.assertEqual(
                getattr(self.exercises, name + suffix),
                getattr(self, name + suffix))

    def test_file_url(self):
        self.assertEqual(self.exercises.file_url, self.file_url)

    def test_df(self):
        pd_testing.assert_frame_equal(self.exercises.df, self.df)

    def test_y(self):
        np_testing.assert_array_equal(self.exercises.y, self.y)

    def test_X_train(self):
        np_testing.assert_array_equal(self.exercises.X_train, self.X_train)

    def test_X_test(self):
        np_testing.assert_array_equal(self.exercises.X_test, self.X_test)

    def test_y_train(self):
        np_testing.assert_array_equal(self.exercises.y_train, self.y_train)

    def test_y_test(self):
        np_testing.assert_array_equal(self.exercises.y_test, self.y_test)

    def test_rf_model(self):
        # Baseline forest. The last check compares the test accuracy
        # (tst_acc) rather than repeating the tst_preds comparison.
        # NOTE(review): assumes the notebook exposes ``tst_acc`` like it does
        # ``tst_acc_1`` .. ``tst_acc_8`` -- confirm against Activity4_1.
        self._assert_rf_results()

    def test_rf_model_1(self):
        self._assert_rf_results('_1')

    def test_rf_model_2(self):
        self._assert_rf_results('_2')

    def test_rf_model_3(self):
        self._assert_rf_results('_3')

    def test_rf_model_4(self):
        self._assert_rf_results('_4')

    def test_rf_model_5(self):
        self._assert_rf_results('_5')

    def test_rf_model_6(self):
        self._assert_rf_results('_6')

    def test_rf_model_7(self):
        self._assert_rf_results('_7')

    def test_rf_model_8(self):
        self._assert_rf_results('_8')
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()