mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 16:41:05 +00:00
87 lines
3.1 KiB
Python
87 lines
3.1 KiB
Python
import unittest
|
|
import import_ipynb
|
|
import pandas as pd
|
|
import pandas.testing as pd_testing
|
|
import numpy.testing as np_testing
|
|
|
|
import numpy as np
|
|
from sklearn.model_selection import train_test_split
|
|
import statsmodels.formula.api as smf
|
|
|
|
|
|
class Test(unittest.TestCase):
    """Compare the variables exposed by ``Activity2_02`` with reference values.

    ``setUp`` rebuilds the expected data frames, the train/test split and the
    fitted log-linear model from the Boston CSV; every test method then checks
    one attribute of the imported module against its reference counterpart.
    """

    def setUp(self):
        """Import the module under test and recompute every reference value."""
        # NOTE(review): presumably a notebook made importable by the
        # file-level ``import import_ipynb`` -- confirm.
        import Activity2_02

        self.exercises = Activity2_02

        # Reference data: load the CSV, then drop incomplete and repeated rows.
        loaded = pd.read_csv('../Dataset/Boston.csv')
        self.rawBostonData = loaded.dropna().drop_duplicates()

        # Original CSV header -> descriptive column name.  Keys are kept
        # exactly as written (including surrounding spaces).
        column_names = {
            'CRIM': 'crimeRatePerCapita',
            ' ZN ': 'landOver25K_sqft',
            'INDUS ': 'non-retailLandProptn',
            'CHAS': 'riverDummy',
            'NOX': 'nitrixOxide_pp10m',
            'RM': 'AvgNo.RoomsPerDwelling',
            'AGE': 'ProptnOwnerOccupied',
            'DIS': 'weightedDist',
            'RAD': 'radialHighwaysAccess',
            'TAX': 'propTaxRate_per10K',
            'PTRATIO': 'pupilTeacherRatio',
            'LSTAT': 'pctLowerStatus',
            'MEDV': 'medianValue_Ks',
        }
        self.renamedBostonData = self.rawBostonData.rename(columns=column_names)

        # The crime rate is the response; all remaining columns are predictors.
        self.X = self.renamedBostonData.drop('crimeRatePerCapita', axis=1)
        self.y = self.renamedBostonData[['crimeRatePerCapita']]

        self.seed = 10
        self.test_data_size = 0.3
        partitions = train_test_split(
            self.X,
            self.y,
            test_size=self.test_data_size,
            random_state=self.seed,
        )
        self.X_train, self.X_test, self.y_train, self.y_test = partitions

        # Re-join predictors and response column-wise for the formula API.
        self.train_data = pd.concat([self.X_train, self.y_train], axis=1)
        self.test_data = pd.concat([self.X_test, self.y_test], axis=1)

        # Log-linear model: four predictors plus their pairwise interactions.
        model_formula = 'np.log(crimeRatePerCapita) ~ (pctLowerStatus + radialHighwaysAccess + medianValue_Ks + nitrixOxide_pp10m)**2'
        self.multiLogLinMod = smf.ols(formula=model_formula, data=self.train_data)
        self.multiLogLinModResult = self.multiLogLinMod.fit()

    def test_rawBostonData(self):
        # Cleaned raw frame must match the reference frame.
        actual = self.exercises.rawBostonData
        pd_testing.assert_frame_equal(actual, self.rawBostonData)

    def test_renamedBostonData(self):
        # Frame with descriptive column names must match the reference frame.
        actual = self.exercises.renamedBostonData
        pd_testing.assert_frame_equal(actual, self.renamedBostonData)

    def test_X(self):
        # Predictor values must equal the reference predictors element-wise.
        actual = self.exercises.X
        np_testing.assert_array_equal(actual, self.X)

    def test_y(self):
        # Response values must equal the reference response element-wise.
        actual = self.exercises.y
        np_testing.assert_array_equal(actual, self.y)

    def test_seed(self):
        # The random seed must be the expected constant.
        actual = self.exercises.seed
        self.assertEqual(actual, self.seed)

    def test_test_data_size(self):
        # The held-out fraction must be the expected constant.
        actual = self.exercises.test_data_size
        self.assertEqual(actual, self.test_data_size)

    def test_X_train(self):
        # Training predictors must equal the reference split.
        actual = self.exercises.X_train
        np_testing.assert_array_equal(actual, self.X_train)

    def test_X_test(self):
        # Test predictors must equal the reference split.
        actual = self.exercises.X_test
        np_testing.assert_array_equal(actual, self.X_test)

    def test_y_train(self):
        # Training response must equal the reference split.
        actual = self.exercises.y_train
        np_testing.assert_array_equal(actual, self.y_train)

    def test_y_test(self):
        # Test response must equal the reference split.
        actual = self.exercises.y_test
        np_testing.assert_array_equal(actual, self.y_test)

    def test_train_data(self):
        # Combined training frame must match the reference frame.
        actual = self.exercises.train_data
        pd_testing.assert_frame_equal(actual, self.train_data)

    def test_test_data(self):
        # Combined test frame must match the reference frame.
        actual = self.exercises.test_data
        pd_testing.assert_frame_equal(actual, self.test_data)

    def test_params(self):
        # Fitted coefficients must equal the reference model's coefficients.
        actual = self.exercises.multiLogLinModResult.params
        np_testing.assert_array_equal(actual, self.multiLogLinModResult.params)
|
|
|
|
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|