mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 16:41:05 +00:00
53 lines
1.7 KiB
Python
53 lines
1.7 KiB
Python
import unittest
|
|
import import_ipynb
|
|
import pandas as pd
|
|
import numpy as np
|
|
import pandas.testing as pd_testing
|
|
import numpy.testing as np_testing
|
|
from sklearn.cluster import KMeans
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
class Test(unittest.TestCase):
    """Compare variables exposed by the ``Activity5_1`` module with reference values.

    ``setUp`` recomputes every reference value from the raw data file; each
    test then compares one attribute of the imported module against it.
    """

    def setUp(self):
        """Import the module under test and rebuild the expected objects."""
        # Imported here rather than at module level, as in the original;
        # presumably resolved through the ``import_ipynb`` hook loaded at the
        # top of the file -- confirm against the project layout.
        import Activity5_1

        self.exercises = Activity5_1

        self.file_url = '../DataSet/german.data-numeric'
        # * Raw string: ``\s`` must reach pandas as a regex escape and is an
        #   invalid escape sequence in a plain string literal.
        # * A regex separator is only handled by the python parsing engine, so
        #   it is requested explicitly instead of relying on the fallback.
        # * ``read_csv(prefix=...)`` no longer exists in pandas 2.x;
        #   ``add_prefix`` yields the same ``X0, X1, ...`` column labels.
        self.df = pd.read_csv(
            self.file_url, header=None, sep=r'\s\s+', engine='python'
        ).add_prefix('X')

        # Only these two columns are used as clustering features.
        self.X = self.df[['X3', 'X9']]
        self.standard_scaler = StandardScaler()
        self.X_scaled = self.standard_scaler.fit_transform(self.X)

        # Inertia of a k-means fit for every k in 1..14.
        self.clusters = pd.DataFrame()
        self.inertia = []
        self.clusters['cluster_range'] = range(1, 15)
        for k in self.clusters['cluster_range']:
            self.kmeans = KMeans(n_clusters=k, random_state=8).fit(self.X_scaled)
            self.inertia.append(self.kmeans.inertia_)
        self.clusters['inertia'] = self.inertia

        # Final model with the fixed number of clusters; its predictions are
        # stored as an extra column on the reference frame.
        self.clusters_number = 5
        self.kmeans = KMeans(
            random_state=1,
            n_clusters=self.clusters_number,
            init='k-means++',
            n_init=50,
            max_iter=1000,
        )
        self.kmeans.fit(self.X_scaled)
        self.df['cluster'] = self.kmeans.predict(self.X_scaled)

    def test_file_url(self):
        """The module's ``file_url`` equals the reference path."""
        self.assertEqual(self.exercises.file_url, self.file_url)

    def test_df(self):
        """The module's ``df`` equals the reference frame, cluster column included."""
        pd_testing.assert_frame_equal(self.exercises.df, self.df)

    def test_X_scaled(self):
        """The module's ``X_scaled`` equals the reference scaled features."""
        np_testing.assert_array_equal(self.exercises.X_scaled, self.X_scaled)

    def test_clusters(self):
        """The module's ``clusters`` frame equals the reference range/inertia table."""
        pd_testing.assert_frame_equal(self.exercises.clusters, self.clusters)

    def test_inertia(self):
        """The module's ``inertia`` list equals the reference inertia values."""
        np_testing.assert_array_equal(self.exercises.inertia, self.inertia)

    def test_clusters_number(self):
        """The module's ``clusters_number`` equals the reference value."""
        self.assertEqual(self.exercises.clusters_number, self.clusters_number)
|
|
|
|
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|