Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

70 lines
3.1 KiB
Python

import unittest
import import_ipynb
import pandas as pd
import numpy as np
import pandas.testing as pd_testing
import numpy.testing as np_testing
from sklearn.cluster import KMeans
class Test(unittest.TestCase):
    """Check the variables defined by the ``Activity12_1`` notebook.

    The expected objects are rebuilt here from the four CSV files, and each
    test compares one attribute of the imported notebook module against its
    locally built counterpart.
    """

    # Length of one numpy 'Y' timedelta unit (365.2425 days) in seconds.
    # Newer pandas releases refuse to divide by ``np.timedelta64(1, 'Y')``
    # because the unit is ambiguous, so the same duration is spelled out
    # explicitly; the resulting quotient is unchanged.
    _SECONDS_PER_YEAR = 31556952

    @classmethod
    def setUpClass(cls):
        """Import the notebook and build the expected fixtures once.

        None of the tests mutates these objects, so they are shared by all
        test methods instead of being re-read and re-merged before each one.
        """
        # Imported here rather than at module level, as in the original
        # fixture; presumably resolved through the ``import_ipynb`` hook
        # imported at the top of the file -- TODO confirm.
        import Activity12_1

        cls.exercises = Activity12_1

        cls.disp_url = '../Dataset/disp.csv'
        cls.trans_url = '../Dataset/trans.csv'
        cls.account_url = '../Dataset/account.csv'
        cls.client_url = '../Dataset/client.csv'

        cls.df_disp = pd.read_csv(cls.disp_url, sep=';')
        cls.df_trans = pd.read_csv(cls.trans_url, sep=';')
        cls.df_account = pd.read_csv(cls.account_url, sep=';')
        cls.df_client = pd.read_csv(cls.client_url, sep=';')

        cls.df_trans_acc = pd.merge(
            cls.df_trans, cls.df_account, how='left', on='account_id')
        cls.df_disp_owner = cls.df_disp[cls.df_disp['type'] == 'OWNER']
        cls.df_trans_acc_disp = pd.merge(
            cls.df_trans_acc, cls.df_disp_owner, how='left', on='account_id')
        cls.df_merged = cls._prepare_merged(
            pd.merge(cls.df_trans_acc_disp, cls.df_client, how='left',
                     on=['client_id', 'district_id']))

    @classmethod
    def _prepare_merged(cls, df_merged):
        """Rename, parse and derive the columns of the fully merged frame.

        The frame is modified in place and also returned. The steps are kept
        in the same order as the original fixture so the result stays
        comparable with the notebook's ``df_merged``.
        """
        df_merged.rename(
            columns={
                'date_x': 'trans_date',
                'type_x': 'trans_type',
                'date_y': 'account_creation',
                'type_y': 'client_type',
            },
            inplace=True,
        )

        for date_col in ('trans_date', 'account_creation'):
            df_merged[date_col] = pd.to_datetime(
                df_merged[date_col], format="%y%m%d")

        # Rows whose last four digits exceed 5000 are flagged as female and
        # have that 5000 offset removed before the value is parsed as a date.
        df_merged['is_female'] = (df_merged['birth_number'] % 10000) / 5000 > 1
        df_merged.loc[df_merged['is_female'], 'birth_number'] -= 5000

        # Convert to text, turn the stringified missing marker back into a
        # real NaN, then prefix the remaining values with the century.
        df_merged['birth_number'] = df_merged['birth_number'].astype(str)
        df_merged.loc[df_merged['birth_number'] == 'nan', 'birth_number'] = np.nan
        has_birth = df_merged['birth_number'].notna()
        df_merged.loc[has_birth, 'birth_number'] = (
            '19' + df_merged.loc[has_birth, 'birth_number'])
        df_merged['birth_number'] = pd.to_datetime(
            df_merged['birth_number'], format="%Y%m%d", errors='coerce')

        # Age in whole years at the time the account was created.
        age = df_merged['account_creation'] - df_merged['birth_number']
        age = age / np.timedelta64(cls._SECONDS_PER_YEAR, 's')
        df_merged['age_at_creation'] = age.round()
        return df_merged

    def test_disp_url(self):
        """The notebook's ``disp_url`` matches the expected path."""
        self.assertEqual(self.exercises.disp_url, self.disp_url)

    def test_client_url(self):
        """The notebook's ``client_url`` matches the expected path."""
        self.assertEqual(self.exercises.client_url, self.client_url)

    def test_trans_url(self):
        """The notebook's ``trans_url`` matches the expected path."""
        self.assertEqual(self.exercises.trans_url, self.trans_url)

    def test_account_url(self):
        """The notebook's ``account_url`` matches the expected path."""
        self.assertEqual(self.exercises.account_url, self.account_url)

    def test_df_trans_acc(self):
        """Transactions left-joined with accounts match the notebook."""
        pd_testing.assert_frame_equal(
            self.exercises.df_trans_acc, self.df_trans_acc)

    def test_df_disp_owner(self):
        """Disposition rows of type ``'OWNER'`` match the notebook."""
        pd_testing.assert_frame_equal(
            self.exercises.df_disp_owner, self.df_disp_owner)

    def test_df_trans_acc_disp(self):
        """The frame joined with owner dispositions matches the notebook."""
        pd_testing.assert_frame_equal(
            self.exercises.df_trans_acc_disp, self.df_trans_acc_disp)

    def test_df_merged(self):
        """The fully merged and transformed frame matches the notebook."""
        pd_testing.assert_frame_equal(
            self.exercises.df_merged, self.df_merged)
if __name__ == '__main__':
    # Executed as a script: let unittest discover and run the tests above.
    unittest.main()