mirror of
https://github.com/fenago/data-science.git
synced 2026-05-13 13:04:30 +00:00
792 lines
19 KiB
Plaintext
792 lines
19 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HEiOAwQPW0qb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "j7whidfaYjns"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"disp_url = '../Dataset/disp.csv'\n",
|
|
"trans_url = '../Dataset/trans.csv'\n",
|
|
"account_url = '../Dataset/account.csv'\n",
|
|
"client_url = '../Dataset/client.csv'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 71
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 8712,
|
|
"status": "ok",
|
|
"timestamp": 1574650430652,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "nFMz2jNVt-xy",
|
|
"outputId": "95f4874b-575f-4b8d-ede4-734116c4a502"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_disp = pd.read_csv(disp_url, sep=';')\n",
|
|
"df_trans = pd.read_csv(trans_url, sep=';')\n",
|
|
"df_account = pd.read_csv(account_url, sep=';')\n",
|
|
"df_client = pd.read_csv(client_url, sep=';')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 8706,
|
|
"status": "ok",
|
|
"timestamp": 1574650430653,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "xAqQ7rpV0nkT",
|
|
"outputId": "d059dc0b-ace2-467e-c3cd-8361faf44170"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_trans.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 8701,
|
|
"status": "ok",
|
|
"timestamp": 1574650430654,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "zybRLGrd2bPf",
|
|
"outputId": "8e1da605-af26-46e1-9bf1-6e5b9a337857"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_trans.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 8696,
|
|
"status": "ok",
|
|
"timestamp": 1574650430655,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "IvvSvYl42Koi",
|
|
"outputId": "c90574b1-5620-4d6b-e13b-ce8ba5ae66f0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_account.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "Fk2bTvEp17Bp"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_trans_acc = pd.merge(df_trans, df_account, how='left', on='account_id')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9099,
|
|
"status": "ok",
|
|
"timestamp": 1574650431066,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "9BN6zHeL2lOx",
|
|
"outputId": "357d0b26-daed-429e-dd5f-7f9c0e5a4a27"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_trans_acc.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9095,
|
|
"status": "ok",
|
|
"timestamp": 1574650431067,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "98f8YCkO2q41",
|
|
"outputId": "213b7737-8471-49f8-f25a-7aa78acac4f8"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_disp.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "ap--Zddi3KYG"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_disp_owner = df_disp[df_disp['type'] == 'OWNER']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9088,
|
|
"status": "ok",
|
|
"timestamp": 1574650431068,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "8rvJpD6V3VWU",
|
|
"outputId": "e950a8f3-2d85-4c82-b9b6-d62b6df6cc2d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_disp_owner.duplicated(subset='account_id').sum()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9081,
|
|
"status": "ok",
|
|
"timestamp": 1574650431069,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "aeshCYj63Ezw",
|
|
"outputId": "27de9e85-0643-4997-e318-f3d6bd7b3dd6"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_trans_acc_disp = pd.merge(df_trans_acc, df_disp_owner, how='left', on='account_id')\n",
|
|
"df_trans_acc_disp.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9076,
|
|
"status": "ok",
|
|
"timestamp": 1574650431070,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "J-_jUOwP3GGJ",
|
|
"outputId": "3b19d650-c852-41d9-fa99-7123158a34ce"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_client.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9440,
|
|
"status": "ok",
|
|
"timestamp": 1574650431439,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "35JidayO4C5X",
|
|
"outputId": "da451ba8-56fd-4d66-f147-5d3c8f817991"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged = pd.merge(df_trans_acc_disp, df_client, how='left', on=['client_id', 'district_id'])\n",
|
|
"df_merged.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 85
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9436,
|
|
"status": "ok",
|
|
"timestamp": 1574650431440,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "8wGMCNV547RR",
|
|
"outputId": "1069eb5a-7f28-47b4-8cf5-aa03fee7eac9"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "2oyeYQnB5ODm"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.rename(columns={'date_x': 'trans_date', 'type_x': 'trans_type', 'date_y':'account_creation', 'type_y':'client_type'}, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 309
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9997,
|
|
"status": "ok",
|
|
"timestamp": 1574650432011,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "tiPA68we6G-v",
|
|
"outputId": "be267f16-4953-459f-a9d9-2139b91f4f31"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 323
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9993,
|
|
"status": "ok",
|
|
"timestamp": 1574650432012,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "GV_1Jh4H6Qy4",
|
|
"outputId": "e0a7024c-8bc7-429c-a4c8-c012c21347fc"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "tLCnpFRU5lna"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['trans_date'] = pd.to_datetime(df_merged['trans_date'], format=\"%y%m%d\")\n",
|
|
"df_merged['account_creation'] = pd.to_datetime(df_merged['account_creation'], format=\"%y%m%d\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 323
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9986,
|
|
"status": "ok",
|
|
"timestamp": 1574650432014,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "6f791uGS6P25",
|
|
"outputId": "37cec372-1702-40de-8981-c1320a696ff1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "p3Z3dB8h7lXd"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['is_female'] = (df_merged['birth_number'] % 10000) / 5000 > 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9980,
|
|
"status": "ok",
|
|
"timestamp": 1574650432015,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "4_Ml3J8f7PI_",
|
|
"outputId": "68f53d4a-618f-4c25-b9f7-9ded0650191a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['birth_number'].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "X4yKolNl8ys8"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.loc[df_merged['is_female'] == True, 'birth_number'] -= 5000"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9973,
|
|
"status": "ok",
|
|
"timestamp": 1574650432016,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "HxWTe7zS91aB",
|
|
"outputId": "4260e7bd-1b96-49d6-a392-a7efa8ac9e3d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['birth_number'].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 221
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 9967,
|
|
"status": "ok",
|
|
"timestamp": 1574650432017,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "A8cKsZf86v9u",
|
|
"outputId": "75087216-14a2-40fa-966d-960ffeb45acb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"pd.to_datetime(df_merged['birth_number'], format=\"%y%m%d\", errors='coerce')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 10897,
|
|
"status": "ok",
|
|
"timestamp": 1574650432952,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "F6YkBC4eClRA",
|
|
"outputId": "f676d8f3-f03a-4bbf-f93a-69e2b032c225"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['birth_number'] = df_merged['birth_number'].astype(str)\n",
|
|
"df_merged['birth_number'].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 10893,
|
|
"status": "ok",
|
|
"timestamp": 1574650432953,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "RkEYHyd4Dtz7",
|
|
"outputId": "5cba124b-35fc-4a9c-f790-a30ff44a6cb0"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"df_merged.loc[df_merged['birth_number'] == 'nan', 'birth_number'] = np.nan\n",
|
|
"df_merged['birth_number'].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 11346,
|
|
"status": "ok",
|
|
"timestamp": 1574650433411,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "y2LZf7Q3CIU7",
|
|
"outputId": "1e4fa5c2-3a93-4015-a27f-067466bdc28e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged.loc[~df_merged['birth_number'].isna(), 'birth_number'] = '19' + df_merged.loc[~df_merged['birth_number'].isna(), 'birth_number']\n",
|
|
"df_merged['birth_number'].head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 374
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 11341,
|
|
"status": "ok",
|
|
"timestamp": 1574650433411,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "14FMFck_D5pE",
|
|
"outputId": "64ade2cc-2cea-43eb-c7eb-37931a93c001"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['birth_number'] = pd.to_datetime(df_merged['birth_number'], format=\"%Y%m%d\", errors='coerce')\n",
|
|
"df_merged['birth_number'].head(20)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "UnOMNI5w5-jD"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['age_at_creation'] = df_merged['account_creation'] - df_merged['birth_number']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "DFt1MP-6E7cI"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['age_at_creation'] = df_merged['age_at_creation'] / np.timedelta64(1,'Y')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 309
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 11751,
|
|
"status": "ok",
|
|
"timestamp": 1574650433833,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "a_c3aRiiFzga",
|
|
"outputId": "13446355-b3af-478f-a143-931f06f91617"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df_merged['age_at_creation'] = df_merged['age_at_creation'].round()\n",
|
|
"df_merged.head()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Activity12_1.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|