mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
369 lines
7.1 KiB
Plaintext
369 lines
7.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "lPeZ4zc4NZ3G"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import altair as alt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "aPjMg_BfNjk7"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../dataset/churn.csv'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "6ynf1L9mOm0p"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv(file_url)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "5BH1588M5Xiu",
|
|
"outputId": "d9c77a45-9b3c-4f0e-b1e3-9e5ca93cf5cc"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 340
|
|
},
|
|
"colab_type": "code",
|
|
"id": "lGNVxXRa5ml7",
|
|
"outputId": "c3ab5acb-1c0e-430a-b373-f9c549565401"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"id": "v9a1K2z45yj8",
|
|
"outputId": "4e12e44e-aae0-47a3-b045-c3e35338a974"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"id": "j8HTP8ev6JfU",
|
|
"outputId": "adbb749c-35e4-414b-e873-2a1c57833089"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.tail()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 224
|
|
},
|
|
"colab_type": "code",
|
|
"id": "jAe01B6c6ouX",
|
|
"outputId": "f464197a-49c6-422b-b1b0-18d5253d397d"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.sample(n=5, random_state=8)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 317
|
|
},
|
|
"colab_type": "code",
|
|
"id": "_YRYPtRW7FrQ",
|
|
"outputId": "3c0f9d06-3892-49cf-f50a-c258434dc36c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.describe()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 68
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Vu1Cj1Ad7lvn",
|
|
"outputId": "286dc11a-cb89-458b-a6ea-d834557b0599"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"for col_name in ['churn', 'internationalplan', 'voicemailplan']:\n",
|
|
" print(f\"{col_name}: {df[col_name].unique()}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "lWJ8lXbX7m8u"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"chart1 = alt.Chart(df).mark_bar().encode(\n",
|
|
" alt.X(\"churn\"), \n",
|
|
" y='count()'\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "9HIBNaD79_X7"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"chart2 = alt.Chart(df).mark_bar().encode(\n",
|
|
" alt.X(\"internationalplan\"), \n",
|
|
" y='count()'\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "EXEXUCR8-I8v"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"chart3 = alt.Chart(df).mark_bar().encode(\n",
|
|
" alt.X(\"voicemailplan\"), \n",
|
|
" y='count()'\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 377
|
|
},
|
|
"colab_type": "code",
|
|
"id": "6QNfvoXD-UR0",
|
|
"outputId": "15358e7d-23c6-48d5-82b5-415ccc717762"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"chart1 | chart2 | chart3"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HIIQZYLg_aIM"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"num_df = df.select_dtypes(include='number')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 119
|
|
},
|
|
"colab_type": "code",
|
|
"id": "YIfkkZpI_gyt",
|
|
"outputId": "5035d229-3e77-4945-8406-b43c44bb6b41"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"num_cols = num_df.columns\n",
|
|
"num_cols.values"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "AZ2zHyKjCFo-"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"charts = alt.Chart(df).mark_bar().encode(\n",
|
|
" alt.X(alt.repeat(\"column\"), type='quantitative', bin=True),\n",
|
|
" y='count()'\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 390
|
|
},
|
|
"colab_type": "code",
|
|
"id": "O74hCOKP-tBC",
|
|
"outputId": "d5ff5894-285e-41c0-c59c-fefd56a36a97"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"charts.repeat(\n",
|
|
" column=list(num_cols)\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 375
|
|
},
|
|
"colab_type": "code",
|
|
"id": "0AEba_79GRDO",
|
|
"outputId": "cfc83547-6f3a-42d4-c0cc-27f2dacbe186"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"alt.Chart(df).mark_boxplot().encode(\n",
|
|
" x='churn',\n",
|
|
" y='totaldaycalls'\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 375
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Ui2womT0GGra",
|
|
"outputId": "f62243a9-7aa8-4e84-f016-9b3afc982763"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"alt.Chart(df).mark_boxplot().encode(\n",
|
|
" x='churn',\n",
|
|
" y='numbervmailmessages'\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Activity10_1.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|