mirror of
https://github.com/fenago/data-science.git
synced 2026-05-06 01:22:41 +00:00
282 lines
6.4 KiB
Plaintext
282 lines
6.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1482,
|
|
"status": "ok",
|
|
"timestamp": 1596620773872,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "jC-tIAMXIT_l"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn import datasets\n",
|
|
"\n",
|
|
"# import data\n",
|
|
"digits = datasets.load_digits()\n",
|
|
"\n",
|
|
"# target\n",
|
|
"y = digits.target\n",
|
|
"\n",
|
|
"# features\n",
|
|
"X = digits.data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1767,
|
|
"status": "ok",
|
|
"timestamp": 1596620863966,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "IyLRPz2EIWJ_"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn import ensemble\n",
|
|
"\n",
|
|
"# an ensemble of 100 estimators\n",
|
|
"rfc = ensemble.RandomForestClassifier(n_estimators=100, random_state=100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 35
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1245,
|
|
"status": "ok",
|
|
"timestamp": 1596620863968,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "Tr0ZLj2yJnM2",
|
|
"outputId": "fc20432b-d520-4462-c8b6-5b3cd950a17f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# how many features do we have in our dataset?\n",
|
|
"n_features = X.shape[1]\n",
|
|
"\n",
|
|
"print(n_features)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 911,
|
|
"status": "ok",
|
|
"timestamp": 1596620864645,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "vuJQ0pJuIvCL"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from scipy import stats\n",
|
|
"import numpy as np\n",
|
|
"np.random.seed(100)\n",
|
|
"\n",
|
|
"# we would like to sample from criterion and max_features as discrete uniform distributions\n",
|
|
"param_dist = {\n",
|
|
" 'criterion': ['gini', 'entropy'],\n",
|
|
" 'max_features': stats.randint(low=1, high=n_features + 1)  # randint excludes `high`; +1 lets max_features reach n_features\n",
|
|
"}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1096,
|
|
"status": "ok",
|
|
"timestamp": 1596620866276,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "3-vP3LMpKQ2S"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn import model_selection\n",
|
|
"\n",
|
|
"# setting up the random search sampling 50 times and conducting 5-fold cross-validation\n",
|
|
"rscv = model_selection.RandomizedSearchCV(estimator=rfc, param_distributions=param_dist, n_iter=50, cv=5, scoring='accuracy', random_state=100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 444
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 234185,
|
|
"status": "ok",
|
|
"timestamp": 1596621100375,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "yOporT_hKaTd",
|
|
"outputId": "d34916b6-03da-4183-dd64-904177f2f0ff"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# start the process\n",
|
|
"rscv.fit(X,y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 124
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1908,
|
|
"status": "ok",
|
|
"timestamp": 1596621454897,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "Hn06r3ktKkln",
|
|
"outputId": "2cdb8c61-2f87-4fb8-bac6-accbac0e5166"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"# convert the dictionary of results to a pandas dataframe\n",
|
|
"results = pd.DataFrame(rscv.cv_results_)\n",
|
|
"\n",
|
|
"# keep only the sampled params and their mean test score\n",
|
|
"distinct_results = results.loc[:,['params','mean_test_score']]\n",
|
|
"\n",
|
|
"# convert the params dictionaries to string data types\n",
|
|
"distinct_results.loc[:,'params'] = distinct_results.loc[:,'params'].astype('str')\n",
|
|
"\n",
|
|
"# remove duplicates\n",
|
|
"distinct_results.drop_duplicates(inplace=True)\n",
|
|
"\n",
|
|
"# look at the top 5 best hyperparameterizations\n",
|
|
"print(distinct_results.sort_values('mean_test_score', ascending=False).head(5))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 283
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 1979,
|
|
"status": "ok",
|
|
"timestamp": 1596621460667,
|
|
"user": {
|
|
"displayName": "Mahesh Dhyani",
|
|
"photoUrl": "",
|
|
"userId": "02713575687182950579"
|
|
},
|
|
"user_tz": -330
|
|
},
|
|
"id": "zctH43A48Z0r",
|
|
"outputId": "9de9e641-e20b-460e-8336-3c9d528c8726"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# top performing models\n",
|
|
"distinct_results[distinct_results.mean_test_score > 0.93].sort_values('mean_test_score').plot.barh(x='params', xlim=(0.9))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Excerise_8_3.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|