mirror of
https://github.com/fenago/data-science.git
synced 2026-05-06 01:22:41 +00:00
163 lines
3.8 KiB
Plaintext
163 lines
3.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "D7JgAqboXaYz"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "rV4rlbk_X_Cx"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"mushrooms = pd.read_csv('../Dataset/agaricus-lepiota.data', header=None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "-nC04CscYImK"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y_raw = mushrooms.iloc[:,0]\n",
|
|
"X_raw = mushrooms.iloc[:,1:]\n",
|
|
"y = (y_raw == 'p') * 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "_b7quE4PYLIZ"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn import preprocessing\n",
|
|
"encoder = preprocessing.OneHotEncoder()\n",
|
|
"\n",
|
|
"encoder.fit(X_raw)\n",
|
|
"\n",
|
|
"X = encoder.transform(X_raw).toarray()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 889
|
|
},
|
|
"colab_type": "code",
|
|
"id": "gIdVSmXOYPuC",
|
|
"outputId": "f27f27ea-6128-417d-db22-e72a2f529c5e"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Initialize the classifier.\n",
|
|
"from sklearn import ensemble\n",
|
|
"rfc = ensemble.RandomForestClassifier(n_estimators=100, random_state=100)\n",
|
|
"\n",
|
|
"#Conduct a grid search.\n",
|
|
"from sklearn import model_selection\n",
|
|
"grid = {\n",
|
|
" 'criterion': ['gini', 'entropy'],\n",
|
|
" 'max_features': [2, 4, 6, 8, 10, 12, 14]\n",
|
|
"}\n",
|
|
"gscv = model_selection.GridSearchCV(estimator=rfc, param_grid=grid, cv=5, scoring='accuracy')\n",
|
|
"gscv.fit(X,y)\n",
|
|
"results = pd.DataFrame(gscv.cv_results_)\n",
|
|
"results.sort_values('rank_test_score', ascending=True).head(10)\n",
|
|
"\n",
|
|
"#Conduct a random search.\n",
|
|
"from scipy import stats\n",
|
|
"max_features = X.shape[1]\n",
|
|
"param_dist = {\n",
|
|
" 'criterion': ['gini', 'entropy'],\n",
|
|
" 'max_features': stats.randint(low=1, high=max_features)\n",
|
|
"}\n",
|
|
"rscv = model_selection.RandomizedSearchCV(estimator=rfc, param_distributions=param_dist, n_iter=50, cv=5, scoring='accuracy', random_state=100)\n",
|
|
"rscv.fit(X,y)\n",
|
|
"results = pd.DataFrame(rscv.cv_results_)\n",
|
|
"results.sort_values('rank_test_score', ascending=True).head(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 282
|
|
},
|
|
"colab_type": "code",
|
|
"id": "QVVAb9cjYfhh",
|
|
"outputId": "94a4bd34-512a-4813-e356-aaa03674b58b"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"results.loc[:,'params'] = results.loc[:,'params'].astype(str)\n",
|
|
"\n",
|
|
"(\n",
|
|
" results.sort_values('rank_test_score', ascending=False)\n",
|
|
" .loc[:,['params','mean_test_score']]\n",
|
|
" .drop_duplicates()\n",
|
|
" .head(10)\n",
|
|
" .plot.barh(x='params', xlim=(0.8))\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "Activity8.01.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|