Files
fenago f3b24b4b7f added
2021-02-07 15:16:01 +05:00

163 lines
3.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "D7JgAqboXaYz"
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "rV4rlbk_X_Cx"
},
"outputs": [],
"source": [
"# header=None makes pandas treat the first line as data and number the columns 0..N-1.\n",
"mushrooms = pd.read_csv(\n",
"    filepath_or_buffer='../Dataset/agaricus-lepiota.data',\n",
"    header=None,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-nC04CscYImK"
},
"outputs": [],
"source": [
"# Column 0 is used as the class label; all remaining columns are the raw features.\n",
"X_raw = mushrooms.iloc[:, 1:]\n",
"y_raw = mushrooms.iloc[:, 0]\n",
"\n",
"# Encode the target as integers: 1 where the label equals 'p', 0 otherwise.\n",
"y = y_raw.eq('p').mul(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "_b7quE4PYLIZ"
},
"outputs": [],
"source": [
"from sklearn import preprocessing\n",
"\n",
"# One-hot encode every column of X_raw; fit() returns the encoder itself.\n",
"encoder = preprocessing.OneHotEncoder().fit(X_raw)\n",
"\n",
"# transform() output is converted to a dense NumPy array via toarray().\n",
"X = encoder.transform(X_raw).toarray()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 889
},
"colab_type": "code",
"id": "gIdVSmXOYPuC",
"outputId": "f27f27ea-6128-417d-db22-e72a2f529c5e"
},
"outputs": [],
"source": [
"# Initialize the classifier (fixed random_state so repeated runs give the same forests).\n",
"from sklearn import ensemble\n",
"rfc = ensemble.RandomForestClassifier(n_estimators=100, random_state=100)\n",
"\n",
"# Conduct a grid search over the split criterion and a fixed list of max_features values.\n",
"from sklearn import model_selection\n",
"grid = {\n",
"    'criterion': ['gini', 'entropy'],\n",
"    'max_features': [2, 4, 6, 8, 10, 12, 14]\n",
"}\n",
"gscv = model_selection.GridSearchCV(estimator=rfc, param_grid=grid, cv=5, scoring='accuracy')\n",
"gscv.fit(X, y)\n",
"grid_results = pd.DataFrame(gscv.cv_results_)\n",
"# Only the last expression of a cell is rendered automatically, so the grid-search\n",
"# table must be displayed explicitly; as a bare expression it was silently discarded.\n",
"display(grid_results.sort_values('rank_test_score', ascending=True).head(10))\n",
"\n",
"# Conduct a random search over the same criterion choices and a sampled max_features.\n",
"from scipy import stats\n",
"max_features = X.shape[1]\n",
"param_dist = {\n",
"    'criterion': ['gini', 'entropy'],\n",
"    # scipy's randint excludes `high`, so this samples integers 1 .. max_features - 1.\n",
"    'max_features': stats.randint(low=1, high=max_features)\n",
"}\n",
"rscv = model_selection.RandomizedSearchCV(estimator=rfc, param_distributions=param_dist, n_iter=50, cv=5, scoring='accuracy', random_state=100)\n",
"rscv.fit(X, y)\n",
"# `results` holds the random-search table; the plotting cell below reads it.\n",
"results = pd.DataFrame(rscv.cv_results_)\n",
"results.sort_values('rank_test_score', ascending=True).head(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 282
},
"colab_type": "code",
"id": "QVVAb9cjYfhh",
"outputId": "94a4bd34-512a-4813-e356-aaa03674b58b"
},
"outputs": [],
"source": [
"# Stringify the parameter dicts so they can be de-duplicated and used as bar labels.\n",
"results.loc[:, 'params'] = results.loc[:, 'params'].astype(str)\n",
"\n",
"# Pick the ten best-ranked configurations (rank 1 is best). Sorting with\n",
"# ascending=False before head(10) would select the ten worst ones instead.\n",
"top_results = (\n",
"    results.sort_values('rank_test_score', ascending=True)\n",
"    .loc[:, ['params', 'mean_test_score']]\n",
"    .drop_duplicates()\n",
"    .head(10)\n",
"    .iloc[::-1]  # barh draws the first row at the bottom, so reverse to put the best on top\n",
")\n",
"\n",
"ax = top_results.plot.barh(x='params', legend=False)\n",
"# Start the x-axis at 0.8 so small differences between configurations stay visible.\n",
"ax.set_xlim(left=0.8)\n",
"ax.set_xlabel('mean_test_score')\n",
"ax.set_title('Top 10 random-search configurations');"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"name": "Activity8.01.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}