{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 493 }, "colab_type": "code", "id": "teXyiRIIu4ru", "outputId": "b39c0aef-22fa-4929-e2d5-e6ba28681beb" },
   "outputs": [],
   "source": [
    "# Install LIME before anything imports it; %pip targets the running kernel's environment.\n",
    "%pip install lime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "6lf7dlK6Ykw8" },
   "outputs": [],
   "source": [
    "import altair as alt\n",
    "import pandas as pd\n",
    "from lime.lime_tabular import LimeTabularExplainer\n",
    "from mlxtend.evaluate import feature_importance_permutation\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# plot_partial_dependence was removed in scikit-learn 1.2. Where the replacement\n",
    "# PartialDependenceDisplay.from_estimator is available (>= 1.0), alias it to the\n",
    "# old name so the partial dependence cell runs on both old and new releases.\n",
    "try:\n",
    "    from sklearn.inspection import PartialDependenceDisplay\n",
    "    plot_partial_dependence = PartialDependenceDisplay.from_estimator\n",
    "except (ImportError, AttributeError):\n",
    "    from sklearn.inspection import plot_partial_dependence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "_5m6hmonYhPH" },
   "outputs": [],
   "source": [
    "# Location of the dataset, relative to this notebook.\n",
    "file_url = '../Dataset/KDDCup99.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "vdt3sXysYrRj" },
   "outputs": [],
   "source": [
    "df = pd.read_csv(file_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 224 }, "colab_type": "code", "id": "TanrIa0OY5CL", "outputId": "485b874f-83bb-472a-8578-ed931d592740" },
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "MMGyKdDxZDmG" },
   "outputs": [],
   "source": [
    "# Read the target without mutating df, so this cell can be re-run safely\n",
    "# (df.pop would raise KeyError on a second run).\n",
    "y = df['label']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "YR3muhtlMpSN" },
   "outputs": [],
   "source": [
    "# One-hot encode the non-numeric columns of the features (everything except the label).\n",
    "# The dummy dtype is pinned because pandas 2.0 changed the default from uint8 to bool.\n",
    "X = pd.get_dummies(df.drop(columns=['label']), dtype='uint8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "FTQJZEYfZDrt" },
   "outputs": [],
   "source": [
    "# Hold out 30% of the rows for testing; random_state fixes the split.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 170 }, "colab_type": "code", "id": "R3FoM7q_YqDS", "outputId": "8ea7c58f-cfdf-4d6b-9b77-d1c3b7c86632" },
   "outputs": [],
   "source": [
    "rf_model = RandomForestClassifier(random_state=168)\n",
    "rf_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "7OrrC55GNKyH" },
   "outputs": [],
   "source": [
    "train_preds = rf_model.predict(X_train)\n",
    "test_preds = rf_model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "colab_type": "code", "id": "kf31T5toNK_X", "outputId": "3243de5a-304e-4533-da6f-c27dc929bf81" },
   "outputs": [],
   "source": [
    "# Accuracy on the training and the testing sets, printed in that order.\n",
    "train_acc = accuracy_score(y_train, train_preds)\n",
    "test_acc = accuracy_score(y_test, test_preds)\n",
    "print(train_acc)\n",
    "print(test_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 527 }, "colab_type": "code", "id": "m0PbG5G1C4jl", "outputId": "01e44422-7535-4e09-e520-6055b4c813b2" },
   "outputs": [],
   "source": [
    "# Permutation importance on the training set: a single shuffle per feature (num_rounds=1)\n",
    "# with a fixed seed; only the mean importances are kept.\n",
    "imp_vals, _ = feature_importance_permutation(predict_method=rf_model.predict, X=X_train.values, y=y_train.values, metric='accuracy', num_rounds=1, seed=2)\n",
    "imp_vals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "QU8f-YtcfPDv" },
   "outputs": [],
   "source": [
    "perm_varimp_df = pd.DataFrame({'feature': X_train.columns, 'importance': imp_vals})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "id": "dxT-DTETDDJM", "outputId": "f6cfc9b8-f854-4808-9a7c-788a31a12a0e" },
   "outputs": [],
   "source": [
    "# Reassign instead of sorting in place.\n",
    "perm_varimp_df = perm_varimp_df.sort_values('importance', ascending=False)\n",
    "perm_varimp_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 463 }, "colab_type": "code", "id": "hOvXZ892DJnG", "outputId": "e7a12985-07f0-45a5-ad63-9dd9a65b591e", "scrolled": true },
   "outputs": [],
   "source": [
    "# Bar chart of the 20 most important features, largest first.\n",
    "alt.Chart(perm_varimp_df[:20]).mark_bar().encode(\n",
    "    x='importance',\n",
    "    y=alt.Y('feature:N', sort=alt.SortField(field='importance', order='descending'))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 221 }, "colab_type": "code", "id": "REYq4IgnX_3z", "outputId": "6e472dbb-315e-45ad-cd0c-ee3ef4b4aefd" },
   "outputs": [],
   "source": [
    "# Partial dependence of the predicted probability of the \"normal\" class on src_bytes.\n",
    "feature_index = X_train.columns.get_loc(\"src_bytes\")\n",
    "plot_partial_dependence(rf_model, X_train, features=[feature_index], feature_names=X_train.columns, target=\"normal\", response_method=\"predict_proba\", n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 408 }, "colab_type": "code", "id": "8UP9bJlLggn5", "outputId": "ff22695c-19b6-4747-b2cd-431e0308a581" },
   "outputs": [],
   "source": [
    "# Sorted so the names line up with the column order of rf_model.predict_proba (rf_model.classes_).\n",
    "class_names = sorted(y.unique())\n",
    "class_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": {}, "colab_type": "code", "id": "K4F-4QHsvuuN" },
   "outputs": [],
   "source": [
    "# random_state is set so that the explanations are reproducible.\n",
    "lime_explainer = LimeTabularExplainer(\n",
    "    X_train.values,\n",
    "    feature_names=X_train.columns,\n",
    "    class_names=class_names,\n",
    "    mode='classification',\n",
    "    random_state=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "colab_type": "code", "id": "nDFatUx0N_3x", "outputId": "7771ebc5-0086-499e-ab02-99295c7e85fb" },
   "outputs": [],
   "source": [
    "# Explain one test row; LIME expects the instance as a 1-D NumPy array, not a pandas Series.\n",
    "instance = X_test.iloc[99893].to_numpy()\n",
    "exp = lime_explainer.explain_instance(instance, rf_model.predict_proba, num_features=50, top_labels=1)\n",
    "exp.show_in_notebook()"
   ]
  }
 ],
 "metadata": {
  "colab": { "collapsed_sections": [], "name": "Activity9_1", "provenance": [] },
  "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
  "language_info": {
   "codemirror_mode": { "name": "ipython", "version": 3 },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}