import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier

# Location of the breast-cancer-wisconsin data file, relative to this notebook.
file_url = '../Dataset/breast-cancer-wisconsin.data'

# The file is read with header=None below, so the column names are supplied here.
col_names = ['Sample code number','Clump Thickness','Uniformity of Cell Size','Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial Cell Size',
'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class']

# '?' entries in the file are parsed as NaN.
# The loaded frame is kept untouched under its own name so that every cell
# below can be re-run without error; the previous in-place pop()/drop() raised
# KeyError on a second execution because the columns were already gone.
raw_df = pd.read_csv(file_url, header=None, names=col_names, na_values='?')

# Target column.
y = raw_df['Class']

# Feature frame: everything except the target and the sample identifier.
df = raw_df.drop(columns=['Class', 'Sample code number'])
# Size of the training partition: the first 70% of the rows.
training_rows = int(df.shape[0] * 0.7)
training_rows

# Positional, unshuffled split. Each partition is an independent copy rather
# than a slice of `df`, so the imputation applied to X_train later neither
# triggers SettingWithCopyWarning nor writes through to `df`.
X_train = df.iloc[:training_rows].copy()
y_train = y.iloc[:training_rows].copy()
X_test = df.iloc[training_rows:].copy()
y_test = y.iloc[training_rows:].copy()

# Missing-value count per feature in the training partition.
X_train.isna().sum()

# Columns for which a mean will be computed. Selecting on "is numeric" rather
# than "is not object" keeps non-numeric, non-object dtypes out of the list.
num_columns = X_train.select_dtypes(include='number').columns.tolist()
num_columns
# Per-column mean computed on the training partition only.
column_mean = {col: X_train[col].mean() for col in num_columns}
column_mean

import pickle

# Persist the means for the prediction service. The context manager
# guarantees the file is flushed and closed before it is read back.
with open("columns_mean.pkl", "wb") as mean_file:
    pickle.dump(column_mean, mean_file)

# Fill missing values column by column from the dict. This rebinds X_train to
# the filled frame instead of calling fillna(inplace=True) on a column
# selection, which is chained assignment and leaves the frame unchanged when
# pandas Copy-on-Write is active.
X_train = X_train.fillna(value=column_mean)

# Fit the classifier on the imputed training data and persist it.
rf_model = RandomForestClassifier(random_state=1)
rf_model.fit(X_train, y_train)
joblib.dump(rf_model, "model.pkl")

import socket
import threading
import requests
import json
from flask import Flask, jsonify, request
import numpy as np

app = Flask(__name__)

# Artefacts written by the training cells above.
# NOTE(review): pickle/joblib loading executes arbitrary code from the file;
# only load files that this notebook itself produced.
trained_model = joblib.load("model.pkl")
with open("columns_mean.pkl", "rb") as mean_file:
    var_means = pickle.load(mean_file)


@app.route('/api', methods=['POST'])
def predict():
    """Predict the class of a single record posted as a JSON object.

    The request body maps feature names to values; null or absent features
    are replaced by the stored training means before prediction.

    Returns:
        A JSON string holding ``np.array2string`` of the model's prediction,
        or a JSON error message with HTTP status 400 when the payload is not
        a JSON object of numeric (or null) values.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        return jsonify("expected a JSON object mapping feature names to values"), 400

    try:
        df_test = pd.DataFrame(data, index=[0])
        # Align to the columns (and order) the model was fitted with, when
        # the estimator recorded them; absent features become NaN here and
        # are imputed below instead of raising KeyError.
        expected_cols = getattr(trained_model, "feature_names_in_", None)
        if expected_cols is not None:
            df_test = df_test.reindex(columns=list(expected_cols))
        # JSON null arrives as None in an object column; cast so it is NaN.
        df_test = df_test.astype(float)
    except (TypeError, ValueError):
        return jsonify("feature values must be numeric or null"), 400

    # Assignment form instead of chained fillna(inplace=True), which does not
    # modify df_test when pandas Copy-on-Write is active.
    df_test = df_test.fillna(value=var_means)

    prediction = trained_model.predict(df_test)
    str_pred = np.array2string(prediction)
    return jsonify(str_pred)


# Run the development server in a background thread so the notebook stays
# interactive. It listens on all interfaces (0.0.0.0) because the client cell
# addresses the service through the host's resolved IP address.
flask_thread = threading.Thread(target=app.run, kwargs={'host':'0.0.0.0','port':8080})
flask_thread.start()
# First test row whose 'Bare Nuclei' value is missing, serialised as JSON, so
# the request exercises the service's imputation path.
record = X_test[X_test['Bare Nuclei'].isna()].iloc[0].to_json()
record

headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
# Address of the Flask service: this host's resolved IP plus the server port.
ip_address = socket.gethostbyname(socket.gethostname()) + ':8080' 

ip_address

# An explicit timeout keeps the cell from hanging forever if the server
# thread is not (yet) reachable; requests waits indefinitely by default.
r = requests.post(f"http://{ip_address}/api", data=record, headers=headers, timeout=10)
r.text