mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
496 lines
11 KiB
Plaintext
496 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "_b0A-ElAnHj2"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import joblib\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "xuyNn2VlnHm9"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../Dataset/breast-cancer-wisconsin.data'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "MSH55O2Qn3nn"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"col_names = ['Sample code number','Clump Thickness','Uniformity of Cell Size','Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial Cell Size',\n",
|
|
"'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "qHM7W8jTnHye"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv(file_url, header=None, names=col_names, na_values='?')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 221
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2805,
|
|
"status": "ok",
|
|
"timestamp": 1574652400671,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "HZ5iAGhvntZU",
|
|
"outputId": "e1ab5f63-dd7e-4b9d-d6fa-c185fca8e268"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "pHG3ADzWpIXU"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.fillna(0, inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "bjMOEqmuntdD"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y = df.pop('Class')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "HHs8bS9qntgL"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"X = df.drop('Sample code number', axis=1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 221
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 2792,
|
|
"status": "ok",
|
|
"timestamp": 1574652400674,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "8fB7TTDuoZw1",
|
|
"outputId": "f0219dd4-a5f8-4e4c-d909-8f74d173b63a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"X.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "TnmZuUKbnti9"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=888)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "rM2D75s0otzY"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"rf_model = RandomForestClassifier(random_state=1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 190
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 3128,
|
|
"status": "ok",
|
|
"timestamp": 1574652401023,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "VAOTJvwCot_C",
|
|
"outputId": "1e6f7e74-ae56-4269-f7fd-dfc531fedbae"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"rf_model.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 3122,
|
|
"status": "ok",
|
|
"timestamp": 1574652401024,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "rMI1mOihouBq",
|
|
"outputId": "dd35bd04-d606-4bcc-8e9c-8e54e5f185a1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"rf_model.predict([X_test.iloc[0,]])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 3117,
|
|
"status": "ok",
|
|
"timestamp": 1574652401027,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "E3wJTFm2qgUV",
|
|
"outputId": "3e59acd8-28e2-4fa0-8d77-fa768064eeb1"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"joblib.dump(rf_model, \"model.pkl\") "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "kl29WUo3W4D5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import socket\n",
|
|
"import threading\n",
|
|
"import requests\n",
|
|
"import json\n",
|
|
"from flask import Flask, jsonify, request\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 3110,
|
|
"status": "ok",
|
|
"timestamp": 1574652401029,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "FHV797Dlqac_",
|
|
"outputId": "2ff13d41-9be1-4217-ac32-3759c232900a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ip_address = socket.gethostbyname(socket.gethostname()) + '8080'\n",
|
|
"ip_address"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "rBMrb4UNqb3U"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"app = Flask(__name__)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "YoseBWhgqcd7"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"trained_model = joblib.load(\"model.pkl\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "ZwPHtIQKqn5c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"@app.route('/api', methods=['POST'])\n",
|
|
"def predict():\n",
|
|
" data = request.get_json()\n",
|
|
" prediction = trained_model.predict(data)\n",
|
|
" str_pred = np.array2string(prediction)\n",
|
|
" return jsonify(str_pred)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 3098,
|
|
"status": "ok",
|
|
"timestamp": 1574652401032,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "6iLQtxEEz6vK",
|
|
"outputId": "ca7b029e-e96d-4b19-d1e0-ec301e80083a"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"flask_thread = threading.Thread(target=app.run, kwargs={'host':'0.0.0.0','port':8080})\n",
|
|
"flask_thread.start()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 102
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 3092,
|
|
"status": "ok",
|
|
"timestamp": 1574652401032,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "_KmZqH-mqn72",
|
|
"outputId": "e6c3a62a-bdd7-400d-9f5d-a8fe5f8d755f"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"record = X_test.iloc[0,].to_list()\n",
|
|
"record"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "iL7ezCzKqn-Y"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"j_data = json.dumps([record])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "uRU36nLVq1Vh"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 51
|
|
},
|
|
"colab_type": "code",
|
|
"executionInfo": {
|
|
"elapsed": 4177,
|
|
"status": "ok",
|
|
"timestamp": 1574652402130,
|
|
"user": {
|
|
"displayName": "Anthony So",
|
|
"photoUrl": "https://lh3.googleusercontent.com/a-/AAuE7mCYY-iGjUIqBSnlLoszfZTN7rU7FRNg05Rdt9Ii3A=s64",
|
|
"userId": "11809607246124237079"
|
|
},
|
|
"user_tz": -660
|
|
},
|
|
"id": "ORrZn6Y3q1cl",
|
|
"outputId": "ab0c1a64-66f9-4d3e-b4e3-7b4f5ff82c92"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"r = requests.post(f\"http://{ip_address}/api\", data=j_data, headers=headers)\n",
|
|
"r.text"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise18_02.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|