mirror of
https://github.com/fenago/data-science.git
synced 2026-05-04 08:31:59 +00:00
281 lines
5.2 KiB
Plaintext
281 lines
5.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "6GidYkS3bVYK"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "9eyKK38Rbanb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"file_url = '../Dataset/openml_phpZNNasq.csv'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "_id92wzgbc4Y"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv(file_url)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"id": "iCk_NHFJbvaK",
|
|
"outputId": "2064acaf-c432-4cf2-a717-9e0ade02b7df"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "VeD8BgUcb3cD"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = df.drop(columns='animal', errors='ignore')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "NJRvFFbhbtYy"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"y = df.pop('type')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 204
|
|
},
|
|
"colab_type": "code",
|
|
"id": "NZgdsim9cIfo",
|
|
"outputId": "be252a2a-a04f-4235-f006-403caf18fa15"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "c0VgFDj4bgE_"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "wiSxDUrVd5ZE"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.4, random_state=188)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "9uqyaQ_Sd5ZB"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "4LQz_9AId5ZA"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"rf_model = RandomForestClassifier(random_state=42, n_estimators=10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 170
|
|
},
|
|
"colab_type": "code",
|
|
"id": "SSrRmKxfd5Y-",
|
|
"outputId": "3a4e4d49-6f8b-4cf4-be1b-753d91c5601c"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"rf_model.fit(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "aMN0ckfjd5Y8"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_preds = rf_model.predict(X_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "Je8ehG4md5Y5"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.metrics import accuracy_score"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "fd8CL_NMd5Y0",
|
|
"outputId": "6606c863-c608-4328-ed3f-693c7ea9b495"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"train_acc = accuracy_score(y_train, train_preds)\n",
|
|
"print(train_acc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {},
|
|
"colab_type": "code",
|
|
"id": "83LhoI9_d5Yw"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_preds = rf_model.predict(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 34
|
|
},
|
|
"colab_type": "code",
|
|
"id": "Z7uzCM8zd5Yq",
|
|
"outputId": "60f08248-47b4-4b7e-9977-dcf0f8abc240"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"test_acc = accuracy_score(y_test, test_preds)\n",
|
|
"print(test_acc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"collapsed_sections": [],
|
|
"name": "Exercise4_01.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 1
|
|
}
|