{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyPEBJ6UnJGiLAl/F4VbQEn/", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "4BYhNI_DVlrd" }, "outputs": [], "source": [] }, { "source": [ "import os\n", "\n", "from IPython import get_ipython\n", "from IPython.display import display\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from google.colab import files\n", "from sklearn.preprocessing import LabelEncoder\n", "from xgboost import XGBClassifier\n", "from sklearn.metrics import confusion_matrix, accuracy_score\n", "from sklearn.model_selection import cross_val_score" ], "cell_type": "code", "metadata": { "id": "x58skzkAVrK9" }, "execution_count": 1, "outputs": [] }, { "source": [ "# Name of the CSV file expected in the working directory\n", "DATASET_PATH = 'dataset.csv'\n", "\n", "# Load Dataset from Local Directory\n", "# Prompt for an upload only when the file is missing, so that re-running the\n", "# notebook does not block on the interactive upload widget again\n", "if not os.path.exists(DATASET_PATH):\n", "    uploaded = files.upload()\n", "\n", "# Importing the dataset\n", "dataset = pd.read_csv(DATASET_PATH)\n", "print(dataset.shape)\n", "print(dataset.head(5))" ], "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 460 }, "id": "Wv58GcVQVsAP", "outputId": "1c7e07ef-7408-4d02-fcc4-54546265d3e3" }, "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. 
Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving dataset.csv to dataset.csv\n", "(683, 11)\n", " Sample code number Clump Thickness Uniformity of Cell Size \\\n", "0 1000025 5 1 \n", "1 1002945 5 4 \n", "2 1015425 3 1 \n", "3 1016277 6 8 \n", "4 1017023 4 1 \n", "\n", " Uniformity of Cell Shape Marginal Adhesion Single Epithelial Cell Size \\\n", "0 1 1 2 \n", "1 4 5 7 \n", "2 1 1 2 \n", "3 8 1 3 \n", "4 1 3 2 \n", "\n", " Bare Nuclei Bland Chromatin Normal Nucleoli Mitoses Class \n", "0 1 3 1 1 2 \n", "1 10 3 2 1 2 \n", "2 2 3 1 1 2 \n", "3 4 3 7 1 2 \n", "4 1 3 1 1 2 \n" ] } ] }, { "source": [ "# Segregating Dataset into features (X) and target (y)\n", "# Column 0 ('Sample code number') is a sample identifier rather than a cell\n", "# measurement, so it is left out of the features; the last column ('Class')\n", "# is the target\n", "X = dataset.iloc[:, 1:-1].values\n", "y = dataset.iloc[:, -1].values\n", "\n", "# Encode the target variable 'y' as consecutive integer labels starting at 0\n", "le = LabelEncoder()\n", "y = le.fit_transform(y)\n", "\n", "# Splitting Dataset into Train & Test (80/20; fixed seed keeps the split reproducible)\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" ], "cell_type": "code", "metadata": { "id": "RcSmj6qPVs6C" }, "execution_count": 3, "outputs": [] }, { "source": [ "# Training with XGBoost\n", "classifier = XGBClassifier()\n", "classifier.fit(X_train, y_train)" ], "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 254 }, "id": "kikaoHatVvp7", "outputId": "37c5e300-7dd3-459b-9d2f-11f86aececa5" }, "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, 
max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...)" ], "text/html": [ "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
              "              colsample_bylevel=None, colsample_bynode=None,\n",
              "              colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
              "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
              "              gamma=None, grow_policy=None, importance_type=None,\n",
              "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
              "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
              "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
              "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
              "              multi_strategy=None, n_estimators=None, n_jobs=None,\n",
              "              num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 4 } ] }, { "source": [ "# Forming Confusion Matrix on the held-out test split\n", "y_pred = classifier.predict(X_test)\n", "cm = confusion_matrix(y_test, y_pred)\n", "print(cm)\n", "# Print the test accuracy explicitly: as a bare expression in the middle of the\n", "# cell its value was computed but never displayed\n", "print(\"Test accuracy: {:.2f} %\".format(accuracy_score(y_test, y_pred) * 100))\n", "\n", "# K-Fold Cross Validation (10 folds, run on the training split only)\n", "accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)\n", "print(\"Accuracy: {:.2f} %\".format(accuracies.mean() * 100))" ], "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LoXwTRLPVwby", "outputId": "54849688-c935-4532-8d8e-0e29beb8c857" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[85 2]\n", " [ 1 49]]\n", "Accuracy: 96.71 %\n" ] } ] } ] }