DeepSeek-Coder/BreastCancerDetection_VariousMLAlgorithm.ipynb
2025-02-18 23:04:02 -08:00

938 lines
55 KiB
Plaintext

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/Orrm23/DeepSeek-Coder/blob/main/BreastCancerDetection_VariousMLAlgorithm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "AhpeX5Dm_eLW"
},
"source": [
"# **Day-10_BreastCancerDetection_VariousMLAlgorithm**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8rvHswu2td0Z"
},
"source": [
"### *Importing Libraries*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "P-YXxKhu_Uk4"
},
"source": [
"import pandas as pd #useful for loading the dataset\n",
"import numpy as np #to perform array\n",
"from matplotlib import pyplot"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "mOt3nocbwvZk"
},
"source": [
"### *Choose Dataset from Local Directory*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "xB2NMwXtw2dG",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 73
},
"outputId": "ec354929-3ba9-42a1-b9bb-ccf832a4bb1b"
},
"source": [
"from google.colab import files\n",
"uploaded = files.upload()"
],
"execution_count": 2,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <input type=\"file\" id=\"files-fecec7e8-bcb2-40b8-af0a-82ebe3c06ca1\" name=\"files[]\" multiple disabled\n",
" style=\"border:none\" />\n",
" <output id=\"result-fecec7e8-bcb2-40b8-af0a-82ebe3c06ca1\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script>// Copyright 2017 Google LLC\n",
"//\n",
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"// you may not use this file except in compliance with the License.\n",
"// You may obtain a copy of the License at\n",
"//\n",
"// http://www.apache.org/licenses/LICENSE-2.0\n",
"//\n",
"// Unless required by applicable law or agreed to in writing, software\n",
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"// See the License for the specific language governing permissions and\n",
"// limitations under the License.\n",
"\n",
"/**\n",
" * @fileoverview Helpers for google.colab Python module.\n",
" */\n",
"(function(scope) {\n",
"function span(text, styleAttributes = {}) {\n",
" const element = document.createElement('span');\n",
" element.textContent = text;\n",
" for (const key of Object.keys(styleAttributes)) {\n",
" element.style[key] = styleAttributes[key];\n",
" }\n",
" return element;\n",
"}\n",
"\n",
"// Max number of bytes which will be uploaded at a time.\n",
"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
"\n",
"function _uploadFiles(inputId, outputId) {\n",
" const steps = uploadFilesStep(inputId, outputId);\n",
" const outputElement = document.getElementById(outputId);\n",
" // Cache steps on the outputElement to make it available for the next call\n",
" // to uploadFilesContinue from Python.\n",
" outputElement.steps = steps;\n",
"\n",
" return _uploadFilesContinue(outputId);\n",
"}\n",
"\n",
"// This is roughly an async generator (not supported in the browser yet),\n",
"// where there are multiple asynchronous steps and the Python side is going\n",
"// to poll for completion of each step.\n",
"// This uses a Promise to block the python side on completion of each step,\n",
"// then passes the result of the previous step as the input to the next step.\n",
"function _uploadFilesContinue(outputId) {\n",
" const outputElement = document.getElementById(outputId);\n",
" const steps = outputElement.steps;\n",
"\n",
" const next = steps.next(outputElement.lastPromiseValue);\n",
" return Promise.resolve(next.value.promise).then((value) => {\n",
" // Cache the last promise value to make it available to the next\n",
" // step of the generator.\n",
" outputElement.lastPromiseValue = value;\n",
" return next.value.response;\n",
" });\n",
"}\n",
"\n",
"/**\n",
" * Generator function which is called between each async step of the upload\n",
" * process.\n",
" * @param {string} inputId Element ID of the input file picker element.\n",
" * @param {string} outputId Element ID of the output display.\n",
" * @return {!Iterable<!Object>} Iterable of next steps.\n",
" */\n",
"function* uploadFilesStep(inputId, outputId) {\n",
" const inputElement = document.getElementById(inputId);\n",
" inputElement.disabled = false;\n",
"\n",
" const outputElement = document.getElementById(outputId);\n",
" outputElement.innerHTML = '';\n",
"\n",
" const pickedPromise = new Promise((resolve) => {\n",
" inputElement.addEventListener('change', (e) => {\n",
" resolve(e.target.files);\n",
" });\n",
" });\n",
"\n",
" const cancel = document.createElement('button');\n",
" inputElement.parentElement.appendChild(cancel);\n",
" cancel.textContent = 'Cancel upload';\n",
" const cancelPromise = new Promise((resolve) => {\n",
" cancel.onclick = () => {\n",
" resolve(null);\n",
" };\n",
" });\n",
"\n",
" // Wait for the user to pick the files.\n",
" const files = yield {\n",
" promise: Promise.race([pickedPromise, cancelPromise]),\n",
" response: {\n",
" action: 'starting',\n",
" }\n",
" };\n",
"\n",
" cancel.remove();\n",
"\n",
" // Disable the input element since further picks are not allowed.\n",
" inputElement.disabled = true;\n",
"\n",
" if (!files) {\n",
" return {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
" }\n",
"\n",
" for (const file of files) {\n",
" const li = document.createElement('li');\n",
" li.append(span(file.name, {fontWeight: 'bold'}));\n",
" li.append(span(\n",
" `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
" `last modified: ${\n",
" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
" 'n/a'} - `));\n",
" const percent = span('0% done');\n",
" li.appendChild(percent);\n",
"\n",
" outputElement.appendChild(li);\n",
"\n",
" const fileDataPromise = new Promise((resolve) => {\n",
" const reader = new FileReader();\n",
" reader.onload = (e) => {\n",
" resolve(e.target.result);\n",
" };\n",
" reader.readAsArrayBuffer(file);\n",
" });\n",
" // Wait for the data to be ready.\n",
" let fileData = yield {\n",
" promise: fileDataPromise,\n",
" response: {\n",
" action: 'continue',\n",
" }\n",
" };\n",
"\n",
" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
" let position = 0;\n",
" do {\n",
" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
" const chunk = new Uint8Array(fileData, position, length);\n",
" position += length;\n",
"\n",
" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
" yield {\n",
" response: {\n",
" action: 'append',\n",
" file: file.name,\n",
" data: base64,\n",
" },\n",
" };\n",
"\n",
" let percentDone = fileData.byteLength === 0 ?\n",
" 100 :\n",
" Math.round((position / fileData.byteLength) * 100);\n",
" percent.textContent = `${percentDone}% done`;\n",
"\n",
" } while (position < fileData.byteLength);\n",
" }\n",
"\n",
" // All done.\n",
" yield {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
"}\n",
"\n",
"scope.google = scope.google || {};\n",
"scope.google.colab = scope.google.colab || {};\n",
"scope.google.colab._files = {\n",
" _uploadFiles,\n",
" _uploadFilesContinue,\n",
"};\n",
"})(self);\n",
"</script> "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving data.csv to data.csv\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "69Kx6TycwzHo"
},
"source": [
"### *Load Dataset*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "7aXWQK9Cw7Dz"
},
"source": [
"dataset = pd.read_csv('data.csv')"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "huZ5AaCtxD0p"
},
"source": [
"### *Summarize Dataset*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "SmD4EDrkxFzq",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c8de07bb-c31d-45b6-d7f1-f37549d27d5f"
},
"source": [
"# Show the (rows, columns) shape, then the first five rows, one after the other.\n",
"print(dataset.shape, dataset.head(), sep='\\n')"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(569, 33)\n",
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 842302 M 17.99 10.38 122.80 1001.0 \n",
"1 842517 M 20.57 17.77 132.90 1326.0 \n",
"2 84300903 M 19.69 21.25 130.00 1203.0 \n",
"3 84348301 M 11.42 20.38 77.58 386.1 \n",
"4 84358402 M 20.29 14.34 135.10 1297.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.11840 0.27760 0.3001 0.14710 \n",
"1 0.08474 0.07864 0.0869 0.07017 \n",
"2 0.10960 0.15990 0.1974 0.12790 \n",
"3 0.14250 0.28390 0.2414 0.10520 \n",
"4 0.10030 0.13280 0.1980 0.10430 \n",
"\n",
" ... texture_worst perimeter_worst area_worst smoothness_worst \\\n",
"0 ... 17.33 184.60 2019.0 0.1622 \n",
"1 ... 23.41 158.80 1956.0 0.1238 \n",
"2 ... 25.53 152.50 1709.0 0.1444 \n",
"3 ... 26.50 98.87 567.7 0.2098 \n",
"4 ... 16.67 152.20 1575.0 0.1374 \n",
"\n",
" compactness_worst concavity_worst concave points_worst symmetry_worst \\\n",
"0 0.6656 0.7119 0.2654 0.4601 \n",
"1 0.1866 0.2416 0.1860 0.2750 \n",
"2 0.4245 0.4504 0.2430 0.3613 \n",
"3 0.8663 0.6869 0.2575 0.6638 \n",
"4 0.2050 0.4000 0.1625 0.2364 \n",
"\n",
" fractal_dimension_worst Unnamed: 32 \n",
"0 0.11890 NaN \n",
"1 0.08902 NaN \n",
"2 0.08758 NaN \n",
"3 0.17300 NaN \n",
"4 0.07678 NaN \n",
"\n",
"[5 rows x 33 columns]\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-MFwV-9k3Tu1"
},
"source": [
"### *Mapping Class String Values to Numbers*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "yJvwzEfF3ZMe",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7138e1d5-0f52-4a8d-80e1-63f3280ea23f"
},
"source": [
"# Encode the diagnosis label as an integer: 'B' -> 0 and 'M' -> 1\n",
"# (presumably benign / malignant - confirm against the dataset description).\n",
"# NOTE: this overwrites the column in place; re-running the cell without reloading\n",
"# the CSV maps the already-encoded 0/1 values to NaN and astype(int) then fails.\n",
"dataset['diagnosis'] = dataset['diagnosis'].map({'B': 0, 'M': 1}).astype(int)\n",
"# Call head(): printing the bare `dataset.head` attribute only shows the bound-method repr.\n",
"print(dataset.head())"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<bound method NDFrame.head of id diagnosis radius_mean texture_mean perimeter_mean \\\n",
"0 842302 1 17.99 10.38 122.80 \n",
"1 842517 1 20.57 17.77 132.90 \n",
"2 84300903 1 19.69 21.25 130.00 \n",
"3 84348301 1 11.42 20.38 77.58 \n",
"4 84358402 1 20.29 14.34 135.10 \n",
".. ... ... ... ... ... \n",
"564 926424 1 21.56 22.39 142.00 \n",
"565 926682 1 20.13 28.25 131.20 \n",
"566 926954 1 16.60 28.08 108.30 \n",
"567 927241 1 20.60 29.33 140.10 \n",
"568 92751 0 7.76 24.54 47.92 \n",
"\n",
" area_mean smoothness_mean compactness_mean concavity_mean \\\n",
"0 1001.0 0.11840 0.27760 0.30010 \n",
"1 1326.0 0.08474 0.07864 0.08690 \n",
"2 1203.0 0.10960 0.15990 0.19740 \n",
"3 386.1 0.14250 0.28390 0.24140 \n",
"4 1297.0 0.10030 0.13280 0.19800 \n",
".. ... ... ... ... \n",
"564 1479.0 0.11100 0.11590 0.24390 \n",
"565 1261.0 0.09780 0.10340 0.14400 \n",
"566 858.1 0.08455 0.10230 0.09251 \n",
"567 1265.0 0.11780 0.27700 0.35140 \n",
"568 181.0 0.05263 0.04362 0.00000 \n",
"\n",
" concave points_mean ... texture_worst perimeter_worst area_worst \\\n",
"0 0.14710 ... 17.33 184.60 2019.0 \n",
"1 0.07017 ... 23.41 158.80 1956.0 \n",
"2 0.12790 ... 25.53 152.50 1709.0 \n",
"3 0.10520 ... 26.50 98.87 567.7 \n",
"4 0.10430 ... 16.67 152.20 1575.0 \n",
".. ... ... ... ... ... \n",
"564 0.13890 ... 26.40 166.10 2027.0 \n",
"565 0.09791 ... 38.25 155.00 1731.0 \n",
"566 0.05302 ... 34.12 126.70 1124.0 \n",
"567 0.15200 ... 39.42 184.60 1821.0 \n",
"568 0.00000 ... 30.37 59.16 268.6 \n",
"\n",
" smoothness_worst compactness_worst concavity_worst \\\n",
"0 0.16220 0.66560 0.7119 \n",
"1 0.12380 0.18660 0.2416 \n",
"2 0.14440 0.42450 0.4504 \n",
"3 0.20980 0.86630 0.6869 \n",
"4 0.13740 0.20500 0.4000 \n",
".. ... ... ... \n",
"564 0.14100 0.21130 0.4107 \n",
"565 0.11660 0.19220 0.3215 \n",
"566 0.11390 0.30940 0.3403 \n",
"567 0.16500 0.86810 0.9387 \n",
"568 0.08996 0.06444 0.0000 \n",
"\n",
" concave points_worst symmetry_worst fractal_dimension_worst \\\n",
"0 0.2654 0.4601 0.11890 \n",
"1 0.1860 0.2750 0.08902 \n",
"2 0.2430 0.3613 0.08758 \n",
"3 0.2575 0.6638 0.17300 \n",
"4 0.1625 0.2364 0.07678 \n",
".. ... ... ... \n",
"564 0.2216 0.2060 0.07115 \n",
"565 0.1628 0.2572 0.06637 \n",
"566 0.1418 0.2218 0.07820 \n",
"567 0.2650 0.4087 0.12400 \n",
"568 0.0000 0.2871 0.07039 \n",
"\n",
" Unnamed: 32 \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
".. ... \n",
"564 NaN \n",
"565 NaN \n",
"566 NaN \n",
"567 NaN \n",
"568 NaN \n",
"\n",
"[569 rows x 33 columns]>\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0qgR6rGRxH5y"
},
"source": [
"### *Segregate Dataset into X(Input/IndependentVariable) & Y(Output/DependentVariable)*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "8qOVIILpxefB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a50556fe-21af-46eb-ff7e-056be4e2d174"
},
"source": [
"# Feature matrix: positional columns 2..31 (radius_mean .. fractal_dimension_worst).\n",
"# Column 0 is the id, column 1 the diagnosis label, and column 32 is 'Unnamed: 32',\n",
"# which is NaN in every row printed above.\n",
"X = dataset.iloc[:, 2:32].to_numpy()\n",
"X"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n",
" 1.189e-01],\n",
" [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n",
" 8.902e-02],\n",
" [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n",
" 8.758e-02],\n",
" ...,\n",
" [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,\n",
" 7.820e-02],\n",
" [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n",
" 1.240e-01],\n",
" [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,\n",
" 7.039e-02]])"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "TE6LNAwmxkBn",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "93adc199-d960-4cdd-9d42-3ad978584020"
},
"source": [
"# Target vector: positional column 1 is the 'diagnosis' label (already encoded as 0/1).\n",
"Y = dataset.iloc[:, 1].to_numpy()\n",
"Y"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,\n",
" 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,\n",
" 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,\n",
" 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,\n",
" 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,\n",
" 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,\n",
" 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,\n",
" 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,\n",
" 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,\n",
" 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1,\n",
" 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,\n",
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,\n",
" 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,\n",
" 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
" 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0,\n",
" 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0])"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "oOzExtMjxmup"
},
"source": [
"### *Splitting Dataset into Train & Test*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "uJXcK2PHxqJ9"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "89y8rh-3yv15"
},
"source": [
"### *Feature Scaling*\n",
"### We scale our data so that all the features contribute equally to the result.\n",
"### Fit_Transform - the fit step calculates the mean and variance of each feature in the training data, and the transform step then scales the training data with them.\n",
"### Transform - transforms the test features using the mean and variance already calculated from the training data, without re-fitting.\n",
"### We want our test data to be a completely new, unseen set for our model, so the scaler is never fitted on it."
]
},
{
"cell_type": "code",
"metadata": {
"id": "ehsC_5BSy-Pa"
},
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"sc = StandardScaler()\n",
"X_train = sc.fit_transform(X_train)\n",
"X_test = sc.transform(X_test)"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "X1gsZ3YZ51gz"
},
"source": [
"### *Validating several ML algorithms by their accuracy - Model Score*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ekrjJPx_5-rJ"
},
"source": [
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.svm import SVC\n",
"\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.model_selection import StratifiedKFold"
],
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "7fmWEBDq6fUM"
},
"source": [
"# Candidate classifiers to compare, as (short name, unfitted estimator) pairs.\n",
"# LogisticRegression keeps solver='liblinear' but no longer passes multi_class='ovr':\n",
"# that parameter is deprecated since scikit-learn 1.5 (it raised a FutureWarning on\n",
"# every fit), and with liblinear on a binary target the default is already one-vs-rest.\n",
"models = [\n",
"    ('LR', LogisticRegression(solver='liblinear')),\n",
"    ('LDA', LinearDiscriminantAnalysis()),\n",
"    ('KNN', KNeighborsClassifier()),\n",
"    ('CART', DecisionTreeClassifier()),\n",
"    ('NB', GaussianNB()),\n",
"    ('SVM', SVC(gamma='auto')),\n",
"]"
],
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "r0dYFpqw6iXs",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 923
},
"outputId": "c7d83a67-7567-499b-b99c-3792b5c0f9c5"
},
"source": [
"results = []  # per-fold accuracy arrays, one per model\n",
"names = []    # short model names, in evaluation order\n",
"res = []      # mean cross-validated accuracy per model\n",
"\n",
"# The splitter is identical for every model, so build it once outside the loop.\n",
"# Shuffling is off by default, so the folds are deterministic without a random_state.\n",
"kfold = StratifiedKFold(n_splits=10)\n",
"\n",
"# NOTE: after this loop, `name` and `model` stay bound to the last pair in `models`.\n",
"for name, model in models:\n",
"    cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring='accuracy')\n",
"    results.append(cv_results)\n",
"    names.append(name)\n",
"    res.append(cv_results.mean())\n",
"    print('%s: %f' % (name, cv_results.mean()))\n",
"\n",
"# The y-axis is zoomed to 0.900-0.999 so the small gaps between models are visible.\n",
"pyplot.ylim(.900, .999)\n",
"pyplot.bar(names, res, color='maroon', width=0.6)\n",
"\n",
"pyplot.title('Algorithm Comparison')\n",
"pyplot.xlabel('Algorithm')\n",
"pyplot.ylabel('Mean 10-fold CV accuracy')\n",
"pyplot.show()"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"LR: 0.981285\n",
"LDA: 0.957863\n",
"KNN: 0.964839\n",
"CART: 0.929568\n",
"NB: 0.941417\n",
"SVM: 0.979014\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ospEktZ3_KgQ"
},
"source": [
"### *Training & Prediction using one of the highest-accuracy algorithms (SVM)*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zbi3Uvd0_Yn7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c3532d24-4239-4945-daee-c3d5ba95fcda"
},
"source": [
"from sklearn.svm import SVC\n",
"\n",
"# Create the final classifier explicitly instead of reusing whatever estimator an\n",
"# earlier loop happened to leave bound to `model`; the cell no longer depends on\n",
"# leftover kernel state. SVC(gamma='auto') is the same configuration as the 'SVM'\n",
"# candidate in the comparison above.\n",
"model = SVC(gamma='auto')\n",
"model.fit(X_train, y_train)\n",
"y_pred = model.predict(X_test)\n",
"\n",
"# Print predictions (left column) side by side with the true test labels (right column).\n",
"print(np.column_stack((y_pred, y_test)))"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 1]\n",
" [1 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 1]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]]\n"
]
}
]
}
]
}