mirror of
https://github.com/deepseek-ai/DeepSeek-Coder.git
synced 2025-02-23 14:19:09 -05:00
938 lines
55 KiB
Plaintext
938 lines
55 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": [],
|
|
"include_colab_link": true
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/Orrm23/DeepSeek-Coder/blob/main/BreastCancerDetection_VariousMLAlgorithm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "AhpeX5Dm_eLW"
|
|
},
|
|
"source": [
|
|
"# **Day-10_BreastCancerDetection_VariousMLAlgorithm**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "8rvHswu2td0Z"
|
|
},
|
|
"source": [
|
|
"### *Importing Libraries*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "P-YXxKhu_Uk4"
|
|
},
|
|
"source": [
|
|
"import pandas as pd #useful for loading the dataset\n",
|
|
"import numpy as np #to perform array operations\n",
|
|
"from matplotlib import pyplot"
|
|
],
|
|
"execution_count": 1,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "mOt3nocbwvZk"
|
|
},
|
|
"source": [
|
|
"### *Choose Dataset from Local Directory*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "xB2NMwXtw2dG",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 73
|
|
},
|
|
"outputId": "ec354929-3ba9-42a1-b9bb-ccf832a4bb1b"
|
|
},
|
|
"source": [
|
|
"from google.colab import files\n",
|
|
"uploaded = files.upload()"
|
|
],
|
|
"execution_count": 2,
|
|
"outputs": [
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
],
|
|
"text/html": [
|
|
"\n",
|
|
" <input type=\"file\" id=\"files-fecec7e8-bcb2-40b8-af0a-82ebe3c06ca1\" name=\"files[]\" multiple disabled\n",
|
|
" style=\"border:none\" />\n",
|
|
" <output id=\"result-fecec7e8-bcb2-40b8-af0a-82ebe3c06ca1\">\n",
|
|
" Upload widget is only available when the cell has been executed in the\n",
|
|
" current browser session. Please rerun this cell to enable.\n",
|
|
" </output>\n",
|
|
" <script>// Copyright 2017 Google LLC\n",
|
|
"//\n",
|
|
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
|
|
"// you may not use this file except in compliance with the License.\n",
|
|
"// You may obtain a copy of the License at\n",
|
|
"//\n",
|
|
"// http://www.apache.org/licenses/LICENSE-2.0\n",
|
|
"//\n",
|
|
"// Unless required by applicable law or agreed to in writing, software\n",
|
|
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
|
|
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
|
|
"// See the License for the specific language governing permissions and\n",
|
|
"// limitations under the License.\n",
|
|
"\n",
|
|
"/**\n",
|
|
" * @fileoverview Helpers for google.colab Python module.\n",
|
|
" */\n",
|
|
"(function(scope) {\n",
|
|
"function span(text, styleAttributes = {}) {\n",
|
|
" const element = document.createElement('span');\n",
|
|
" element.textContent = text;\n",
|
|
" for (const key of Object.keys(styleAttributes)) {\n",
|
|
" element.style[key] = styleAttributes[key];\n",
|
|
" }\n",
|
|
" return element;\n",
|
|
"}\n",
|
|
"\n",
|
|
"// Max number of bytes which will be uploaded at a time.\n",
|
|
"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
|
|
"\n",
|
|
"function _uploadFiles(inputId, outputId) {\n",
|
|
" const steps = uploadFilesStep(inputId, outputId);\n",
|
|
" const outputElement = document.getElementById(outputId);\n",
|
|
" // Cache steps on the outputElement to make it available for the next call\n",
|
|
" // to uploadFilesContinue from Python.\n",
|
|
" outputElement.steps = steps;\n",
|
|
"\n",
|
|
" return _uploadFilesContinue(outputId);\n",
|
|
"}\n",
|
|
"\n",
|
|
"// This is roughly an async generator (not supported in the browser yet),\n",
|
|
"// where there are multiple asynchronous steps and the Python side is going\n",
|
|
"// to poll for completion of each step.\n",
|
|
"// This uses a Promise to block the python side on completion of each step,\n",
|
|
"// then passes the result of the previous step as the input to the next step.\n",
|
|
"function _uploadFilesContinue(outputId) {\n",
|
|
" const outputElement = document.getElementById(outputId);\n",
|
|
" const steps = outputElement.steps;\n",
|
|
"\n",
|
|
" const next = steps.next(outputElement.lastPromiseValue);\n",
|
|
" return Promise.resolve(next.value.promise).then((value) => {\n",
|
|
" // Cache the last promise value to make it available to the next\n",
|
|
" // step of the generator.\n",
|
|
" outputElement.lastPromiseValue = value;\n",
|
|
" return next.value.response;\n",
|
|
" });\n",
|
|
"}\n",
|
|
"\n",
|
|
"/**\n",
|
|
" * Generator function which is called between each async step of the upload\n",
|
|
" * process.\n",
|
|
" * @param {string} inputId Element ID of the input file picker element.\n",
|
|
" * @param {string} outputId Element ID of the output display.\n",
|
|
" * @return {!Iterable<!Object>} Iterable of next steps.\n",
|
|
" */\n",
|
|
"function* uploadFilesStep(inputId, outputId) {\n",
|
|
" const inputElement = document.getElementById(inputId);\n",
|
|
" inputElement.disabled = false;\n",
|
|
"\n",
|
|
" const outputElement = document.getElementById(outputId);\n",
|
|
" outputElement.innerHTML = '';\n",
|
|
"\n",
|
|
" const pickedPromise = new Promise((resolve) => {\n",
|
|
" inputElement.addEventListener('change', (e) => {\n",
|
|
" resolve(e.target.files);\n",
|
|
" });\n",
|
|
" });\n",
|
|
"\n",
|
|
" const cancel = document.createElement('button');\n",
|
|
" inputElement.parentElement.appendChild(cancel);\n",
|
|
" cancel.textContent = 'Cancel upload';\n",
|
|
" const cancelPromise = new Promise((resolve) => {\n",
|
|
" cancel.onclick = () => {\n",
|
|
" resolve(null);\n",
|
|
" };\n",
|
|
" });\n",
|
|
"\n",
|
|
" // Wait for the user to pick the files.\n",
|
|
" const files = yield {\n",
|
|
" promise: Promise.race([pickedPromise, cancelPromise]),\n",
|
|
" response: {\n",
|
|
" action: 'starting',\n",
|
|
" }\n",
|
|
" };\n",
|
|
"\n",
|
|
" cancel.remove();\n",
|
|
"\n",
|
|
" // Disable the input element since further picks are not allowed.\n",
|
|
" inputElement.disabled = true;\n",
|
|
"\n",
|
|
" if (!files) {\n",
|
|
" return {\n",
|
|
" response: {\n",
|
|
" action: 'complete',\n",
|
|
" }\n",
|
|
" };\n",
|
|
" }\n",
|
|
"\n",
|
|
" for (const file of files) {\n",
|
|
" const li = document.createElement('li');\n",
|
|
" li.append(span(file.name, {fontWeight: 'bold'}));\n",
|
|
" li.append(span(\n",
|
|
" `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
|
|
" `last modified: ${\n",
|
|
" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
|
|
" 'n/a'} - `));\n",
|
|
" const percent = span('0% done');\n",
|
|
" li.appendChild(percent);\n",
|
|
"\n",
|
|
" outputElement.appendChild(li);\n",
|
|
"\n",
|
|
" const fileDataPromise = new Promise((resolve) => {\n",
|
|
" const reader = new FileReader();\n",
|
|
" reader.onload = (e) => {\n",
|
|
" resolve(e.target.result);\n",
|
|
" };\n",
|
|
" reader.readAsArrayBuffer(file);\n",
|
|
" });\n",
|
|
" // Wait for the data to be ready.\n",
|
|
" let fileData = yield {\n",
|
|
" promise: fileDataPromise,\n",
|
|
" response: {\n",
|
|
" action: 'continue',\n",
|
|
" }\n",
|
|
" };\n",
|
|
"\n",
|
|
" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
|
|
" let position = 0;\n",
|
|
" do {\n",
|
|
" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
|
|
" const chunk = new Uint8Array(fileData, position, length);\n",
|
|
" position += length;\n",
|
|
"\n",
|
|
" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
|
|
" yield {\n",
|
|
" response: {\n",
|
|
" action: 'append',\n",
|
|
" file: file.name,\n",
|
|
" data: base64,\n",
|
|
" },\n",
|
|
" };\n",
|
|
"\n",
|
|
" let percentDone = fileData.byteLength === 0 ?\n",
|
|
" 100 :\n",
|
|
" Math.round((position / fileData.byteLength) * 100);\n",
|
|
" percent.textContent = `${percentDone}% done`;\n",
|
|
"\n",
|
|
" } while (position < fileData.byteLength);\n",
|
|
" }\n",
|
|
"\n",
|
|
" // All done.\n",
|
|
" yield {\n",
|
|
" response: {\n",
|
|
" action: 'complete',\n",
|
|
" }\n",
|
|
" };\n",
|
|
"}\n",
|
|
"\n",
|
|
"scope.google = scope.google || {};\n",
|
|
"scope.google.colab = scope.google.colab || {};\n",
|
|
"scope.google.colab._files = {\n",
|
|
" _uploadFiles,\n",
|
|
" _uploadFilesContinue,\n",
|
|
"};\n",
|
|
"})(self);\n",
|
|
"</script> "
|
|
]
|
|
},
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"Saving data.csv to data.csv\n"
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "69Kx6TycwzHo"
|
|
},
|
|
"source": [
|
|
"### *Load Dataset*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "7aXWQK9Cw7Dz"
|
|
},
|
|
"source": [
|
|
"dataset = pd.read_csv('data.csv')"
|
|
],
|
|
"execution_count": 3,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "huZ5AaCtxD0p"
|
|
},
|
|
"source": [
|
|
"### *Summarize Dataset*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "SmD4EDrkxFzq",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "c8de07bb-c31d-45b6-d7f1-f37549d27d5f"
|
|
},
|
|
"source": [
|
|
"print(dataset.shape)\n",
|
|
"print(dataset.head(5))"
|
|
],
|
|
"execution_count": 4,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"(569, 33)\n",
|
|
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
|
|
"0 842302 M 17.99 10.38 122.80 1001.0 \n",
|
|
"1 842517 M 20.57 17.77 132.90 1326.0 \n",
|
|
"2 84300903 M 19.69 21.25 130.00 1203.0 \n",
|
|
"3 84348301 M 11.42 20.38 77.58 386.1 \n",
|
|
"4 84358402 M 20.29 14.34 135.10 1297.0 \n",
|
|
"\n",
|
|
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
|
|
"0 0.11840 0.27760 0.3001 0.14710 \n",
|
|
"1 0.08474 0.07864 0.0869 0.07017 \n",
|
|
"2 0.10960 0.15990 0.1974 0.12790 \n",
|
|
"3 0.14250 0.28390 0.2414 0.10520 \n",
|
|
"4 0.10030 0.13280 0.1980 0.10430 \n",
|
|
"\n",
|
|
" ... texture_worst perimeter_worst area_worst smoothness_worst \\\n",
|
|
"0 ... 17.33 184.60 2019.0 0.1622 \n",
|
|
"1 ... 23.41 158.80 1956.0 0.1238 \n",
|
|
"2 ... 25.53 152.50 1709.0 0.1444 \n",
|
|
"3 ... 26.50 98.87 567.7 0.2098 \n",
|
|
"4 ... 16.67 152.20 1575.0 0.1374 \n",
|
|
"\n",
|
|
" compactness_worst concavity_worst concave points_worst symmetry_worst \\\n",
|
|
"0 0.6656 0.7119 0.2654 0.4601 \n",
|
|
"1 0.1866 0.2416 0.1860 0.2750 \n",
|
|
"2 0.4245 0.4504 0.2430 0.3613 \n",
|
|
"3 0.8663 0.6869 0.2575 0.6638 \n",
|
|
"4 0.2050 0.4000 0.1625 0.2364 \n",
|
|
"\n",
|
|
" fractal_dimension_worst Unnamed: 32 \n",
|
|
"0 0.11890 NaN \n",
|
|
"1 0.08902 NaN \n",
|
|
"2 0.08758 NaN \n",
|
|
"3 0.17300 NaN \n",
|
|
"4 0.07678 NaN \n",
|
|
"\n",
|
|
"[5 rows x 33 columns]\n"
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "-MFwV-9k3Tu1"
|
|
},
|
|
"source": [
|
|
"### *Mapping Class String Values to Numbers*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "yJvwzEfF3ZMe",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "7138e1d5-0f52-4a8d-80e1-63f3280ea23f"
|
|
},
|
|
"source": [
|
|
"dataset['diagnosis'] = dataset['diagnosis'].map({'B': 0, 'M': 1}).astype(int)\n",
|
|
"print(dataset.head)"
|
|
],
|
|
"execution_count": 5,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"<bound method NDFrame.head of id diagnosis radius_mean texture_mean perimeter_mean \\\n",
|
|
"0 842302 1 17.99 10.38 122.80 \n",
|
|
"1 842517 1 20.57 17.77 132.90 \n",
|
|
"2 84300903 1 19.69 21.25 130.00 \n",
|
|
"3 84348301 1 11.42 20.38 77.58 \n",
|
|
"4 84358402 1 20.29 14.34 135.10 \n",
|
|
".. ... ... ... ... ... \n",
|
|
"564 926424 1 21.56 22.39 142.00 \n",
|
|
"565 926682 1 20.13 28.25 131.20 \n",
|
|
"566 926954 1 16.60 28.08 108.30 \n",
|
|
"567 927241 1 20.60 29.33 140.10 \n",
|
|
"568 92751 0 7.76 24.54 47.92 \n",
|
|
"\n",
|
|
" area_mean smoothness_mean compactness_mean concavity_mean \\\n",
|
|
"0 1001.0 0.11840 0.27760 0.30010 \n",
|
|
"1 1326.0 0.08474 0.07864 0.08690 \n",
|
|
"2 1203.0 0.10960 0.15990 0.19740 \n",
|
|
"3 386.1 0.14250 0.28390 0.24140 \n",
|
|
"4 1297.0 0.10030 0.13280 0.19800 \n",
|
|
".. ... ... ... ... \n",
|
|
"564 1479.0 0.11100 0.11590 0.24390 \n",
|
|
"565 1261.0 0.09780 0.10340 0.14400 \n",
|
|
"566 858.1 0.08455 0.10230 0.09251 \n",
|
|
"567 1265.0 0.11780 0.27700 0.35140 \n",
|
|
"568 181.0 0.05263 0.04362 0.00000 \n",
|
|
"\n",
|
|
" concave points_mean ... texture_worst perimeter_worst area_worst \\\n",
|
|
"0 0.14710 ... 17.33 184.60 2019.0 \n",
|
|
"1 0.07017 ... 23.41 158.80 1956.0 \n",
|
|
"2 0.12790 ... 25.53 152.50 1709.0 \n",
|
|
"3 0.10520 ... 26.50 98.87 567.7 \n",
|
|
"4 0.10430 ... 16.67 152.20 1575.0 \n",
|
|
".. ... ... ... ... ... \n",
|
|
"564 0.13890 ... 26.40 166.10 2027.0 \n",
|
|
"565 0.09791 ... 38.25 155.00 1731.0 \n",
|
|
"566 0.05302 ... 34.12 126.70 1124.0 \n",
|
|
"567 0.15200 ... 39.42 184.60 1821.0 \n",
|
|
"568 0.00000 ... 30.37 59.16 268.6 \n",
|
|
"\n",
|
|
" smoothness_worst compactness_worst concavity_worst \\\n",
|
|
"0 0.16220 0.66560 0.7119 \n",
|
|
"1 0.12380 0.18660 0.2416 \n",
|
|
"2 0.14440 0.42450 0.4504 \n",
|
|
"3 0.20980 0.86630 0.6869 \n",
|
|
"4 0.13740 0.20500 0.4000 \n",
|
|
".. ... ... ... \n",
|
|
"564 0.14100 0.21130 0.4107 \n",
|
|
"565 0.11660 0.19220 0.3215 \n",
|
|
"566 0.11390 0.30940 0.3403 \n",
|
|
"567 0.16500 0.86810 0.9387 \n",
|
|
"568 0.08996 0.06444 0.0000 \n",
|
|
"\n",
|
|
" concave points_worst symmetry_worst fractal_dimension_worst \\\n",
|
|
"0 0.2654 0.4601 0.11890 \n",
|
|
"1 0.1860 0.2750 0.08902 \n",
|
|
"2 0.2430 0.3613 0.08758 \n",
|
|
"3 0.2575 0.6638 0.17300 \n",
|
|
"4 0.1625 0.2364 0.07678 \n",
|
|
".. ... ... ... \n",
|
|
"564 0.2216 0.2060 0.07115 \n",
|
|
"565 0.1628 0.2572 0.06637 \n",
|
|
"566 0.1418 0.2218 0.07820 \n",
|
|
"567 0.2650 0.4087 0.12400 \n",
|
|
"568 0.0000 0.2871 0.07039 \n",
|
|
"\n",
|
|
" Unnamed: 32 \n",
|
|
"0 NaN \n",
|
|
"1 NaN \n",
|
|
"2 NaN \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
".. ... \n",
|
|
"564 NaN \n",
|
|
"565 NaN \n",
|
|
"566 NaN \n",
|
|
"567 NaN \n",
|
|
"568 NaN \n",
|
|
"\n",
|
|
"[569 rows x 33 columns]>\n"
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "0qgR6rGRxH5y"
|
|
},
|
|
"source": [
|
|
"### *Segregate Dataset into X(Input/IndependentVariable) & Y(Output/DependentVariable)*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "8qOVIILpxefB",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "a50556fe-21af-46eb-ff7e-056be4e2d174"
|
|
},
|
|
"source": [
|
|
"X = dataset.iloc[:, 2:32].values\n",
|
|
"X"
|
|
],
|
|
"execution_count": 6,
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
"array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n",
|
|
" 1.189e-01],\n",
|
|
" [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n",
|
|
" 8.902e-02],\n",
|
|
" [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n",
|
|
" 8.758e-02],\n",
|
|
" ...,\n",
|
|
" [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,\n",
|
|
" 7.820e-02],\n",
|
|
" [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n",
|
|
" 1.240e-01],\n",
|
|
" [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,\n",
|
|
" 7.039e-02]])"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 6
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "TE6LNAwmxkBn",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "93adc199-d960-4cdd-9d42-3ad978584020"
|
|
},
|
|
"source": [
|
|
"Y = dataset.iloc[:,1].values\n",
|
|
"Y"
|
|
],
|
|
"execution_count": 7,
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
"array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,\n",
|
|
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n",
|
|
" 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,\n",
|
|
" 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,\n",
|
|
" 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,\n",
|
|
" 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,\n",
|
|
" 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,\n",
|
|
" 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,\n",
|
|
" 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,\n",
|
|
" 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
|
|
" 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,\n",
|
|
" 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,\n",
|
|
" 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1,\n",
|
|
" 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,\n",
|
|
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,\n",
|
|
" 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,\n",
|
|
" 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0,\n",
|
|
" 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0])"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 7
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "oOzExtMjxmup"
|
|
},
|
|
"source": [
|
|
"### *Splitting Dataset into Train & Test*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "uJXcK2PHxqJ9"
|
|
},
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)"
|
|
],
|
|
"execution_count": 8,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "89y8rh-3yv15"
|
|
},
|
|
"source": [
|
|
"### *Feature Scaling*\n",
|
|
"\n",
|
|
"We scale the data so that all features contribute equally to the result.\n",
|
|
"\n",
|
|
"- **`fit_transform`** (training set): the fit step calculates the mean and variance of each feature in the training data, and the transform step then scales those features with them.\n",
|
|
"- **`transform`** (test set): scales the test features using the mean and variance already learned from the training data.\n",
|
|
"\n",
|
|
"The scaler is not fitted on the test data because we want the test set to stay completely new, unseen data for the model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "ehsC_5BSy-Pa"
|
|
},
|
|
"source": [
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"sc = StandardScaler()\n",
|
|
"X_train = sc.fit_transform(X_train)\n",
|
|
"X_test = sc.transform(X_test)"
|
|
],
|
|
"execution_count": 9,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "X1gsZ3YZ51gz"
|
|
},
|
|
"source": [
|
|
"### *Validating several ML algorithms by their accuracy (model score)*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "ekrjJPx_5-rJ"
|
|
},
|
|
"source": [
|
|
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.tree import DecisionTreeClassifier\n",
|
|
"from sklearn.neighbors import KNeighborsClassifier\n",
|
|
"from sklearn.naive_bayes import GaussianNB\n",
|
|
"from sklearn.svm import SVC\n",
|
|
"\n",
|
|
"from sklearn.model_selection import cross_val_score\n",
|
|
"from sklearn.model_selection import StratifiedKFold"
|
|
],
|
|
"execution_count": 10,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "7fmWEBDq6fUM"
|
|
},
|
|
"source": [
|
|
"models = []\n",
|
|
"models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\n",
|
|
"models.append(('LDA', LinearDiscriminantAnalysis()))\n",
|
|
"models.append(('KNN', KNeighborsClassifier()))\n",
|
|
"models.append(('CART', DecisionTreeClassifier()))\n",
|
|
"models.append(('NB', GaussianNB()))\n",
|
|
"models.append(('SVM', SVC(gamma='auto')))"
|
|
],
|
|
"execution_count": 11,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "r0dYFpqw6iXs",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 923
|
|
},
|
|
"outputId": "c7d83a67-7567-499b-b99c-3792b5c0f9c5"
|
|
},
|
|
"source": [
|
|
"results = []\n",
|
|
"names = []\n",
|
|
"res = []\n",
|
|
"for name, model in models:\n",
|
|
" kfold = StratifiedKFold(n_splits=10, random_state=None)\n",
|
|
" cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring='accuracy')\n",
|
|
" results.append(cv_results)\n",
|
|
" names.append(name)\n",
|
|
" res.append(cv_results.mean())\n",
|
|
" print('%s: %f' % (name, cv_results.mean()))\n",
|
|
"\n",
|
|
"pyplot.ylim(.900, .999)\n",
|
|
"pyplot.bar(names, res, color ='maroon', width = 0.6)\n",
|
|
"\n",
|
|
"pyplot.title('Algorithm Comparison')\n",
|
|
"pyplot.show()"
|
|
],
|
|
"execution_count": 12,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n",
|
|
"/usr/local/lib/python3.11/dist-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
|
|
" warnings.warn(\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"LR: 0.981285\n",
|
|
"LDA: 0.957863\n",
|
|
"KNN: 0.964839\n",
|
|
"CART: 0.929568\n",
|
|
"NB: 0.941417\n",
|
|
"SVM: 0.979014\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
],
|
|
"image/png": "\n"
|
|
},
|
|
"metadata": {}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "ospEktZ3_KgQ"
|
|
},
|
|
"source": [
|
|
"### *Training & Prediction using the algorithm with high accuracy*"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "zbi3Uvd0_Yn7",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "c3532d24-4239-4945-daee-c3d5ba95fcda"
|
|
},
|
|
"source": [
|
|
"from sklearn.svm import SVC\n",
|
|
"model.fit(X_train, y_train)\n",
|
|
"y_pred = model.predict(X_test)\n",
|
|
"print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))"
|
|
],
|
|
"execution_count": 13,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"[[1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 1]\n",
|
|
" [1 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 1]\n",
|
|
" [1 1]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [0 0]\n",
|
|
" [1 1]]\n"
|
|
]
|
|
}
|
|
]
|
|
}
|
|
]
|
|
} |