DeepSeek-Coder/SalaryEstimation_K_NN.ipynb

1156 lines
86 KiB
Plaintext
Raw Normal View History

2025-02-04 03:35:13 -05:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/Orrm23/DeepSeek-Coder/blob/main/SalaryEstimation_K_NN.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6pk_INoSKj1N"
},
"source": [
"# Day-4 | Salary Estimation | K-NEAREST NEIGHBOUR model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7OGtDD8zKwUz"
},
"source": [
"### *Importing Libraries*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "7YBAsJmVKc_B"
},
"source": [
"import pandas as pd #useful for loading the dataset\n",
"import numpy as np #to perform array operations"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "rVLDLs-lK3xo"
},
"source": [
"### *Choose Dataset from Local Directory*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "XA2ITub0K9k2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 73
},
"outputId": "02acd90f-0908-4b97-85b4-bf701901c3f6"
},
"source": [
"# Colab-only helper: files.upload() opens a browser file picker and saves the\n",
"# chosen file (salary.csv) into the runtime's working directory.\n",
"try:\n",
"    from google.colab import files\n",
"except ImportError:\n",
"    # Not running in Colab (e.g. local Jupyter): there is no upload widget, so\n",
"    # salary.csv is expected to already sit in the working directory.\n",
"    uploaded = {}\n",
"    print('google.colab not available; expecting salary.csv in the working directory')\n",
"else:\n",
"    uploaded = files.upload()"
],
"execution_count": 2,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <input type=\"file\" id=\"files-10af51ff-93ce-47d8-89ae-45b5a3fa1cf9\" name=\"files[]\" multiple disabled\n",
" style=\"border:none\" />\n",
" <output id=\"result-10af51ff-93ce-47d8-89ae-45b5a3fa1cf9\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script>// Copyright 2017 Google LLC\n",
"//\n",
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"// you may not use this file except in compliance with the License.\n",
"// You may obtain a copy of the License at\n",
"//\n",
"// http://www.apache.org/licenses/LICENSE-2.0\n",
"//\n",
"// Unless required by applicable law or agreed to in writing, software\n",
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"// See the License for the specific language governing permissions and\n",
"// limitations under the License.\n",
"\n",
"/**\n",
" * @fileoverview Helpers for google.colab Python module.\n",
" */\n",
"(function(scope) {\n",
"function span(text, styleAttributes = {}) {\n",
" const element = document.createElement('span');\n",
" element.textContent = text;\n",
" for (const key of Object.keys(styleAttributes)) {\n",
" element.style[key] = styleAttributes[key];\n",
" }\n",
" return element;\n",
"}\n",
"\n",
"// Max number of bytes which will be uploaded at a time.\n",
"const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
"\n",
"function _uploadFiles(inputId, outputId) {\n",
" const steps = uploadFilesStep(inputId, outputId);\n",
" const outputElement = document.getElementById(outputId);\n",
" // Cache steps on the outputElement to make it available for the next call\n",
" // to uploadFilesContinue from Python.\n",
" outputElement.steps = steps;\n",
"\n",
" return _uploadFilesContinue(outputId);\n",
"}\n",
"\n",
"// This is roughly an async generator (not supported in the browser yet),\n",
"// where there are multiple asynchronous steps and the Python side is going\n",
"// to poll for completion of each step.\n",
"// This uses a Promise to block the python side on completion of each step,\n",
"// then passes the result of the previous step as the input to the next step.\n",
"function _uploadFilesContinue(outputId) {\n",
" const outputElement = document.getElementById(outputId);\n",
" const steps = outputElement.steps;\n",
"\n",
" const next = steps.next(outputElement.lastPromiseValue);\n",
" return Promise.resolve(next.value.promise).then((value) => {\n",
" // Cache the last promise value to make it available to the next\n",
" // step of the generator.\n",
" outputElement.lastPromiseValue = value;\n",
" return next.value.response;\n",
" });\n",
"}\n",
"\n",
"/**\n",
" * Generator function which is called between each async step of the upload\n",
" * process.\n",
" * @param {string} inputId Element ID of the input file picker element.\n",
" * @param {string} outputId Element ID of the output display.\n",
" * @return {!Iterable<!Object>} Iterable of next steps.\n",
" */\n",
"function* uploadFilesStep(inputId, outputId) {\n",
" const inputElement = document.getElementById(inputId);\n",
" inputElement.disabled = false;\n",
"\n",
" const outputElement = document.getElementById(outputId);\n",
" outputElement.innerHTML = '';\n",
"\n",
" const pickedPromise = new Promise((resolve) => {\n",
" inputElement.addEventListener('change', (e) => {\n",
" resolve(e.target.files);\n",
" });\n",
" });\n",
"\n",
" const cancel = document.createElement('button');\n",
" inputElement.parentElement.appendChild(cancel);\n",
" cancel.textContent = 'Cancel upload';\n",
" const cancelPromise = new Promise((resolve) => {\n",
" cancel.onclick = () => {\n",
" resolve(null);\n",
" };\n",
" });\n",
"\n",
" // Wait for the user to pick the files.\n",
" const files = yield {\n",
" promise: Promise.race([pickedPromise, cancelPromise]),\n",
" response: {\n",
" action: 'starting',\n",
" }\n",
" };\n",
"\n",
" cancel.remove();\n",
"\n",
" // Disable the input element since further picks are not allowed.\n",
" inputElement.disabled = true;\n",
"\n",
" if (!files) {\n",
" return {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
" }\n",
"\n",
" for (const file of files) {\n",
" const li = document.createElement('li');\n",
" li.append(span(file.name, {fontWeight: 'bold'}));\n",
" li.append(span(\n",
" `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
" `last modified: ${\n",
" file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
" 'n/a'} - `));\n",
" const percent = span('0% done');\n",
" li.appendChild(percent);\n",
"\n",
" outputElement.appendChild(li);\n",
"\n",
" const fileDataPromise = new Promise((resolve) => {\n",
" const reader = new FileReader();\n",
" reader.onload = (e) => {\n",
" resolve(e.target.result);\n",
" };\n",
" reader.readAsArrayBuffer(file);\n",
" });\n",
" // Wait for the data to be ready.\n",
" let fileData = yield {\n",
" promise: fileDataPromise,\n",
" response: {\n",
" action: 'continue',\n",
" }\n",
" };\n",
"\n",
" // Use a chunked sending to avoid message size limits. See b/62115660.\n",
" let position = 0;\n",
" do {\n",
" const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
" const chunk = new Uint8Array(fileData, position, length);\n",
" position += length;\n",
"\n",
" const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
" yield {\n",
" response: {\n",
" action: 'append',\n",
" file: file.name,\n",
" data: base64,\n",
" },\n",
" };\n",
"\n",
" let percentDone = fileData.byteLength === 0 ?\n",
" 100 :\n",
" Math.round((position / fileData.byteLength) * 100);\n",
" percent.textContent = `${percentDone}% done`;\n",
"\n",
" } while (position < fileData.byteLength);\n",
" }\n",
"\n",
" // All done.\n",
" yield {\n",
" response: {\n",
" action: 'complete',\n",
" }\n",
" };\n",
"}\n",
"\n",
"scope.google = scope.google || {};\n",
"scope.google.colab = scope.google.colab || {};\n",
"scope.google.colab._files = {\n",
" _uploadFiles,\n",
" _uploadFilesContinue,\n",
"};\n",
"})(self);\n",
"</script> "
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving salary.csv to salary.csv\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2VV6yeOfK_vp"
},
"source": [
"### *Load Dataset*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Ij-0EywfLN3t"
},
"source": [
"dataset = pd.read_csv('salary.csv')"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "7QZKjhQiLd0_"
},
"source": [
"### *Summarize Dataset*\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Bn4Vn37VLsZX",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "3c5f2ba0-35fd-4859-9c20-777593dc00df"
},
"source": [
"# Sanity check: (rows, columns) first, then the first five records.\n",
"print(dataset.shape, dataset.head(), sep='\\n')"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(32561, 5)\n",
" age education.num capital.gain hours.per.week income\n",
"0 90 9 0 40 <=50K\n",
"1 82 9 0 18 <=50K\n",
"2 66 10 0 40 <=50K\n",
"3 54 4 0 40 <=50K\n",
"4 41 10 0 40 <=50K\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "PSCVm2nuMmEa"
},
"source": [
"### *Mapping Salary Data to Binary Value*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "NHJs-2U0M3CV",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "ba91dd2d-2c05-4d58-d180-86635bc29685"
},
"source": [
"# Encode the target column: '<=50K' -> 0, '>50K' -> 1.\n",
"income_map = {'<=50K': 0, '>50K': 1}\n",
"income_set = set(dataset['income'])\n",
"# Map only while the column still holds raw labels, so re-running this cell\n",
"# does not turn the already-encoded 0/1 values into NaN (which would make\n",
"# astype(int) raise).\n",
"if not income_set <= set(income_map.values()):\n",
"    dataset['income'] = dataset['income'].map(income_map).astype(int)\n",
"# head must be *called*; printing the bare attribute shows the bound method.\n",
"print(dataset.head())"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<bound method NDFrame.head of age education.num capital.gain hours.per.week income\n",
"0 90 9 0 40 0\n",
"1 82 9 0 18 0\n",
"2 66 10 0 40 0\n",
"3 54 4 0 40 0\n",
"4 41 10 0 40 0\n",
"... ... ... ... ... ...\n",
"32556 22 10 0 40 0\n",
"32557 27 12 0 38 0\n",
"32558 40 9 0 40 1\n",
"32559 58 9 0 40 0\n",
"32560 22 9 0 20 0\n",
"\n",
"[32561 rows x 5 columns]>\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cPyEdPblNyrw"
},
"source": [
"### *Segregate Dataset into X (Input / Independent Variables) & Y (Output / Dependent Variable)*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "LKL0-37RNz0v",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a714a09a-8f1b-4de0-de9a-f7f5c8461207"
},
"source": [
"# Feature matrix: every column except the last one (the income label).\n",
"X = dataset[dataset.columns[:-1]].to_numpy()\n",
"X"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[90, 9, 0, 40],\n",
" [82, 9, 0, 18],\n",
" [66, 10, 0, 40],\n",
" ...,\n",
" [40, 9, 0, 40],\n",
" [58, 9, 0, 40],\n",
" [22, 9, 0, 20]])"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "gYqJfw9CN379",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "85ddb52c-990f-4fc3-d1e2-6d3d75f77dda"
},
"source": [
"# Target vector: the last column (income, already encoded as 0/1).\n",
"Y = dataset[dataset.columns[-1]].to_numpy()\n",
"Y"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([0, 0, 0, ..., 1, 0, 0])"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "c9aiUBLqOCIP"
},
"source": [
"### *Splitting Dataset into Train & Test*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "F-Xes7CFODNU"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "BI1S2PKoOLpf"
},
"source": [
"### *Feature Scaling*\n",
"We scale our data so that all the features contribute equally to the result.\n",
"\n",
"- **`fit_transform`** - the `fit` step calculates the mean and variance of each of the features present in our training data, and the `transform` step then scales them.\n",
"- **`transform`** - transforms all the features using the respective mean and variance that were already calculated. We use it on its own for the test data, because we want our test data to be a completely new and surprise set for our model."
]
},
{
"cell_type": "code",
"metadata": {
"id": "bHSNDh8zOMZl"
},
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"# Learn the scaling statistics from the training split only, then apply that\n",
"# same scaling to both splits (both transforms read the unscaled arrays).\n",
"sc = StandardScaler().fit(X_train)\n",
"X_train, X_test = sc.transform(X_train), sc.transform(X_test)"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "tWUH3bnwYEfr"
},
"source": [
"### *Finding the Best K-Value*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "WKES0Vn1YINO",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 581
},
"outputId": "207033dc-a2ff-4659-b63c-2ad3d1b0f7e9"
},
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Candidate neighbour counts: K = 1 .. 39 (range() excludes its upper bound).\n",
"k_values = range(1, 40)\n",
"\n",
"# Misclassification rate on the held-out split for every candidate K.\n",
"# The sweep uses its own name (`knn`) so it does not overwrite `model`.\n",
"# NOTE: the error is measured on X_test / y_test, so any K picked from this\n",
"# curve has been tuned on the test split.\n",
"error = []\n",
"for k in k_values:\n",
"    knn = KNeighborsClassifier(n_neighbors=k)\n",
"    knn.fit(X_train, y_train)\n",
"    pred_k = knn.predict(X_test)\n",
"    error.append(np.mean(pred_k != y_test))\n",
"\n",
"# Report the K with the lowest error instead of leaving it to be read off the plot.\n",
"best_k = k_values[int(np.argmin(error))]\n",
"print(f'Lowest mean error {min(error):.4f} at K = {best_k}')\n",
"\n",
"plt.figure(figsize=(12, 6))\n",
"plt.plot(k_values, error, color='red', linestyle='dashed', marker='o',\n",
"         markerfacecolor='blue', markersize=10)\n",
"plt.title('Error Rate K Value')\n",
"plt.xlabel('K Value')\n",
"plt.ylabel('Mean Error');  # trailing ';' hides the Text(...) repr"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0, 0.5, 'Mean Error')"
]
},
"metadata": {},
"execution_count": 10
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1200x600 with 1 Axes>"
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/IAAAIjCAYAAACgdyAGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAe3BJREFUeJzt3Xl4VOXZx/HfZE/IQgiYsIdNXMEWhKLimhIQREpYq5IK7hZFxO1tFVutgLUWEMSKgmBFwBCVghIQAcXiBlLrhsheZJElCRBIJpnn/eOYhEASZpKZzJzJ93Ndc83kLM+5ZzmTuc+zOYwxRgAAAAAAwBZC/B0AAAAAAABwH4k8AAAAAAA2QiIPAAAAAICNkMgDAAAAAGAjJPIAAAAAANgIiTwAAAAAADZCIg8AAAAAgI2QyAMAAAAAYCMk8gAAAAAA2AiJPAAAQDVSU1P1u9/9zt9hAABQhkQeAAA3vfLKK3I4HFXePv74Y3+HWKnf/e53FeKMjIzU2Wefrccee0wnTpyoUZnffPONHn/8cW3fvt2rsa5evVoOh0NZWVkVlhcVFalfv34KCQnRrFmzKt332WeflcPh0HvvvVdl+TNnzpTD4dDixYu9GjcAAHUpzN8BAABgN3/+85/Vpk2b05a3b9/eD9G4JzIyUi+99JIkKS8vT2+//baeeOIJbdmyRa+99prH5X3zzTf605/+pCuvvFKpqalejrYip9OpQYMG6Z133tHMmTM1cuTISrcbNmyYHnjgAc2bN09paWmVbjNv3jwlJSWpT58+vgwZAACfIpEHAMBDffr0UdeuXT3ap7i4WC6XSxEREaetO3bsmBo0aFDjeIwxOnHihKKjo6vcJiwsTDfeeGPZ33fddZcuueQSvf7663r22WeVnJxc4+P7ktPp1JAhQ7RkyRL94x//0KhRo6rctlmzZrrqqquUnZ2tGTNmKDIyssL63bt364MPPtBtt92m8PBwX4cOAIDP0LQeAAAv2759uxwOh5555hlNnjxZ7dq1U2RkZFlzdIfDoW+++Ua//e1vlZiYqMsuu0ySlew/8cQTZdunpqbq//7v/1RYWFih/NTUVPXr1085OTnq2rWroqOj9Y9//MOjGB0Ohy677DIZY7R169ay5Tt27NBdd92ljh07Kjo6WklJSRo8eHCFJvSvvPKKBg8eLEm66qqryprsr169umybd999Vz179lSDBg0UFxenvn376uuvv/YoxuLiYg0bNkxvv/22ZsyYoVtvvfWM+9x4443Ky8vT0qVLT1s3f/58uVwu3XDDDZKkZ555RpdccomSkpIUHR2tLl26nNakvzKl7+GpSrtenNrdwBuvBQAAJ6NGHgAAD+Xl5enAgQMVljkcDiUlJVVYNnv2bJ04cUK33XabIiMj1ahRo7J1gwcPVocOHfTUU0/JGCNJuuWWWzRnzhwNGjRI999/vz755BNNmDBB3377rd58880KZW/atEnDhw/X7bffrltvvVUdO3b0+HmUJpyJiYllyz777DP9+9//1rBhw9SiRQtt375dM2bM0JVXXqlvvvlGMTExuvzyy3XPPfdo6tSp+r//+z+de+65klR2/+qrryozM1Pp6emaNGmSCgoKNGPGDF122WX64osv3GqKX1xcrOHDh+vNN9/U9OnTdfvtt7v1nAYOHKg777xT8+bN08CBAyusmzdvnlq3bq1LL71UkjRlyhT1799fN9xwg4qKijR//nwNHjxYS5YsUd++fd063pl447UAAOA0BgAAuGX27NlGUqW3yMjIsu22bdtmJJn4+Hizf//+CmWMHz/eSDLDhw+vsHzjxo1GkrnlllsqLB83bpyRZN5///2yZa1btzaSzLJly9yKOzMz0zRo0MD89NNP5qeffjI//PCDeeaZZ4zD4TAXXHCBcblcZdsWFBSctv+6deuMJDN37tyyZW+88YaRZFatWlVh2yNHjpiGDRuaW2+9tcLyvXv3moSEhNOWn2
rVqlVGUtlznD59ulvP8WSDBw82UVFRJi8vr2zZd999ZySZRx55pGzZqc+1qKjIXHDBBebqq6+usLx169YmMzOz7O/S9/BUpZ+Pbdu2GWNq/1oAAFAVauQBAPDQ9OnTdfbZZ1dYFhoaetp2GRkZatKkSaVl3HHHHRX+fueddyRJY8eOrbD8/vvv1zPPPKOlS5fqqquuKlvepk0bpaenux3zsWPHTovlsssu05w5cyo0Ez+5n73T6VR+fr7at2+vhg0basOGDbrpppuqPc6KFSuUm5ur4cOHV2i1EBoaqu7du2vVqlVuxbtv3z6FhYVVOqjgmdx444164403lJ2dXTZt3Lx58ySprFm9VPG5Hj58WCUlJerZs6def/11j49ZGW+9FgAAnIpEHgAAD3Xr1s2twe6qS0JPXbdjxw6FhIScNvJ9SkqKGjZsqB07drhddmWioqL0r3/9S5L0v//9T08//bT2799/2gB5x48f14QJEzR79mzt3r27rNm/ZHUpOJPNmzdLkq6++upK18fHx7sV79NPP63Jkydr0KBBWr58eVlzeHf06dNHjRo10rx588oS+ddff12dO3fW+eefX7bdkiVL9OSTT2rjxo0VxiGorP97TXjrtQAA4FQk8gAA+Eh1o8hXtc7dJLK6sisTGhpaYUq29PR0nXPOObr99tsrzKk+evRozZ49W2PGjFGPHj2UkJAgh8OhYcOGyeVynfE4pdu8+uqrSklJOW19WJh7Pz2aNm2qFStW6LLLLlPfvn21Zs0ade7c2a19w8PDNWTIEM2cOVP79u3Tzp07tXnzZj399NNl23z44Yfq37+/Lr/8cj3//PNq2rSpwsPDNXv27LLa+6pU9R6VlJRU+NtbrwUAAKfiPwgAAAGgdevWcrlc2rx5c9mgcZLVxDw3N1etW7f26vGaNm2q++67T3/605/08ccf61e/+pUkKSsrS5mZmfrb3/5Wtu2JEyeUm5tbYf+qktl27dpJks4666wq53J3V9u2bZWTk6MrrrhC6enp+vDDD9WhQwe39r3hhhv0wgsvaMGCBdq2bZscDoeGDx9etn7RokWKiopSTk5OhWnqZs+efcaySwcHzM3NVcOGDcuWn9pqwpuvBQAAJ2P6OQAAAsC1114rSZo8eXKF5c8++6wkeW0U9ZONHj1aMTExmjhxYtmy0NDQCs3pJem55547rba5dN77UxP89PR0xcfH66mnnpLT6TztmD/99JNHMV544YVaunSpjh49ql//+tfavXu3W/tdeumlSk1N1T//+U8tWLBAV1xxhVq0aFG2PjQ0VA6Ho8Lz2r59u956660zll2aoH/wwQdly44dO6Y5c+ZU2M7brwUAAKWokQcAwEPvvvuuvvvuu9OWX3LJJWrbtm2NyuzcubMyMzP14osvKjc3V1dccYU+/fRTzZkzRwMGDKgw0J23JCUl6eabb9bzzz+vb7/9Vueee6769eunV199VQkJCTrvvPO0bt06vffee6dNrXfRRRcpNDRUkyZNUl5eniIjI3X11VfrrLPO0owZM3TTTTfpl7/8pYYNG6YmTZpo586dWrp0qS699FJNmzbNozh79Oih7OxsXXfddfr1r3+tDz/88LR4TuVwOPTb3/5WTz31lCTpz3/+c4X1ffv21bPPPqvevXvrt7/9rfbv36/p06erffv2+vLLL6stu1evXmrVqpVGjRqlBx54QKGhoZo1a1bZ8ywVHx/v9dcCAABJTD8HAIC7qpt+TpKZPXu2MaZ8+rm//vWvp5VROnXZTz/9dNo6p9Np/vSnP5k2bdqY8PBw07JlS/PII4+YEydOVNiudevWpm/fvm7HXTr9XGW2bNliQkNDy6ZXO3z4sLn55ptN48aNTWxsrElPTzfffffdaVOwGWPMzJkzTdu2bU1oaOhpU9GtWrXKpKenm4SEBBMVFWXatWtnfve735nPP/+82lhLp5974403Tlu3YMECExISYi6++GKTn59/xuf99ddfl0
0NePjw4dPWv/zyy6ZDhw4mMjLSnHPOOWb27NmVTi1X2XNfv3696d69u4mIiDCtWrUyzz777GnTz9X2tQAAo
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "j3i7m8V3OoXP"
},
"source": [
"### *Training*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "GmrBKoYLOnIx",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"outputId": "3512171c-3593-42d9-8cc8-d9ea70dd9927"
},
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"# Minkowski distance with p=2 is the ordinary Euclidean distance.\n",
"# NOTE(review): K=2 is hard-coded - confirm it matches the K chosen from the error plot.\n",
"model = KNeighborsClassifier(n_neighbors=2, metric='minkowski', p=2).fit(X_train, y_train)\n",
"model"
],
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"KNeighborsClassifier(n_neighbors=2)"
],
"text/html": [
"<style>#sk-container-id-1 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: #000;\n",
" --sklearn-color-text-muted: #666;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-1 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-1 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-1 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: flex;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
" align-items: start;\n",
" justify-content: space-between;\n",
" gap: 0.5em;\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label .caption {\n",
" font-size: 0.6rem;\n",
" font-weight: lighter;\n",
" color: var(--sklearn-color-text-muted);\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-1 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-1 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-1 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-1 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 0.5em;\n",
" text-align: center;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier(n_neighbors=2)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>KNeighborsClassifier</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.neighbors.KNeighborsClassifier.html\">?<span>Documentation for KNeighborsClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>KNeighborsClassifier(n_neighbors=2)</pre></div> </div></div></div></div>"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0oabYXRVOxO4"
},
"source": [
"### *Predicting whether a new Employee's Salary will be above 50K*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ovhU7dWzOx_a",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "0ffcdae0-a0cc-438a-f3dc-83f253ae793d"
},
"source": [
"# Collect the four features in the same column order the model was trained on:\n",
"# age, education.num, capital.gain, hours.per.week.\n",
"age = int(input(\"Enter New Employee's Age: \"))\n",
"edu = int(input(\"Enter New Employee's Education: \"))\n",
"cg = int(input(\"Enter New Employee's Captital Gain: \"))\n",
"wh = int(input(\"Enter New Employee's Hour's Per week: \"))\n",
"newEmp = [[age,edu,cg,wh]]\n",
"# Scale the new row with the scaler fitted on the training data before predicting.\n",
"result = model.predict(sc.transform(newEmp))\n",
"print(result)\n",
"\n",
"# predict() returns an array with one entry for the single row; compare that\n",
"# entry (1 = '>50K', 0 = '<=50K') rather than the whole array.\n",
"if result[0] == 1:\n",
"    print(\"Employee might get Salary above 50K\")\n",
"else:\n",
"    print(\"Employee might not get Salary above 50K\")"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Enter New Employee's Age: 60\n",
"Enter New Employee's Education: 11\n",
"Enter New Employee's Captital Gain: 0\n",
"Enter New Employee's Hour's Per week: 12\n",
"[0]\n",
"Customer might not got Salary above 50K\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "mjBCAtVFTE89"
},
"source": [
"### *Prediction for all Test Data*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "SLSFpnwqTFk7",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "48095b0e-6a7c-4ec5-d5cd-19c074b42955"
},
"source": [
"y_pred = model.predict(X_test)\n",
"# Side-by-side view: column 0 = predicted label, column 1 = actual label.\n",
"print(np.column_stack((y_pred, y_test)))"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[0 0]\n",
" [0 0]\n",
" [0 0]\n",
" ...\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]]\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dgsr46pcTosv"
},
"source": [
"### *Evaluating Model - CONFUSION MATRIX*"
]
},
{
"cell_type": "code",
"metadata": {
"id": "agWRkVL3Tqn_",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "3e9bdec0-081b-4d19-c9f4-71061cca654d"
},
"source": [
"from sklearn.metrics import confusion_matrix, accuracy_score\n",
"\n",
"# Confusion matrix of the true test labels against the predictions.\n",
"cm = confusion_matrix(y_test, y_pred)\n",
"print(\"Confusion Matrix: \", cm, sep=\"\\n\")\n",
"\n",
"# The score is multiplied by 100 so it is printed as a percentage.\n",
"accuracy_pct = accuracy_score(y_test, y_pred) * 100\n",
"print(\"Accuracy of the Model: {0}%\".format(accuracy_pct))"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Confusion Matrix: \n",
"[[5918 275]\n",
" [1326 622]]\n",
"Accuracy of the Model: 80.33411128853949%\n"
]
}
]
}
]
}