diff --git a/11___House_price_prediction_using_Linear_Regression.ipynb b/11___House_price_prediction_using_Linear_Regression.ipynb new file mode 100644 index 0000000..31acfce --- /dev/null +++ b/11___House_price_prediction_using_Linear_Regression.ipynb @@ -0,0 +1,1508 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I1VRs4tZkbvW" + }, + "source": [ + "# **Day-11 | House price prediction using Linear Regression-SingleVariable**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SAFLqwkKk8rK" + }, + "source": [ + "### *Import Libraries*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EgF2lvr_jzVL" + }, + "source": [ + "import pandas as pd\n", + "from sklearn.linear_model import LinearRegression\n", + "import matplotlib.pyplot as plt" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XWe_7j6UjxRj" + }, + "source": [ + "### *Load Dataset from Local Directory*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vKrHCJk_jwfJ", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 73 + }, + "outputId": "715c67be-afd2-4b0d-c041-a7d3888a5095" + }, + "source": [ + "from google.colab import files\n", + "uploaded = files.upload()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving dataset.csv to dataset.csv\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6gXowmSom462" + }, + "source": [ + "### *Load Dataset*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "6JLDHSdym6wP" + }, + "source": [ + "dataset = pd.read_csv('dataset.csv')" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-DdkIy1ZnDfA" + }, + "source": [ + "### *Summarize Dataset*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OlElQViRnGFp", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d47549da-8c94-4e4b-ef5c-d76fae7a04d8" + }, + "source": [ + "print(dataset.shape)\n", + "print(dataset.head(5))" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(1460, 2)\n", + " area price\n", + "0 8450 208500\n", + "1 9600 181500\n", + "2 11250 223500\n", + "3 9550 140000\n", + "4 14260 250000\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5yk_BN4nMtD" + }, + "source": [ + "### *Visualize Dataset*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "a8Mi5nkFnOTQ", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 466 + }, + "outputId": "cb410515-c01a-4870-f356-77eef2cedaa8" + }, + "source": [ + "plt.xlabel('Area')\n", + "plt.ylabel('Price')\n", + "plt.scatter(dataset.area,dataset.price,color='red',marker='*')" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 5 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JRyfB6prpJDP" + }, + "source": [ + "### *Segregate Dataset into Input X & Output Y*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "x9dQcTohpK1X", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "outputId": "f42cd163-c1ad-4279-da9b-845cd0644611" + }, + "source": [ + "X = dataset.drop('price',axis='columns')\n", + "X" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " area\n", + "0 8450\n", + "1 9600\n", + "2 11250\n", + "3 9550\n", + "4 14260\n", + "... ...\n", + "1455 7917\n", + "1456 13175\n", + "1457 9042\n", + "1458 9717\n", + "1459 9937\n", + "\n", + "[1460 rows x 1 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
area
08450
19600
211250
39550
414260
......
14557917
145613175
14579042
14589717
14599937
\n", + "

1460 rows × 1 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "X", + "summary": "{\n \"name\": \"X\",\n \"rows\": 1460,\n \"fields\": [\n {\n \"column\": \"area\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9981,\n \"min\": 1300,\n \"max\": 215245,\n \"num_unique_values\": 1073,\n \"samples\": [\n 10186,\n 8163,\n 8854\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SqxVaBO0pf1W", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 458 + }, + "outputId": "dfdcb6a7-44fb-4805-bb42-0085a18e0e45" + }, + "source": [ + "Y = dataset.price\n", + "Y" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 208500\n", + "1 181500\n", + "2 223500\n", + "3 140000\n", + "4 250000\n", + " ... \n", + "1455 175000\n", + "1456 210000\n", + "1457 266500\n", + "1458 142125\n", + "1459 147500\n", + "Name: price, Length: 1460, dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
price
0208500
1181500
2223500
3140000
4250000
......
1455175000
1456210000
1457266500
1458142125
1459147500
\n", + "

1460 rows × 1 columns

\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KsDoGjjbpmjk" + }, + "source": [ + "### *Train the Model using Linear Regression*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nKmEySI1poV_", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "outputId": "e8b7d151-62fe-414c-b8c4-381bad76b2a6" + }, + "source": [ + "model = LinearRegression()\n", + "model.fit(X,Y)" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ], + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n-UeFqpGpw9p" + }, + "source": [ + "### *Predict Price for a custom Land Area (sq. ft.)*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ollo3wTcpyKQ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "365ec852-2686-4b53-85ec-56783730f77f" + }, + "source": [ + "x=2400\n", + "LandAreainSqFt=[[x]]\n", + "PredictedmodelResult = model.predict(LandAreainSqFt)\n", + "print(PredictedmodelResult)" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[163876.08458098]\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py:2739: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1PbTSCtSp3lC" + }, + "source": [ + "### Let's check whether our model is right\n", + "### Theory Calculation\n", + "### Y = m * X + b (m is coefficient and b is intercept)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T5eVVDPvp8Hc" + }, + "source": [ + "*Coefficient - m*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1SvYtiI2p4ZB", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d9726623-f842-4474-e680-d61ca7cefa09" + }, + "source": [ + "m=model.coef_\n", + "print(m)" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2.09997195]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nSXLgArvqBu2" + }, + "source": [ + "*Intercept - b*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mxyroJ6uqCet", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": 
"f8bfef10-61d7-456b-e860-0eb4bf5a047a" + }, + "source": [ + "b=model.intercept_\n", + "print(b)" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "158836.1518968766\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VCLG6YIOqEaX" + }, + "source": [ + "### Y=mx+b\n", + "*x is the independent variable - Input - area*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kRHG8tUFqO1i", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "83287893-80dc-4c5c-d20b-595ac364519b" + }, + "source": [ + "y = m*x + b\n", + "print(\"The Price of {0} Square feet Land is: {1}\".format(x,y[0]))" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The Price of 2400 Square feet Land is: 163876.08458097503\n" + ] + } + ] + } + ] +} \ No newline at end of file