{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "Qmi36D7ZPY-M" }, "source": [ "# **Day - 6 Titanic Survival Prediction using NAIVE BAYES**" ] }, { "cell_type": "markdown", "metadata": { "id": "Q8lgHC2zPTE4" }, "source": [ "### *Importing basic Libraries*" ] }, { "cell_type": "code", "metadata": { "id": "nKKbpfywIqAq" }, "source": [ "import pandas as pd\n", "import numpy as np" ], "execution_count": 1, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "xfyZYdDaPnJz" }, "source": [ "### *Choose Dataset file from Local Directory*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 73 }, "id": "ki0LIHaOP869", "outputId": "b9ecc9aa-e308-4031-a096-fd77b3f36d4c" }, "source": [ "from google.colab import files\n", "uploaded = files.upload()" ], "execution_count": 2, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [ "\n", " \n", " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. 
Please rerun this cell to enable.\n", " \n", " " ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Saving titanicsurvival.csv to titanicsurvival.csv\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "oEx3VSimP_DF" }, "source": [ "### *Load Dataset*" ] }, { "cell_type": "code", "metadata": { "id": "HQVO5TRBQCGP" }, "source": [ "dataset = pd.read_csv('titanicsurvival.csv')" ], "execution_count": 3, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "Da6ym5z7QHwY" }, "source": [ "### *Summarize Dataset*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Esd6w-GBQLZ5", "outputId": "44a2090a-3b0f-4ee2-c37b-328f185d2bba" }, "source": [ "print(dataset.shape)\n", "print(dataset.head(5))" ], "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(891, 5)\n", " Pclass Sex Age Fare Survived\n", "0 3 male 22.0 7.2500 0\n", "1 1 female 38.0 71.2833 1\n", "2 3 female 26.0 7.9250 1\n", "3 1 female 35.0 53.1000 1\n", "4 3 male 35.0 8.0500 0\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "1AALh-8cS6Jd" }, "source": [ "### *Mapping Text Data to Binary Value*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rcr5RdqtS9iD", "outputId": "24ecd79c-ef7c-4301-be39-ecad5bb7361e" }, "source": [ "# Encode the text labels in 'Sex' as integers: female -> 0, male -> 1.\n", "dataset['Sex'] = dataset['Sex'].map({'female': 0, 'male': 1}).astype(int)\n", "# head is a method: call it so the first rows are printed, not the bound-method repr.\n", "print(dataset.head())" ], "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "_j0iPDCWRYAg" }, "source": [ "### *Segregate Dataset into X(Input/IndependentVariable) & Y(Output/DependentVariable)*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "Cqyxx7qQRYp7", "outputId": "f7fa1388-e792-4dab-ef8b-6110b7b7d6f5" }, "source": [ 
"X = dataset.drop('Survived',axis='columns')\n", "X" ], "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Pclass Sex Age Fare\n", "0 3 1 22.0 7.2500\n", "1 1 0 38.0 71.2833\n", "2 3 0 26.0 7.9250\n", "3 1 0 35.0 53.1000\n", "4 3 1 35.0 8.0500\n", ".. ... ... ... ...\n", "886 2 1 27.0 13.0000\n", "887 1 0 19.0 30.0000\n", "888 3 0 NaN 23.4500\n", "889 1 1 26.0 30.0000\n", "890 3 1 32.0 7.7500\n", "\n", "[891 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "

891 rows × 4 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n", " \n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "X", "summary": "{\n \"name\": \"X\",\n \"rows\": 891,\n \"fields\": [\n {\n \"column\": \"Pclass\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.526497332334044,\n \"min\": 0.42,\n \"max\": 80.0,\n \"num_unique_values\": 88,\n \"samples\": [\n 0.75,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49.693428597180905,\n \"min\": 0.0,\n \"max\": 512.3292,\n \"num_unique_values\": 248,\n \"samples\": [\n 11.2417,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 458 }, "id": "1F1tC2tRRddY", "outputId": "dfa4f8ca-945a-465b-ab63-637411837fb8" }, "source": [ "Y = dataset.Survived\n", "Y" ], "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 0\n", "1 1\n", "2 1\n", "3 1\n", "4 0\n", " ..\n", "886 0\n", "887 1\n", "888 0\n", "889 1\n", "890 0\n", "Name: Survived, Length: 891, dtype: int64" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "

891 rows × 1 columns

\n", "

" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "markdown", "metadata": { "id": "SibVwENGTpsN" }, "source": [ "Finding NA values in our Features X & Filling them with the Column Mean" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "soVDtqhRTwHZ", "outputId": "e18b9852-5d91-40cb-c46d-94f3402f13a2" }, "source": [ "X.columns[X.isna().any()]" ], "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['Age'], dtype='object')" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "metadata": { "id": "0_jCaFTRXQj1" }, "source": [ "X.Age = X.Age.fillna(X.Age.mean())" ], "execution_count": 9, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "nYNPgh4cX0bt" }, "source": [ "### *Check again for any remaining NA values*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QSBSGrNfX3NA", "outputId": "5a65aec5-9704-4c66-8c2c-9012d961c99b" }, "source": [ "X.columns[X.isna().any()]" ], "execution_count": 10, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index([], dtype='object')" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "markdown", "metadata": { "id": "R4ngba4SYEue" }, "source": [ "### *Splitting Dataset into Train & Test*" ] }, { "cell_type": "code", "metadata": { "id": "vy9RTlZ4YFyO" }, "source": [ "from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25,random_state =0)" ], "execution_count": 11, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ocZLLSzgYl9V" }, "source": [ "### *Training*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 80 }, "id": "tPSuaammYz_4", "outputId": "a3a5cc02-a37f-41be-d0d6-514872a484e1" }, "source": [ "from sklearn.naive_bayes import GaussianNB\n", "model = GaussianNB()\n", 
"model.fit(X_train, y_train)" ], "execution_count": 12, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "GaussianNB()" ], "text/html": [ "
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "markdown", "metadata": { "id": "v63bNnciZZYS" }, "source": [ "### *Predicting, whether Person Survived or Not*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "s17AtjCOZeEZ", "outputId": "fb8f0722-4755-463a-b2c7-3761e564e2b3" }, "source": [ "pclassNo = int(input(\"Enter Person's Pclass number: \"))\n", "gender = int(input(\"Enter Person's Gender 0-female 1-male(0 or 1): \"))\n", "# Age is read as a float because the dataset holds fractional ages (e.g. 0.42).\n", "age = float(input(\"Enter Person's Age: \"))\n", "fare = float(input(\"Enter Person's Fare: \"))\n", "# Reuse the training column names so scikit-learn does not warn about missing feature names.\n", "person = pd.DataFrame([[pclassNo, gender, age, fare]], columns=X_train.columns)\n", "result = model.predict(person)\n", "print(result)\n", "\n", "# predict() returns a one-element array here; compare its single value.\n", "if result[0] == 1:\n", "    print(\"Person might be Survived\")\n", "else:\n", "    print(\"Person might not be Survived\")" ], "execution_count": 13, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Enter Person's Pclass number: 20\n", "Enter Person's Gender 0-female 1-male(0 or 1): 0\n", "Enter Person's Age: 43\n", "Enter Person's Fare: 4567\n", "[1]\n", "Person might be Survived\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py:2739: UserWarning: X does not have valid feature names, but GaussianNB was fitted with feature names\n", " warnings.warn(\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "1PdvxG-La4H3" }, "source": [ "### *Prediction for all Test Data*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fShPpJ75a6u0", "outputId": "8d96d8c6-14fc-4c3d-fe83-35f22246d155" }, "source": [ "y_pred = model.predict(X_test)\n", "print(np.column_stack((y_pred,y_test)))" ], "execution_count": 14, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[[0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [1 1]\n", " [0 1]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 
1]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 0]\n", " [0 1]\n", " [0 1]\n", " [1 1]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [1 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [0 1]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 1]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [0 1]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", " [1 1]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [0 1]\n", " [1 0]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [0 1]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 0]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [1 1]\n", " [0 0]\n", " [1 0]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 0]\n", " [0 1]\n", " [1 0]\n", " [1 1]\n", " [0 0]\n", " [0 1]\n", " [1 1]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [1 0]\n", 
" [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [1 1]\n", " [0 1]\n", " [0 0]\n", " [0 1]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [0 1]\n", " [0 0]\n", " [1 0]\n", " [0 0]\n", " [1 1]\n", " [0 0]\n", " [0 0]\n", " [0 1]\n", " [0 0]\n", " [1 0]\n", " [0 0]\n", " [0 0]\n", " [0 0]\n", " [0 1]\n", " [1 0]\n", " [1 1]\n", " [0 0]\n", " [1 1]\n", " [1 1]]\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "lFeW_-qYdszc" }, "source": [ "### *Accuracy of our Model*" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HDOFRQ0PdzQS", "outputId": "9e0c4bba-ac94-4065-dac9-2e69d47ddebd" }, "source": [ "from sklearn.metrics import accuracy_score\n", "print(\"Accuracy of the Model: {0}%\".format(accuracy_score(y_test, y_pred)*100))" ], "execution_count": 15, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Accuracy of the Model: 77.57847533632287%\n" ] } ] } ] }