"cells": [
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
"source": [
"cell_type": "markdown",
"metadata": {
"id": "Qmi36D7ZPY-M"
"source": [
"# **Day - 6 Titanic Survival Prediction using NAIVE BAYES**"
"cell_type": "markdown",
"metadata": {
"id": "Q8lgHC2zPTE4"
"source": [
"### *Importing basic Libraries*"
"cell_type": "code",
"metadata": {
"id": "nKKbpfywIqAq"
"source": [
"import pandas as pd\n",
"import numpy as np"
"execution_count": 1,
"outputs": []
"cell_type": "markdown",
"metadata": {
"id": "xfyZYdDaPnJz"
"source": [
"### *Choose Dataset file from Local Directory*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 73
"id": "ki0LIHaOP869",
"outputId": "b9ecc9aa-e308-4031-a096-fd77b3f36d4c"
"source": [
"import os\n",
"\n",
"# Ask for an upload only when the CSV is not already in the working directory, so a\n",
"# Restart & Run All does not block on the file picker and the notebook can also run\n",
"# outside Colab once the file is present.\n",
"if os.path.exists('titanicsurvival.csv'):\n",
"    uploaded = {}\n",
"else:\n",
"    from google.colab import files\n",
"    uploaded = files.upload()"
"execution_count": 2,
"outputs": [
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
"text/html": [
"metadata": {}
"output_type": "stream",
"name": "stdout",
"text": [
"Saving titanicsurvival.csv to titanicsurvival.csv\n"
"cell_type": "markdown",
"metadata": {
"id": "oEx3VSimP_DF"
"source": [
"### *Load Dataset*"
"cell_type": "code",
"metadata": {
"source": [
"# Read the CSV from the working directory into a DataFrame.\n",
"csv_path = 'titanicsurvival.csv'\n",
"dataset = pd.read_csv(csv_path)"
"execution_count": 3,
"outputs": []
"cell_type": "markdown",
"metadata": {
"id": "Da6ym5z7QHwY"
"source": [
"### *Summarize Dataset*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "Esd6w-GBQLZ5",
"outputId": "44a2090a-3b0f-4ee2-c37b-328f185d2bba"
"source": [
"execution_count": 4,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"(891, 5)\n",
" Pclass Sex Age Fare Survived\n",
"0 3 male 22.0 7.2500 0\n",
"1 1 female 38.0 71.2833 1\n",
"2 3 female 26.0 7.9250 1\n",
"3 1 female 35.0 53.1000 1\n",
"4 3 male 35.0 8.0500 0\n"
"cell_type": "markdown",
"metadata": {
"id": "1AALh-8cS6Jd"
"source": [
"### *Mapping Text Data to Binary Value*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "rcr5RdqtS9iD",
"outputId": "24ecd79c-ef7c-4301-be39-ecad5bb7361e"
"source": [
"# Distinct labels currently present in the Sex column (original variable name kept).\n",
"income_set = set(dataset['Sex'])\n",
"# Encode only while the column still holds text labels: mapping an already-encoded\n",
"# column would turn every value into NaN and make astype(int) raise when the cell\n",
"# is run a second time.\n",
"if not pd.api.types.is_numeric_dtype(dataset['Sex']):\n",
"    dataset['Sex'] = dataset['Sex'].map({'female': 0, 'male': 1}).astype(int)\n",
"execution_count": 5,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"<bound method NDFrame.head of Pclass Sex Age Fare Survived\n",
"0 3 1 22.0 7.2500 0\n",
"1 1 0 38.0 71.2833 1\n",
"2 3 0 26.0 7.9250 1\n",
"3 1 0 35.0 53.1000 1\n",
"4 3 1 35.0 8.0500 0\n",
".. ... ... ... ... ...\n",
"886 2 1 27.0 13.0000 0\n",
"887 1 0 19.0 30.0000 1\n",
"888 3 0 NaN 23.4500 0\n",
"889 1 1 26.0 30.0000 1\n",
"890 3 1 32.0 7.7500 0\n",
"[891 rows x 5 columns]>\n"
"cell_type": "markdown",
"metadata": {
"id": "_j0iPDCWRYAg"
"source": [
"### *Segregate Dataset into X (Input / Independent Variables) & Y (Output / Dependent Variable)*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
"id": "Cqyxx7qQRYp7",
"outputId": "f7fa1388-e792-4dab-ef8b-6110b7b7d6f5"
"source": [
"# Feature frame: every column except the target label.\n",
"X = dataset.drop(columns='Survived')\n",
"execution_count": 6,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
" Pclass Sex Age Fare\n",
"0 3 1 22.0 7.2500\n",
"1 1 0 38.0 71.2833\n",
"2 3 0 26.0 7.9250\n",
"3 1 0 35.0 53.1000\n",
"4 3 1 35.0 8.0500\n",
".. ... ... ... ...\n",
"886 2 1 27.0 13.0000\n",
"887 1 0 19.0 30.0000\n",
"888 3 0 NaN 23.4500\n",
"889 1 1 26.0 30.0000\n",
"890 3 1 32.0 7.7500\n",
"[891 rows x 4 columns]"
"text/html": [
"metadata": {},
"execution_count": 6
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 458
"id": "1F1tC2tRRddY",
"outputId": "dfa4f8ca-945a-465b-ab63-637411837fb8"
"source": [
"# Target series: the Survived label column.\n",
"Y = dataset['Survived']\n",
"execution_count": 7,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"0 0\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 0\n",
" ..\n",
"886 0\n",
"887 1\n",
"888 0\n",
"889 1\n",
"890 0\n",
"Name: Survived, Length: 891, dtype: int64"
"metadata": {},
"execution_count": 7
"cell_type": "markdown",
"metadata": {
"id": "SibVwENGTpsN"
"source": [
"### *Finding & Replacing NA values in our Features X (missing Age is filled with the mean Age)*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "soVDtqhRTwHZ",
"outputId": "e18b9852-5d91-40cb-c46d-94f3402f13a2"
"source": [
"execution_count": 8,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"Index(['Age'], dtype='object')"
"metadata": {},
"execution_count": 8
"cell_type": "code",
"metadata": {
"id": "0_jCaFTRXQj1"
"source": [
"# Replace missing ages with the mean of the Age column.\n",
"mean_age = X['Age'].mean()\n",
"X['Age'] = X['Age'].fillna(mean_age)"
"execution_count": 9,
"outputs": []
"cell_type": "markdown",
"metadata": {
"id": "nYNPgh4cX0bt"
"source": [
"### *Check again for any remaining NA values*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "QSBSGrNfX3NA",
"outputId": "5a65aec5-9704-4c66-8c2c-9012d961c99b"
"source": [
"execution_count": 10,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"Index([], dtype='object')"
"metadata": {},
"execution_count": 10
"cell_type": "markdown",
"metadata": {
"id": "R4ngba4SYEue"
"source": [
"### *Splitting Dataset into Train & Test*"
"cell_type": "code",
"metadata": {
"id": "vy9RTlZ4YFyO"
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 25% of the rows for testing; the fixed random_state keeps the split\n",
"# reproducible between runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    Y,\n",
"    test_size=0.25,\n",
"    random_state=0,\n",
")"
"execution_count": 11,
"outputs": []
"cell_type": "markdown",
"metadata": {
"id": "ocZLLSzgYl9V"
"source": [
"### *Training*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
"id": "tPSuaammYz_4",
"outputId": "a3a5cc02-a37f-41be-d0d6-514872a484e1"
"source": [
"from sklearn.naive_bayes import GaussianNB\n",
"\n",
"# Fit a Gaussian Naive Bayes classifier on the training split. fit() is left as the\n",
"# last expression so the notebook still renders the fitted estimator.\n",
"model = GaussianNB()\n",
"model.fit(X=X_train, y=y_train)"
"execution_count": 12,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"text/html": [
"metadata": {},
"execution_count": 12
"cell_type": "markdown",
"metadata": {
"id": "v63bNnciZZYS"
"source": [
"### *Predicting whether a Person Survived or Not*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "s17AtjCOZeEZ",
"outputId": "fb8f0722-4755-463a-b2c7-3761e564e2b3"
"source": [
"# Collect one passenger's features interactively.\n",
"pclassNo = int(input(\"Enter Person's Pclass number: \"))\n",
"gender = int(input(\"Enter Person's Gender 0-female 1-male(0 or 1): \"))\n",
"# Age is a float column in the data, so accept fractional ages as well as whole years.\n",
"age = float(input(\"Enter Person's Age: \"))\n",
"fare = float(input(\"Enter Person's Fare: \"))\n",
"# Build a one-row frame with the column names the model was fitted on; passing a bare\n",
"# list triggers sklearn's 'X does not have valid feature names' warning.\n",
"person = pd.DataFrame([[pclassNo, gender, age, fare]], columns=model.feature_names_in_)\n",
"result = model.predict(person)\n",
"# predict() returns one label per input row; inspect the single entry explicitly.\n",
"if result[0] == 1:\n",
"    print(\"Person might be Survived\")\n",
"else:\n",
"    print(\"Person might not be Survived\")"
"execution_count": 13,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"Enter Person's Pclass number: 20\n",
"Enter Person's Gender 0-female 1-male(0 or 1): 0\n",
"Enter Person's Age: 43\n",
"Enter Person's Fare: 4567\n",
"Person might be Survived\n"
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py:2739: UserWarning: X does not have valid feature names, but GaussianNB was fitted with feature names\n",
" warnings.warn(\n"
"cell_type": "markdown",
"metadata": {
"id": "1PdvxG-La4H3"
"source": [
"### *Prediction for all Test Data*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "fShPpJ75a6u0",
"outputId": "8d96d8c6-14fc-4c3d-fe83-35f22246d155"
"source": [
"# Predicted labels for every row of the held-out test features.\n",
"y_pred = model.predict(X=X_test)\n",
"execution_count": 14,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"[[0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 0]\n",
" [0 1]\n",
" [0 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [1 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 1]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 1]\n",
" [1 0]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 0]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [1 0]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 0]\n",
" [0 1]\n",
" [1 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 1]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 1]\n",
" [0 0]\n",
" [0 1]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 1]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [1 1]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [0 0]\n",
" [1 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 0]\n",
" [0 1]\n",
" [1 0]\n",
" [1 1]\n",
" [0 0]\n",
" [1 1]\n",
" [1 1]]\n"
"cell_type": "markdown",
"metadata": {
"id": "lFeW_-qYdszc"
"source": [
"### *Accuracy of our Model*"
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
"id": "HDOFRQ0PdzQS",
"outputId": "9e0c4bba-ac94-4065-dac9-2e69d47ddebd"
"source": [
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Share of test rows whose predicted label matches the true label, as a percentage.\n",
"accuracy_pct = accuracy_score(y_test, y_pred) * 100\n",
"print(f\"Accuracy of the Model: {accuracy_pct}%\")"
"execution_count": 15,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy of the Model: 77.57847533632287%\n"
} |