Skip to content

Commit 2493783

Browse files
authored
Titanic Data analysis Using Streamlit and EDA
Titanic Data analysis Using Streamlit and EDA
1 parent 769a7bb commit 2493783

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed

Diff for: app.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import seaborn as sns
2+
import streamlit as st
3+
import pandas as pd
4+
from matplotlib import pyplot as plt
5+
import numpy as np
6+
7+
# Load the Titanic dataset.
# Default CSV location: the path this app was originally written against.
DEFAULT_DATA_PATH = r"C:\Users\vemer\Documents\Naresh Technology\New\21st FEB EDA\titanic dataset.csv"


@st.cache_data
def load_data(path=DEFAULT_DATA_PATH):
    """Read the Titanic CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to ``DEFAULT_DATA_PATH`` so
            existing ``load_data()`` calls keep working; pass another path to
            run the app where that file does not exist.

    Returns:
        The DataFrame produced by ``pd.read_csv(path)``.
    """
    return pd.read_csv(path)
12+
13+
data = load_data()

# Title and description
# The page title names the dataset being explored (spelling corrected).
st.title('Exploratory data analysis of Titanic Dataset')
st.write(" This is an EDA on the titanic dataset")
st.write("First few rows of dataset:")
# Preview of the raw data, before any optional cleaning step is applied.
st.dataframe(data.head())
20+
21+
# Data cleaning section
st.subheader('Missing values')
# Per-column count of null entries, shown before any cleaning is applied.
missing_data = data.isnull().sum()
st.write(missing_data)

# Each checkbox below switches on one optional cleaning step.
if st.checkbox('Fill missing Age with median'):
    # Use the median, matching the checkbox label.
    # The result is assigned back to the column rather than calling
    # fillna(inplace=True) on the selected column, which pandas treats as
    # chained assignment.
    data['Age'] = data['Age'].fillna(data['Age'].median())

if st.checkbox('Fill missing Embarked with mode'):
    embarked_modes = data['Embarked'].mode()
    # mode() is empty when the column holds no non-null values; in that case
    # there is nothing to fill with, so the column is left unchanged.
    if not embarked_modes.empty:
        data['Embarked'] = data['Embarked'].fillna(embarked_modes.iloc[0])

if st.checkbox('Drop duplicates'):
    data = data.drop_duplicates()

st.subheader('Cleaned Dataset')
st.dataframe(data.head())
37+
38+
# EDA section: descriptive statistics of the (optionally cleaned) data
st.subheader('Statistical Summary of the Data')
summary_stats = data.describe()
st.write(summary_stats)
41+
42+
# Age Distribution: histogram of the Age column with a KDE overlay
st.subheader('Age Distribution')
fig, ax = plt.subplots()
sns.histplot(data['Age'], kde=True, ax=ax)
ax.set_title('Age Distribution')
st.pyplot(fig)
# Streamlit re-runs the script on every interaction; close the figure once it
# has been rendered so pyplot does not keep one more open figure per rerun.
plt.close(fig)
48+
49+
# Gender Distribution: row count per value of the Sex column
st.subheader('Gender Distribution')
fig, ax = plt.subplots()
sns.countplot(x='Sex', data=data, ax=ax)
ax.set_title('Gender Distribution')
st.pyplot(fig)
# Streamlit re-runs the script on every interaction; close the figure once it
# has been rendered so pyplot does not keep one more open figure per rerun.
plt.close(fig)
55+
56+
# Pclass vs Survived: row count per Pclass value, split by the Survived column
st.subheader('Pclass vs Survived')
fig, ax = plt.subplots()
sns.countplot(x='Pclass', hue='Survived', data=data, ax=ax)
ax.set_title('Pclass vs Survived')
st.pyplot(fig)
# Streamlit re-runs the script on every interaction; close the figure once it
# has been rendered so pyplot does not keep one more open figure per rerun.
plt.close(fig)
62+
63+
# Correlation Heatmap
# This section is real code rather than a bare triple-quoted string: Streamlit
# "magic" writes any standalone literal to the page, so a string used as a
# block comment would be rendered in the app as text.
st.subheader('Correlation Heatmap')
fig, ax = plt.subplots()
# Correlate only the numeric columns, in a separate frame. `data` itself is
# left untouched so the sections that follow still see the original columns,
# and text columns are skipped instead of being one-hot encoded.
numeric_corr = data.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
st.pyplot(fig)
# Close the rendered figure so reruns do not accumulate open figures.
plt.close(fig)
74+
75+
# Feature Engineering Section
st.subheader('Feature Engineering: Family Size')
# FamilySize is the row-wise sum of the SibSp and Parch columns.
data['FamilySize'] = data['SibSp'] + data['Parch']
fig, ax = plt.subplots()
sns.histplot(data['FamilySize'], kde=True, ax=ax)
ax.set_title('Family Size Distribution')
st.pyplot(fig)
# Streamlit re-runs the script on every interaction; close the figure once it
# has been rendered so pyplot does not keep one more open figure per rerun.
plt.close(fig)
82+
83+
# Conclusion Section: fixed list of takeaways shown as a bullet list
st.subheader('Key Insights')
insight_points = (
    "- Females have a higher survival rate than males.",
    "- Passengers in 1st class had the highest survival rate.",
    "- The majority of passengers are in Pclass 3.",
    "- Younger passengers tended to survive more often.",
)
# One bullet per line, wrapped in a leading and a trailing newline.
insights = "\n" + "\n".join(insight_points) + "\n"
st.write(insights)
92+

0 commit comments

Comments
 (0)