-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFinal Heart.py
More file actions
65 lines (55 loc) · 2.55 KB
/
Final Heart.py
File metadata and controls
65 lines (55 loc) · 2.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# loading basic libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Read the heart-disease dataset from its CSV file.
# The original path literal ended with a stray space ("heart.csv "), which
# fails on filesystems that do not trim trailing spaces from file names.
df = pd.read_csv("C:/Users/adm/Documents/Datasets/new dataset/heart.csv")

# Explore the dataset to derive useful information. A bare expression such as
# `df.head()` displays nothing when this file runs as a script, so every
# inspection result is printed explicitly.
print(df.head())
print(df.columns)

# Rename the 'output' column to 'target'; the plots below refer to it by
# that name.
df = df.rename(columns={'output': 'target'})

# Statistical details in numerical format. `describe` has to be *called*:
# referencing the bound method without parentheses computes nothing.
# From the age column we can infer that the minimum age is 29 years, the
# maximum is 77 years and the mean age is 54 years.
print(df.describe())

# Number of missing values per column.
print(df.isnull().sum())

# `DataFrame.info()` writes its report itself and returns None, so it is
# called directly; wrapping it in print() emitted an extra "None" line.
df.info()
# The data has no null values
# Correlation among the attributes, drawn as an annotated heatmap on a
# wide (20 x 10 inch) figure.
plt.figure(figsize=(20, 10))
correlations = df.corr()
sns.heatmap(correlations, cmap='terrain', annot=True)

# Pairwise relationships between all columns (pairplot opens its own figure).
sns.pairplot(df)

# One histogram per column, arranged with layout=(5, 3).
df.hist(layout=(5, 3), figsize=(12, 12))

# Box-and-whisker plots, one subplot per column, same layout and size.
df.plot.box(subplots=True, layout=(5, 3), figsize=(12, 12))
# Age by sex, coloured by target. catplot is a figure-level function and
# creates its own figure.
sns.catplot(data=df, x='sex', y='age', hue='target', palette=['blue', 'yellow'])

# barplot/countplot are axes-level functions: without an explicit axes they
# draw on the current one. Each of them therefore gets a fresh figure here;
# otherwise, when run as a script, they would all be painted on top of the
# catplot above.
plt.figure()
sns.barplot(data=df, x='sex', y='chol', hue='target', palette=['darkred', 'steelblue'])

# Bare `value_counts()` expressions show nothing in a script, so print them.
print(df['sex'].value_counts())     # 207 males , 96 females
print(df['target'].value_counts())  # 165 cases of heart disease
# Results of the thallium stress test measuring blood flow to the heart, with
# possible values of normal, fixed_defect, reversible_defect.
print(df['thall'].value_counts())

plt.figure()
sns.countplot(x="sex", data=df, palette='husl', hue='target')
# With 1 representing males and 0 females, we observe that fewer females than
# males have heart disease in absolute numbers. Relative to the size of each
# group, however, males appear less affected by heart disease than females
# in this dataset (read off the plot; verify against the printed counts).

plt.figure()
sns.countplot(x='target', palette='BuGn', data=df)
# The counts of not having and having heart disease are almost balanced:
# 165 of the 303 records have heart disease, the remaining 138 do not.
# Distribution of caa, split by target, on a wide figure.
plt.figure(figsize=(20, 10))
sns.countplot(x='caa', hue='target', data=df)
# caa: number of major vessels (0-3) colored by fluoroscopy
# Bare `value_counts()` expressions show nothing in a script, so print them.
print(df['caa'].value_counts())
# caa is negatively correlated with the target, i.e. higher caa values go
# together with fewer heart-disease cases and vice versa (an association,
# not a causal effect).

# Thall vs target. The 'ggplot' style is switched on here and stays active
# for every plot created afterwards.
plt.figure()
plt.style.use('ggplot')
sns.countplot(x='thall', data=df, hue='target', palette='BuPu')
plt.title("Thall vs Target", fontsize=15)
plt.gcf().set_size_inches(12, 6)

# Chest-pain type (cp) vs target.
print(df['cp'].value_counts())
plt.figure()
sns.countplot(x='cp', data=df, hue='target', palette='rocket')
plt.title("Chestpain vs Target")

# Cholesterol by sex, split by target, on a steel-blue figure background.
plt.figure(facecolor="steelblue")
sns.boxplot(x="sex", y='chol', hue='target', palette='seismic', data=df)
# NOTE(review): the title text below looks like a leftover placeholder (the
# plot shows chol by sex and target); kept unchanged - confirm intended wording.
plt.title('sex vs Target() \n Example \n', fontsize=12, fontweight='bold')

# Display every figure created by this script. Without this call a
# non-interactive run exits without showing any plot.
plt.show()