diff --git a/SAT.csv b/SAT.csv
new file mode 100644
index 0000000..2a3fd5f
--- /dev/null
+++ b/SAT.csv
@@ -0,0 +1,14 @@
+GPA,Math SAT Score
+3.4,705
+3.65,750
+2.9,610
+2.5,400
+3.07,570
+2.3,400
+2.75,550
+3.3,660
+3.4,680
+3,575
+3.7,770
+3.1,575
+3,600
\ No newline at end of file
diff --git a/exercise-9-q1 b/exercise-9-q1
new file mode 100644
index 0000000..293b7c4
--- /dev/null
+++ b/exercise-9-q1
@@ -0,0 +1,20 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov 7 20:31:47 2018
+
+@author: atatarian
+"""
+
+import numpy
+import pandas
+from plotnine import *
+
+# The data file is added as lowercase "sat.txt"; "SAT.txt" breaks on case-sensitive filesystems.
+sat=pandas.read_csv("sat.txt",sep="\t",header=0)
+
+print(sat)
+
+a=ggplot(sat,aes(x="GPA",y="SAT-Score"))+theme_classic()+geom_point()+coord_cartesian()
+
+print(a+xlab("GPA")+ylab("SAT-Score")+stat_smooth(method="lm"))
diff --git a/exercise-9-q2 b/exercise-9-q2
new file mode 100644
index 0000000..37aae6f
--- /dev/null
+++ b/exercise-9-q2
@@ -0,0 +1,34 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov 7 20:49:52 2018
+
+@author: atatarian
+"""
+
+import numpy
+import pandas
+from plotnine import *
+
+data=pandas.read_csv("data.txt",sep=",",header=0)
+# The plots below use the "region" and "observations" columns of data.txt.
+
+
+# Bar plot: mean of the observations for each region.
+
+
+d=ggplot(data)+theme_classic()+xlab("region")+ylab("observations")
+
+print(d+geom_bar(aes(x="factor(region)",y="observations"),stat="summary",fun_y=numpy.mean))
+
+# Scatter plot: every individual observation, jittered, by region.
+
+a=ggplot(data,aes(x="region",y="observations"))+theme_classic()+geom_jitter()
+
+print(a)
+
+
+# The bar plot and the scatter plot tell different stories, most notably for
+# the north and south regions. While the means are similar, as shown by the
+# bar plot, the spreads, or distributions, of the data are different, as shown
+# by the scatter plot.
\ No newline at end of file diff --git a/sat.txt b/sat.txt new file mode 100644 index 0000000..00f376c --- /dev/null +++ b/sat.txt @@ -0,0 +1,14 @@ +GPA SAT-Score +3.4 705 +3.65 750 +2.9 610 +2.5 400 +3.07 570 +2.3 400 +2.75 550 +3.3 660 +3.4 680 +3 575 +3.7 770 +3.1 575 +3 600