diff --git a/Exercise9 b/Exercise9
new file mode 100644
index 0000000..062a71f
--- /dev/null
+++ b/Exercise9
@@ -0,0 +1,37 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Exercise 9, question 1: scatter plot of SAT score against GPA with a linear fit.
+
+Created on Wed Nov 7 20:30:51 2018
+
+@author: jessicahummel
+"""
+
+# 1: SAT score vs. GPA, with a linear-model trend line
+
+import numpy
+import pandas
+from plotnine import *
+
+# Expects sat.txt (comma-separated, header row with GPA and SATScore) in the
+# current working directory.
+sat = pandas.read_csv("sat.txt", sep=",", header=0)
+
+# Bare expressions: these only echo a value in an interactive console and
+# have no visible effect when the file is run as a script.
+sat.shape
+sat.head(5)
+
+# Base scatter plot; labels and the fitted line are added when printing.
+a = (ggplot(sat, aes(x="GPA", y="SATScore"))
+     + theme_classic()
+     + geom_point()
+     + coord_cartesian())
+
+# print(...) with a single parenthesised argument is valid on both Python 2
+# and Python 3, unlike the `print expr` statement form.
+print(a + xlab("GPA") + ylab("SATScore") + stat_smooth(method="lm"))
+
+# 2: presumably continued in the separate Exercise9-2 script
+
diff --git a/Exercise9-2 b/Exercise9-2
new file mode 100644
index 0000000..08b58ca
--- /dev/null
+++ b/Exercise9-2
@@ -0,0 +1,40 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Exercise 9, question 2: bar chart of region means vs. a jittered scatter plot.
+
+Created on Wed Nov 7 20:56:03 2018
+
+@author: jessicahummel
+"""
+
+import matplotlib.pyplot as plt
+import numpy
+import pandas
+from plotnine import *
+
+# Expects data.txt (comma-separated, header row) with the region and
+# observations columns used below, in the current working directory.
+data = pandas.read_csv("data.txt", sep=",", header=0)
+
+# Bare expression: only echoes in an interactive console.
+data.shape
+
+# Shared base for the bar chart: theme and axis labels only, no aesthetics yet.
+b = ggplot(data) + theme_classic() + xlab("region") + ylab("observations")
+
+# Bar height is the mean of observations within each region.
+# print(...) with one parenthesised argument is valid on Python 2 and 3.
+print(b + geom_bar(aes(x="factor(region)", y="observations"),
+                   stat="summary", fun_y=numpy.mean))
+
+# Jittered scatter plot of every observation, placed by region.
+a = (ggplot(data, aes(x="region", y="observations"))
+     + theme_classic()
+     + geom_jitter())
+
+print(a)
+
+# Yes, the bar and scatter plots tell different stories. The bar graph shows
+# that the means are very similar, while the scatter plot shows that the
+# spreads are very different.
\ No newline at end of file
diff --git a/SAT.csv b/SAT.csv
new file mode 100644
index 0000000..2a3fd5f
--- /dev/null
+++ b/SAT.csv
@@ -0,0 +1,14 @@
+GPA,Math SAT Score
+3.4,705
+3.65,750
+2.9,610
+2.5,400
+3.07,570
+2.3,400
+2.75,550
+3.3,660
+3.4,680
+3,575
+3.7,770
+3.1,575
+3,600
\ No newline at end of file
diff --git a/sat.txt b/sat.txt
new file mode 100644
index 0000000..a44cd41
--- /dev/null
+++ b/sat.txt
@@ -0,0 +1,14 @@
+GPA,SATScore
+3.4,705
+3.65,750
+2.9,610
+2.5,400
+3.07,570
+2.3,400
+2.75,550
+3.3,660
+3.4,680
+3,575
+3.7,770
+3.1,575
+3,600