diff --git a/GrayKangaroo.txt b/GrayKangaroo.txt
new file mode 100644
index 0000000..23d7089
--- /dev/null
+++ b/GrayKangaroo.txt
@@ -0,0 +1,49 @@
+Nasal Length	Nasal Width
+609	241
+629	222
+620	233
+564	207
+645	247
+493	189
+606	226
+660	240
+630	215
+672	231
+778	263
+616	220
+727	271
+810	284
+778	279
+823	272
+755	268
+710	278
+701	238
+803	255
+855	308
+838	281
+830	288
+864	306
+635	236
+565	204
+562	216
+580	225
+596	220
+597	219
+636	201
+559	213
+615	228
+740	234
+677	237
+675	217
+629	211
+692	238
+710	221
+730	281
+763	292
+686	251
+717	231
+737	275
+816	275
+
+
+
diff --git a/TutorialEx09.py b/TutorialEx09.py
new file mode 100644
index 0000000..dea77a4
--- /dev/null
+++ b/TutorialEx09.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov 7 22:37:17 2018
+
+@author: Alicia
+
+Tutorial exercise 9: plotnine plots of the Gray Kangaroo nasal measurements
+(Question 1) and of the per-region observations in data.txt (Question 2).
+
+The bare expressions below (head(), mean() and the plot expressions) only
+display a result when evaluated in an interactive console or notebook cell;
+when the file is run as a plain script their values are discarded.
+"""
+import numpy
+import pandas
+from plotnine import *
+
+# Question 1 - sample data for Gray Kangaroos
+# Tab-separated file; the first row holds the column names.
+kangaroo = pandas.read_csv("GrayKangaroo.txt", sep="\t", header=0)
+kangaroo.head(5)
+# Nasal width against nasal length, with a linear-model ("lm") smoother.
+nasal_plot = ggplot(kangaroo, aes(x="Nasal Length", y="Nasal Width"))
+nasal_plot + geom_point() + coord_cartesian() + theme_classic() + stat_smooth(method="lm")
+
+# Question 2
+# Comma-separated file; "region" and "observations" columns are used below.
+regions = pandas.read_csv("data.txt", sep=",", header=0)
+regions.head(5)
+# Bar plot of the mean number of observations in each region.
+bar_base = ggplot(regions) + theme_classic() + xlab("region") + ylab("observations")
+bar_base + geom_bar(aes(x="factor(region)", y="observations"), stat="summary", fun_y=numpy.mean)
+# To check that the bar plot mean values are correct
+regions.groupby(["region"])["observations"].mean()
+
+# Jittered scatter plot of the individual observations in each region.
+# geom_jitter() already draws one point per observation, so there is no
+# separate geom_point() layer: it would draw every observation a second time.
+jitter_plot = ggplot(regions, aes(x="region", y="observations"))
+jitter_plot + geom_jitter() + coord_cartesian() + theme_classic()
+
+
+# The bar plot shows that the mean number of observations is about the same
+# for each region.
+# The scatter plot shows that, even though the means are similar, the
+# individual observations are distributed differently in each region.
+# The east and west regions are fairly evenly spread, whereas the north region
+# is clumped around 15 and the south region is clumped around 5 and 25.
+# These different sets of observations all give similar means, but the scatter
+# plot gives a more accurate picture of how the observations are distributed
+# within each region.