Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions GrayKangaroo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
Nasal Length Nasal Width
609 241
629 222
620 233
564 207
645 247
493 189
606 226
660 240
630 215
672 231
778 263
616 220
727 271
810 284
778 279
823 272
755 268
710 278
701 238
803 255
855 308
838 281
830 288
864 306
635 236
565 204
562 216
580 225
596 220
597 219
636 201
559 213
615 228
740 234
677 237
675 217
629 211
692 238
710 221
730 281
763 292
686 251
717 231
737 275
816 275



31 changes: 31 additions & 0 deletions TutorialEx09.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 7 22:37:17 2018

@author: Alicia
"""
import numpy
import pandas
from plotnine import *
def _render(plot):
    """Display a plotnine plot from a script or an interactive console.

    A bare ``ggplot(...) + ...`` expression is only drawn when an interactive
    console echoes it; when the file is run as a script the expression's value
    is discarded and nothing appears.

    NOTE(review): recent plotnine releases provide ``ggplot.show()``, while
    older releases draw the figure as a side effect of printing the plot.
    Both are handled here -- confirm against the installed plotnine version.
    """
    show = getattr(plot, "show", None)
    if callable(show):
        show()
    else:
        print(plot)


# Question 1 - sample data for Gray Kangaroos
# Scatter plot of nasal width against nasal length with a linear trend line.
kangaroo = pandas.read_csv("GrayKangaroo.txt", sep="\t", header=0)
print(kangaroo.head(5))  # print() so the preview is visible outside a REPL
nasal_plot = (
    ggplot(kangaroo, aes(x="Nasal Length", y="Nasal Width"))
    + geom_point()
    + coord_cartesian()
    + theme_classic()
    + stat_smooth(method="lm")
)
_render(nasal_plot)

# Question 2
# Compare a bar plot of per-region means with a plot of the raw observations.
regions = pandas.read_csv("data.txt", sep=",", header=0)
print(regions.head(5))
mean_bars = (
    ggplot(regions)
    + theme_classic()
    + xlab("region")
    + ylab("observations")
    + geom_bar(
        aes(x="factor(region)", y="observations"),
        stat="summary",
        fun_y=numpy.mean,
    )
)
_render(mean_bars)

# To check barplot mean values are correct
print(regions.groupby("region")["observations"].mean())

# geom_jitter() already draws the points (with a small random offset so that
# overlapping observations stay visible); combining it with geom_point() would
# plot every observation twice.
observation_scatter = (
    ggplot(regions, aes(x="region", y="observations"))
    + geom_jitter()
    + coord_cartesian()
    + theme_classic()
)
_render(observation_scatter)


# The bar plot shows that the mean number of observations is about the same for each region.
# The scatter plot shows that, even though the means are similar, the individual observations are distributed differently in each region.
# The east and west regions are fairly evenly spread, whereas the north region is clumped around 15 and the south region is clumped around 5 and 25.
# These different sets of observations all yield similar means, but the scatter plot gives a more accurate picture of the number of observations in each region.