Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions Exercise9hw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 8 13:33:39 2018

@author: saurylara
"""

import numpy
import pandas
from plotnine import *

# --- Part 1: husband vs. wife ages -------------------------------------
# Load the tab-separated SpouseAges table; the first row holds column names.
SpouseAges = pandas.read_csv(
    "/Users/saurylara/Desktop/SpouseAges.txt",
    sep="\t",
    header=0,
)

# Base scatterplot of the two age columns.
age_mapping = aes(x="HusbandAge", y="WifeAge")
a = ggplot(SpouseAges, age_mapping) + theme_classic() + geom_point()

# Add axis labels and a linear-model ("lm") trendline, then render the plot.
age_trend_plot = (
    a
    + xlab("HusbandAge (y)")
    + ylab("WifeAge (y)")
    + stat_smooth(method="lm")
)
print(age_trend_plot)

# --- Part 2: observations by region ------------------------------------
# Load the region/observations data file (default comma separator).
data = pandas.read_csv("/Users/saurylara/Desktop/ICB_EX9/data.txt")

# Bar plot: each bar is numpy.mean of "observations" for one "region",
# computed by the "summary" stat.
region_mapping = aes(x="region", y="observations")
d = ggplot(data) + theme_classic() + xlab("region") + ylab("observations")
mean_bars = geom_bar(region_mapping, stat="summary", fun_y=numpy.mean)
d = d + mean_bars
print(d)

# Jittered scatter plot showing every individual observation per region.
a = (
    ggplot(data, aes(x="region", y="observations"))
    + theme_classic()
    + geom_jitter()
)
print(a)

# The bar plot and the scatter plot tell me different things, because the bar graph of means alone does not give the full story.
# When I first plotted the data as a bar graph, I thought the observations were fairly similar in every region, since the mean for all four was about 15.
# When I plotted the scatter plot, though, I could tell that the regions had very different data spreads.
# The data points in the West region were fully spread out, those in the East region were fairly spread out, the South region had many high and low values, and in the North region all of the values were concentrated around 15.




25 changes: 25 additions & 0 deletions SpouseAges.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Couple HusbandAge WifeAge
1 25 22
2 25 32
3 51 50
4 25 25
5 38 33
6 30 27
7 60 45
8 54 47
9 31 30
10 54 44
11 23 23
12 34 39
13 25 24
14 23 22
15 19 16
16 71 73
17 26 27
18 31 36
19 26 24
20 62 60
21 29 26
22 31 23
23 29 28
24 35 36