Skip to content

Commit 34f0e80

Browse files
committed
Updated from dicook/SISBID-2017
1 parent 20c6d37 commit 34f0e80

File tree

89 files changed

+703
-763
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+703
-763
lines changed

0-preamble/index.Rmd

+1-1
Original file line numberDiff line numberDiff line change
@@ -147,4 +147,4 @@ devtools::install_github("haleyjeppson/ggmosaic")
147147

148148
Materials for this workshop build on slides provided by several people, but primarily Hadley Wickham and Carson Sievert.
149149

150-
This work is licensed under the Creative Commons Attribution-Noncommercial 3.0 United States License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/ 3.0/us/ or send a letter to Creative Commons, 171 Second Street, Suite 300, San Francisco, California, 94105, USA.
150+
This work is licensed under the Creative Commons Attribution-Noncommercial 3.0 United States License. To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/3.0/us/ or send a letter to Creative Commons, 171 Second Street, Suite 300, San Francisco, California, 94105, USA.

0-preamble/index.html

+2-2
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@
147147
"highlightLines": true,
148148
"countIncrementalSlides": false
149149
});
150-
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {window.dispatchEvent(new Event('resize'));});</script>
150+
if (window.HTMLWidgets) slideshow.on('showSlide', function (slide) {setTimeout(function() {window.dispatchEvent(new Event('resize'));}, 100)});</script>
151151

152152
<script type="text/x-mathjax-config">
153153
MathJax.Hub.Config({
@@ -161,7 +161,7 @@
161161
(function () {
162162
var script = document.createElement('script');
163163
script.type = 'text/javascript';
164-
script.src = 'https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_CHTML';
164+
script.src = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
165165
if (location.protocol !== 'file:' && /^https?:/.test(script.src))
166166
script.src = script.src.replace(/^https?:/, '');
167167
document.getElementsByTagName('head')[0].appendChild(script);

1.1-ggplot/index.Rmd

+21-17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: "Grammar of Graphics and ggplot2"
3-
subtitle: "SISBID 2017"
3+
subtitle: "SISBID 2017 <br> https://github.com/SISBID/Module2"
44
author: "Di Cook ([email protected], @visnut) <br> Heike Hofmann ([email protected], @heike_hh)"
55
date: "07/12-14/2017"
66
output:
@@ -28,12 +28,7 @@ knitr::opts_chunk$set(
2828
```
2929

3030
```{r echo=FALSE}
31-
library(ggplot2)
32-
library(tidyr)
33-
library(dplyr)
34-
library(lubridate)
35-
library(scales)
36-
library(readr)
31+
library(tidyverse)
3732
library(ggmap)
3833
library(HLMdiag)
3934
library(RColorBrewer)
@@ -141,7 +136,7 @@ autism, x and y position along a line, point, none
141136

142137
```{r}
143138
ggplot(autism, aes(x=age2, y=vsae)) +
144-
geom_jitter()
139+
geom_jitter(width=0.3, height=0)
145140
```
146141

147142
---
@@ -182,15 +177,17 @@ ggplot(autism, aes(x=age2, y=vsae, group=childid)) +
182177

183178
```{r}
184179
ggplot(autism, aes(x=age2, y=vsae, group=childid)) +
185-
geom_point() + geom_line(alpha=0.5)
180+
geom_point() + geom_line(alpha=0.1)
186181
```
187182

188183
---
189184
# Log scale y?
190185

191186
```{r}
192187
ggplot(autism, aes(x=age2, y=vsae, group=childid)) +
193-
geom_point() + geom_line(alpha=0.5) + scale_y_log10()
188+
geom_point() + geom_line(alpha=0.2) + scale_y_log10()
189+
ggplot(autism, aes(x=age2, y=log(vsae), group=childid)) +
190+
geom_point() + geom_line(alpha=0.2)
194191
```
195192

196193
---
@@ -208,7 +205,7 @@ ggplot(autism, aes(x=age2, y=vsae, group=childid, colour=bestest2)) +
208205
ggplot(autism, aes(x=age2, y=vsae, colour=bestest2)) +
209206
geom_point(alpha=0.1) + geom_line(aes(group=childid), alpha=0.1) +
210207
geom_smooth(se=F) +
211-
scale_y_log10()
208+
scale_y_log10()
212209
```
213210

214211
---
@@ -308,6 +305,9 @@ fly$`How often do you travel by plane?` <-
308305
"Never","Once a year or less","Once a month or less",
309306
"A few times per month","A few times per week","Every day"))
310307
ggplot(fly, aes(x=`How often do you travel by plane?`)) + geom_bar() + coord_flip()
308+
ggplot(fly, aes(x=`How often do you travel by plane?`,
309+
fill=`How often do you travel by plane?`)) + geom_bar() + coord_flip() +
310+
scale_fill_brewer(palette="Dark2")
311311
```
312312

313313
---
@@ -412,6 +412,10 @@ ggplot(fly_sub, aes(x=`Do you ever recline your seat when you fly?`)) +
412412
geom_bar() +
413413
facet_wrap(~`Is itrude to recline your seat on a plane?`, ncol=3) +
414414
coord_flip()
415+
ggplot(fly_sub, aes(x=`Do you ever recline your seat when you fly?`,
416+
fill=`Is itrude to recline your seat on a plane?`)) +
417+
geom_bar(position="fill") +
418+
coord_flip()
415419
```
416420

417421

@@ -486,12 +490,12 @@ p + scale_fill_manual("", values=clrs) + theme(legend.position = "none")
486490
---
487491
# Hierarchy of mappings
488492

489-
- 1. Position - common scale (BEST)
490-
- 2. Position - nonaligned scale
491-
- 3. Length, direction, angle
492-
- 4. Area
493-
- 5. Volume, curvature
494-
- 6. Shading, color (WORST)
493+
- 1.Position - common scale (BEST)
494+
- 2.Position - nonaligned scale
495+
- 3.Length, direction, angle
496+
- 4.Area
497+
- 5.Volume, curvature
498+
- 6.Shading, color (WORST)
495499

496500
---
497501
# Pre-attentive

1.1-ggplot/index.html

+8-8
Original file line numberDiff line numberDiff line change
@@ -496,12 +496,12 @@
496496
---
497497
# Hierarchy of mappings
498498

499-
- 1. Position - common scale (BEST)
500-
- 2. Position - nonaligned scale
501-
- 3. Length, direction, angle
502-
- 4. Area
503-
- 5. Volume, curvature
504-
- 6. Shading, color (WORST)
499+
- 1.Position - common scale (BEST)
500+
- 2.Position - nonaligned scale
501+
- 3.Length, direction, angle
502+
- 4.Area
503+
- 5.Volume, curvature
504+
- 6.Shading, color (WORST)
505505

506506
---
507507
# Pre-attentive
@@ -627,7 +627,7 @@
627627
"highlightLines": true,
628628
"countIncrementalSlides": false
629629
});
630-
if (window.HTMLWidgets) slideshow.on('showSlide', function (slide) {setTimeout(function() {window.dispatchEvent(new Event('resize'));}, 100)});</script>
630+
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {window.dispatchEvent(new Event('resize'));});</script>
631631

632632
<script type="text/x-mathjax-config">
633633
MathJax.Hub.Config({
@@ -641,7 +641,7 @@
641641
(function () {
642642
var script = document.createElement('script');
643643
script.type = 'text/javascript';
644-
script.src = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
644+
script.src = 'https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_CHTML';
645645
if (location.protocol !== 'file:' && /^https?:/.test(script.src))
646646
script.src = script.src.replace(/^https?:/, '');
647647
document.getElementsByTagName('head')[0].appendChild(script);

1.2-mvplot/index.Rmd

+104-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
title: "Multivariate data plots"
3-
subtitle: "SISBID 2017"
3+
subtitle: "SISBID 2017 <br> https://github.com/SISBID/Module2"
44
author: "Di Cook ([email protected], @visnut) <br> Heike Hofmann ([email protected], @heike_hh)"
55
date: "07/12-14/2017"
66
output:
@@ -371,7 +371,7 @@ sig.tab <- sig.tab %>% filter(PValue < 0.01)
371371
---
372372

373373

374-
```{r}
374+
```{r fig.height=6, fig.width=6}
375375
sig.tab <- merge(sig.tab, coty[,1:7], by.x="genes", by.y="ID")
376376
ggscatmat(sig.tab, columns=5:10, alpha=0.1)
377377
```
@@ -417,13 +417,114 @@ ggplot(data=sig.tab) +
417417
geom_segment(aes(x=C_S1_R1, xend=C_S1_R2, y=C_S2_R1, yend=C_S2_R2))
418418
```
419419

420+
---
421+
# biobroom and limma
422+
423+
```{r}
424+
bb <- data.frame(read_tsv("../data/biotin-rma2.txt"))
425+
head(data.frame(bb[,-2]))
426+
row.names(bb) <- bb$Gene
427+
```
428+
429+
---
430+
# Looking at the gene expression data
431+
432+
```{r fig.width=6, fig.height=6}
433+
ggpairs(bb, columns=c(3,7,4,8))
434+
```
435+
436+
---
437+
# A porcupine plot again
438+
439+
```{r fig.width=6, fig.height=6}
440+
sub <- bb %>% select(Gene, biotin.WT.01.1, biotin.WT.02.1, biotin.bio101.4, biotin.bio102.4)
441+
ggplot(sub, aes(x=biotin.WT.01.1, xend=biotin.WT.02.1, y=biotin.bio101.4, yend=biotin.bio102.4)) +
442+
geom_segment() +
443+
theme(aspect.ratio = 1) +
444+
xlab("wildtype, control treatment") +
445+
ylab("mutant, treated")
446+
```
447+
448+
---
449+
# Fit a limma model
450+
451+
```{r}
452+
design <- expand.grid(type=c("wild", "mutant"), trt=c("control", "treatment"), rep=1:2)
453+
454+
fit <- lmFit(bb[,-(1:2)], model.matrix(~ type*trt, design))
455+
fit <- eBayes(fit)
456+
457+
head(topTable(fit))
458+
```
459+
460+
---
461+
class: inverse middle
462+
# Your Turn
463+
464+
![](lorikeets.png)
465+
466+
- For the previous example, explore the output that each of the broom functions (`glance`, `tidy`, `augment`) produces.
467+
- Create a volcano plot for each of the model terms, i.e. plot the estimates on the x-axis against `log(p.value)` on the y-axis.
468+
- Are there differences visible between the terms?
469+
470+
```{r, echo=FALSE, eval=FALSE}
471+
head(tidy(fit))
472+
ggplot(tidy(fit), aes(x=estimate, y=log(p.value), colour = p.value < 0.05)) +
473+
facet_wrap(~term) +
474+
geom_point() + ggtitle("Volcano Plots with limma")
475+
```
476+
477+
---
478+
479+
```{r}
480+
bbfit <- tidy(fit)
481+
ggplot(data=bbfit, aes(x=term, y=estimate, group=gene)) +
482+
geom_line(alpha=0.1) +
483+
geom_point(aes(color=log(p.value)), size=2, alpha=0.6)
484+
```
485+
486+
---
487+
488+
Is the type*treatment interaction necessary? A very strong negative correlation is suspicious.
489+
490+
491+
```{r fig.show='hide'}
492+
bbfit_m <- bbfit %>% select(gene, term, estimate, p.value) %>%
493+
gather(fit.stat, value, -gene, -term) %>%
494+
unite(term_stat, term, fit.stat) %>%
495+
spread(term_stat, value) %>%
496+
rename(trt=trttreatment_estimate, mut=typemutant_estimate,
497+
int=`typemutant:trttreatment_estimate`,
498+
trtp=trttreatment_p.value, mutp=typemutant_p.value,
499+
intp=`typemutant:trttreatment_p.value`)
500+
ggpairs(bbfit_m, columns=c(2,4,6), upper=list(continuous="points"),
501+
ggplot2::aes(colour=intp)) + theme(aspect.ratio=1)
502+
```
503+
504+
---
505+
506+
```{r echo=FALSE, fig.width=8, fig.height=8}
507+
ggpairs(bbfit_m, columns=c(2,4,6), upper=list(continuous="points"),
508+
ggplot2::aes(colour=intp)) + theme(aspect.ratio=1)
509+
```
510+
511+
---
512+
513+
```{r}
514+
fit2 <- lmFit(bb[,-(1:2)], model.matrix(~ type+trt, design))
515+
fit2 <- eBayes(fit2)
516+
bbfit2 <- tidy(fit2)
517+
ggplot(data=bbfit2, aes(x=term, y=estimate, group=gene)) +
518+
geom_line(alpha=0.1) +
519+
geom_point(aes(color=log(p.value)), size=2, alpha=0.6)
520+
```
521+
420522
---
421523
# Resources
422524

423525
- [GGobi web site](http://www.ggobi.org), [ggobi book](http://www.ggobi.org/book)
424526
- Emerson et al (2013) The Generalized Pairs Plot, Journal of Computational and Graphical Statistics, 22:1, 79-91
425527
- Cook et al (2007) Exploring Gene Expression Data, Using Plots, Journal of Data Science, 5(2):151-182
426-
- [Antony Unwin, Graphical Data Analysis with R](https://www.crcpress.com/Graphical-Data-Analysis-with-R/Unwin/9781498715232)
427528

428529
---
429530
# Share and share alike

1.2-mvplot/index.html

+3-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class: center, middle, inverse, title-slide
1313

1414
# Multivariate data plots
15-
## SISBID 2017
15+
## SISBID 2017 <br> <a href="https://github.com/SISBID/Module2" class="uri">https://github.com/SISBID/Module2</a>
1616
### Di Cook (<a href="mailto:[email protected]">[email protected]</a>, <span class="citation">@visnut</span>) <br> Heike Hofmann (<a href="mailto:[email protected]">[email protected]</a>, <span class="citation">@heike_hh</span>)
1717
### 07/12-14/2017
1818

@@ -347,7 +347,6 @@
347347
- [GGobi web site](http://www.ggobi.org), [ggobi book](http://www.ggobi.org/book)
348348
- Emerson et al (2013) The Generalized Pairs Plot, Journal of Computational and Graphical Statistics, 22:1, 79-91
349349
- Cook et al (2007) Exploring Gene Expression Data, Using Plots, Journal of Data Science, 5(2):151-182
350-
- [Antony Unwin, Graphical Data Analysis with R](https://www.crcpress.com/Graphical-Data-Analysis-with-R/Unwin/9781498715232)
351350

352351
---
353352
# Share and share alike
@@ -360,7 +359,7 @@
360359
"highlightLines": true,
361360
"countIncrementalSlides": false
362361
});
363-
if (window.HTMLWidgets) slideshow.on('showSlide', function (slide) {setTimeout(function() {window.dispatchEvent(new Event('resize'));}, 100)});</script>
362+
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {window.dispatchEvent(new Event('resize'));});</script>
364363

365364
<script type="text/x-mathjax-config">
366365
MathJax.Hub.Config({
@@ -374,7 +373,7 @@
374373
(function () {
375374
var script = document.createElement('script');
376375
script.type = 'text/javascript';
377-
script.src = 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
376+
script.src = 'https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_CHTML';
378377
if (location.protocol !== 'file:' && /^https?:/.test(script.src))
379378
script.src = script.src.replace(/^https?:/, '');
380379
document.getElementsByTagName('head')[0].appendChild(script);

2.1-tidyr/index.Rmd

+16-4
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ student2012.sub <- readRDS("../data/student_sub.rds")
6363
student2012.sub %>% group_by(CNT) %>% tally()
6464
```
6565

66+
---
67+
# What is **tidy data**?
68+
69+
- Why do we want tidy data?
70+
- A couple of exercises to look more closely at what we find in practice
71+
72+
6673
---
6774
class: inverse middle
6875
# Your turn 1
@@ -180,10 +187,10 @@ There are various features of messy data that one can observe in practice. Here
180187
---
181188
# Tidy Verbs
182189

183-
- **gather**: specify the **keys** (identifiers) and the **values** (measures) to make long form (used to be called melting)
184-
- **spread**: variables in columns (used to be called casting)
185-
- nest/unnest: working with lists
186-
- separate/unite: split and combine columns
190+
- `gather`: specify the **keys** (identifiers) and the **values** (measures) to make long form (used to be called melting)
191+
- `spread`: variables in columns (used to be called casting)
192+
- `nest`/`unnest`: working with list variables
193+
- `separate`/`unite`: split and combine columns
187194

188195
---
189196
# French Fries
@@ -264,6 +271,7 @@ ff.m <- french_fries %>%
264271
head(ff.m)
265272
```
266273

274+
--
267275

268276
```{r, fig.height=2, fig.width=8}
269277
ggplot(data=ff.m, aes(x=rating)) + geom_histogram(binwidth=2) +
@@ -291,6 +299,10 @@ We need to gather the data into long form, and then get the replicates spread in
291299

292300
```{r}
293301
head(ff.m)
302+
```
303+
304+
305+
```{r}
294306
ff.s <- ff.m %>% spread(rep, rating)
295307
head(ff.s)
296308
```

0 commit comments

Comments
 (0)