forked from microbiome/tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathComposition.Rmd
204 lines (152 loc) · 6.32 KB
/
Composition.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
---
title: "Microbiome composition"
author: "Leo Lahti, Sudarshan Shetty et al. `r Sys.Date()`"
bibliography:
- bibliography.bib
output:
BiocStyle::html_document:
number_sections: no
toc: yes
toc_depth: 4
toc_float: true
self_contained: true
thumbnails: true
lightbox: true
gallery: true
use_bookdown: false
highlight: haddock
---
<!--
%\VignetteEngine{knitr::rmarkdown}
%\VignetteIndexEntry{microbiome tutorial - composition}
%\usepackage[utf8]{inputenc}
%\VignetteEncoding{UTF-8}
%\setkeys{Gin}{width=\linewidth,height=\textheight,keepaspectratio}
-->
Read example data from a [diet swap study](http://dx.doi.org/10.1038/ncomms7342):
```{r composition-example1, warning=FALSE, message=FALSE}
# Example data
library(microbiome)
library(dplyr)
data(dietswap)
# Make sure we use functions from correct package
transform <- microbiome::transform
# Merge rare taxa to speed up examples
pseq <- transform(dietswap, "compositional")
pseq <- aggregate_rare(pseq, level = "Genus", detection = 1/100, prevalence = 50/100)
# Pick sample subset
library(phyloseq)
pseq2 <- subset_samples(pseq, group == "DI" & nationality == "AFR" & timepoint.within.group == 1)
# Normal western adults
data(atlas1006)
pseq3 <- atlas1006 %>%
subset_samples(DNA_extraction_method == "r") %>%
aggregate_taxa(level = "Phylum") %>%
microbiome::transform(transform = "compositional")
```
## Composition barplots
Same with compositional (relative) abundances; for each sample (left), or averafged by group (right).
```{r composition-example4b, fig.width=10, fig.height=6, out.width="80%", fig.show="hold", warning=FALSE, message=FALSE}
# Try another theme
# from https://github.com/hrbrmstr/hrbrthemes
library(hrbrthemes)
library(gcookbook)
library(tidyverse)
theme_set(theme_bw(21))
p <- pseq3 %>%
plot_composition(sample.sort = "Firmicutes", otu.sort = "abundance") +
# Set custom colors
scale_fill_manual(values = default_colors("Phylum")[taxa(pseq3)]) +
scale_y_continuous(label = scales::percent)
print(p)
```
```{r composition-example4c, fig.width=10, fig.height=8, out.width="50%", fig.show="hold", warning=FALSE, message=FALSE}
# Limit the analysis on core taxa and specific sample group
p <- plot_composition(pseq2,
taxonomic.level = "Genus",
sample.sort = "nationality",
x.label = "nationality") +
guides(fill = guide_legend(ncol = 1)) +
scale_y_percent() +
labs(x = "Samples", y = "Relative abundance (%)",
title = "Relative abundance data",
subtitle = "Subtitle",
caption = "Caption text.") +
theme_ipsum(grid="Y")
print(p)
# Averaged by group
p <- plot_composition(pseq2,
average_by = "bmi_group", transform = "compositional")
print(p)
p <- NULL
```
## Composition heatmaps
Heatmap for CLR-transformed abundances, with samples and OTUs sorted
with the neatmap method:
```{r composition-example7, fig.width=10, fig.height=8, out.width="800px"}
p <- plot_composition(microbiome::transform(pseq, "compositional"),
plot.type = "heatmap",
sample.sort = "neatmap", otu.sort = "neatmap")
print(p)
```
## Plot taxa prevalence
This function
allows you to have an overview of OTU prevalences alongwith their
taxonomic affiliations. This will aid in checking if you filter OTUs
based on prevalence, then what taxonomic affliations will be lost.
```{r plot_prev, fig.height=6, fig.width=8, dev="CairoPNG", out.width="100%"}
data(atlas1006)
# Use sample and taxa subset to speed up example
p0 <- subset_samples(atlas1006, DNA_extraction_method == "r")
# Define detection and prevalence thresholds to filter out rare taxa
p0 <- core(p0, detection = 0.1/100, prevalence = 1/100)
# For the available taxonomic levels
plot_taxa_prevalence(p0, "Phylum", detection = 0.1/100)
```
## Amplicon data
Also see [phyloseq barplot examples](http://joey711.github.io/phyloseq/plot_bar-examples.html).
Check the [core microbiome page](http://microbiome.github.io/tutorials/CoremicrobiotaAmplicon.html) which shows how to read the your files into R and make a phyloseq object.
Here, an example data is used which is not available in the package currently due to size limitations. We will directly strat from a ready phyloseq object. Please test your own data and give your feedback on the [issues page](https://github.com/microbiome/microbiome/issues).
```{r composition-amplicon-example1, warning=FALSE, message=FALSE}
# Example data
library(microbiome)
# Try another theme
# from https://github.com/hrbrmstr/hrbrthemes
# you can install these if you don't have it already.
# devtools::install_github("hrbrmstr/hrbrthemes")
#library(devtools)
# install_github("microsud/microbiomeutilities")
library(hrbrthemes)
library(microbiomeutilities)
library(gcookbook)
library(tidyverse)
library(dplyr)
data("zackular2014")
#data("DynamicsIBD") #This data is currently unavialable due to size limitations
ps1 <- zackular2014
colnames(tax_table(ps1))
```
As you can see the taxonomic classification is just lablled as "Rank1" ... "Rank7". We need to change this to proper designation and also do some formatting of the data. This can be a useful example for understanding simple file processing in R.
```{r, composition-amplicon-example1a}
# First change the column names of the taxonomy table in phyloseq to following:
colnames(tax_table(ps1)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species" )
tax_table(ps1)[tax_table(ps1)[,"Kingdom"]== "NA", "Kingdom" ] <- "Unidentified_Kingdom"
tax_table(ps1)[tax_table(ps1)[,"Phylum"]== "p__", "Phylum" ] <- "p__Unidentified_Phylum"
# make a dataframe for taxonomy information.
taxic <- as.data.frame(ps1@tax_table)
otu.df <- abundances(ps1)
# make a dataframe for OTU information.
otu.df <- as.data.frame(otu.df)
# check the rows and columns
# head(otu.df)
# Add the OTU ids from OTU table into the taxa table at the end.
taxic$OTU <- row.names.data.frame(otu.df)
# You can see that we now have extra taxonomy levels.
colnames(taxic)
# convert it into a matrix.
taxmat <- as.matrix(taxic)
# convert into phyloseq compaitble file.
new.tax <- tax_table(taxmat)
# incroporate into phyloseq Object
tax_table(ps1) <- new.tax
```