Skip to content

Commit c5d5f67

Browse files
committed
restructure output of phylotax()
1 parent b9fba0e commit c5d5f67

14 files changed

+160
-67
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: phylotax
22
Type: Package
33
Title: Refine taxonomic assignment of environmental sequences using a taxonomic tree
4-
Version: 0.0.2.1
4+
Version: 0.0.3
55
Authors@R: person(given = "Brendan", family = "Furneaux",
66
email = "[email protected]",
77
role = c("aut", "cre"))

NEWS.md

+17
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
# phylotax 0.0.3
2+
3+
* **BREAKING CHANGE** `phylotax()` returns taxonomic tables in four categories:
4+
* "`tip_taxa`" is the assignments which PHYLOTAX has made itself.
5+
* "`rejected`" are primary assignments which PHYLOTAX has rejected.
6+
* "`retained`" are primary assignments which PHYLOTAX has not rejected;
7+
however some of them may still be ambiguous.
8+
* "`missing`" are primary assignments whose labels are not present in the
9+
tree, so PHYLOTAX has not done anything with them. (But note that this will
10+
be empty if no tree was given).
11+
* `phylotax()` now returns an S3 object of class "`phylotax`". This should not
12+
break anything, and it allows the possibility of nice improvements in the
13+
future.
14+
* `phylotax()$node_taxa` now includes a "`label`" column, and populates it with
15+
node labels if they exist, or just the numbers if they don't. Node labels are
16+
also used in trace output.
17+
118
# phylotax 0.0.2.1
219

320
* Two quick bugfixes, applying to errors in `taxonomy_sintax()` and

R/taxonomy.R

+49-26
Original file line numberDiff line numberDiff line change
@@ -541,46 +541,54 @@ new_phylotax_env <- function(tree, taxa, parent = parent.frame()) {
541541
.parent = parent,
542542
node_taxa = tibble::tibble(
543543
node = integer(),
544+
label = NULL,
544545
rank = taxa$rank[FALSE],
545546
taxon = character()
546547
),
547-
tip_taxa = dplyr::filter(taxa, .data$label %in% tree$tip.label),
548+
tip_taxa = dplyr::filter(taxa, FALSE),
549+
retained = dplyr::filter(taxa, .data$label %in% tree$tip.label),
550+
rejected = dplyr::filter(taxa, FALSE),
551+
missing = dplyr::filter(taxa, !.data$label %in% tree$tip.label),
548552
tree = tree
549553
)
550554
}
551555

552556

553557
phylotax_ <- function(tree, taxa, node, ranks, method, e) {
554558
if (length(ranks) == 0) return()
555-
559+
nodelabel <- if (!is.null(tree$node.label)) {
560+
tree$node.label[node - ape::Ntip(tree)]
561+
} else {
562+
as.character(node)
563+
}
556564
parents <- phangorn::Ancestors(tree, node, type = "all")
557565
for (r in ranks) {
558566
if (is.ordered(ranks)) r <- ordered(r, levels = levels(ranks))
559567
if (any(e$node_taxa$node %in% parents & e$node_taxa$rank == r)) next
560-
taxon <- clade_taxon(tree, e$tip_taxa, node, r)
568+
taxon <- clade_taxon(tree, e$retained, node, r)
561569
if (is.na(taxon)) {
562-
futile.logger::flog.debug("Could not assign a %s to node %d.", r, node)
570+
futile.logger::flog.debug("Could not assign a %s to node %s.", r, nodelabel)
563571
for (n in phangorn::Children(tree, node)) {
564-
phylotax_(tree, e$tip_taxa, n, ranks, method, e)
572+
phylotax_(tree, e$retained, n, ranks, method, e)
565573
}
566574
break
567575
} else {
568576
children <- phangorn::Children(tree, node)
569577
if (length(children) > 0) {
570578
futile.logger::flog.info(
571-
"Assigned node %d and its %d children to %s %s.",
572-
node, length(children), as.character(r), taxon)
579+
"Assigned node %s and its %d children to %s %s.",
580+
nodelabel, length(children), as.character(r), taxon)
573581
} else {
574-
futile.logger::flog.info("Assigned node %d to %s %s.", node,
582+
futile.logger::flog.info("Assigned node %s to %s %s.", nodelabel,
575583
as.character(r), taxon)
576584
}
577585
ranks <- ranks[-1]
578586
e$node_taxa <- dplyr::bind_rows(
579587
e$node_taxa,
580-
tibble::tibble(node = node, rank = r, taxon = taxon)
588+
tibble::tibble(node = node, label = nodelabel, rank = r, taxon = taxon)
581589
)
582590
tips <- tree$tip.label[phangorn::Descendants(tree, node, type = "tips")[[1]]]
583-
wrongTaxa <- e$tip_taxa %>%
591+
wrongTaxa <- e$retained %>%
584592
dplyr::filter(
585593
.data$label %in% tips,
586594
.data$rank == r,
@@ -596,11 +604,16 @@ phylotax_ <- function(tree, taxa, node, ranks, method, e) {
596604
newAssign[[n]] <- unname(method[n])
597605
}
598606
# remove assignments which are not consistent with the one we just chose
599-
e$tip_taxa <- dplyr::bind_rows(
600-
dplyr::filter(e$tip_taxa, .data$rank < r),
601-
dplyr::filter(e$tip_taxa, .data$rank >= r) %>%
602-
dplyr::anti_join(wrongTaxa, by = names(wrongTaxa)),
603-
newAssign
607+
e$tip_taxa <- dplyr::bind_rows(e$tip_taxa, newAssign)
608+
e$rejected <- dplyr::bind_rows(
609+
e$rejected,
610+
dplyr::filter(e$retained, .data$rank >= r) %>%
611+
dplyr::semi_join(wrongTaxa, by = names(wrongTaxa))
612+
)
613+
e$retained <- dplyr::bind_rows(
614+
dplyr::filter(e$retained, .data$rank < r),
615+
dplyr::filter(e$retained, .data$rank >= r) %>%
616+
dplyr::anti_join(wrongTaxa, by = names(wrongTaxa))
604617
)
605618
}
606619
}
@@ -647,14 +660,20 @@ phylotax_ <- function(tree, taxa, node, ranks, method, e) {
647660
#' treat each unique combination of values in these columns as a distinct
648661
#' method.
649662
#'
650-
#' @return a list with two elements, "tip_taxa" and "node_taxa". "tip_taxa" is
651-
#' a `tibble::tibble()` with the same format as `taxa`, in
652-
#' which assignments which are inconsistent with the phylogeny have been
653-
#' removed, and new assignments deduced or confirmed from the phylogeny.
654-
#' These are identified by the value "phylotax" in the "method" column,
655-
#' which is created if it does not already exist. "node_taxa" has columns
656-
#' "node", "rank" and "taxon", giving taxonomic assignments for the nodes of
657-
#' the tree.
663+
#' @return an S3 object with class "`phylotax`", with five elements:
664+
#' * "`tip_taxa`" a `tibble::tibble()` with the same format as `taxa`, containing
665+
#' taxonomy assignments made by PHYLOTAX to tips.
666+
#' * "`node_taxa`" a `tibble::tibble()` with columns "`node`", "`label`",
667+
#' "`rank`" and "`taxon`" giving taxonomy assignments made by PHYLOTAX to
668+
#' internal nodes.
669+
#' * "`rejected`" a `tibble::tibble()` with the same format as `taxa` giving
670+
#' primary assignments which have been rejected by PHYLOTAX.
671+
#' * "`retained`" a `tibble::tibble()` with the same format as `taxa` giving
672+
#' primary assignments which have not been rejected by PHYLOTAX. These may
673+
#' contain inconsistencies that PHYLOTAX was unable to resolve.
674+
#' * "`missing`" a `tibble::tibble()` with the same format as `taxa`, giving the
675+
#' primary assignments which have not been assessed by PHYLOTAX because they
676+
#' have labels which are not present on the tree.
658677
#'
659678
#' @export
660679
phylotax <- function(
@@ -670,9 +689,13 @@ phylotax <- function(
670689
e <- new_phylotax_env(tree, count_assignments(taxa), ranks)
671690
ranks <- sort(unique(taxa$rank))
672691
phylotax_(tree, taxa, phangorn::getRoot(tree), ranks, method, e)
673-
e$tip_taxa$n_tot <- NULL
674-
e$tip_taxa$n_diff <- NULL
675-
as.list(e)
692+
for (member in c("missing", "retained", "rejected", "tip_taxa"))
693+
for (n in c("n_tot", "n_diff"))
694+
e[[member]][[n]] <- NULL
695+
structure(
696+
as.list(e),
697+
class = "phylotax"
698+
)
676699
}
677700

678701
#' Simple phylogenetic tree for use in examples

README.Rmd

+13-2
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,24 @@ supports it.
7676
phylotax_out <- phylotax(tree = example_tree(), taxa = example_taxa())
7777
```
7878

79-
PHYLOTAX returns a list containing the tree, taxa assigned to tips,
80-
and taxa assigned to nodes. Let's look at the tip taxa assignments.
79+
PHYLOTAX returns a list of class "`phylotax`" containing the tree,
80+
taxa assignments for tips and internal nodes, as well as tables dividing the
81+
primary assignments into those which were rejected, those which were retained,
82+
and those which were missing from the input tree.
8183

8284
```{r}
8385
phylotax_out$tip_taxa
8486
```
8587

88+
```{r}
89+
phylotax_out$retained
90+
```
91+
92+
```{r}
93+
phylotax_out$rejected
94+
```
95+
96+
8697
Phylotax has used the following logic:
8798

8899
1. It's not possible to decide what the root (node 1) is, because one of its

README.md

+49-29
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11

22
<!-- README.md is generated from README.Rmd. Please edit that file -->
33

4-
# phylotax
4+
phylotax
5+
========
56

67
<!-- badges: start -->
78

@@ -11,7 +12,8 @@ status](https://travis-ci.com/brendanf/phylotax.svg?branch=master)](https://trav
1112
coverage](https://codecov.io/gh/brendanf/phylotax/branch/master/graph/badge.svg)](https://codecov.io/gh/brendanf/phylotax?branch=master)
1213
<!-- badges: end -->
1314

14-
## Installation
15+
Installation
16+
------------
1517

1618
Install the development version from [GitHub](https://github.com/) with:
1719

@@ -20,7 +22,8 @@ Install the development version from [GitHub](https://github.com/) with:
2022
devtools::install_github("brendanf/phylotax")
2123
```
2224

23-
## Usage
25+
Usage
26+
-----
2427

2528
The PHYLOTAX algorithm takes as input taxonomic annotations from one or
2629
more primary taxonomic assignment algorithms, and refines them using a
@@ -40,15 +43,14 @@ plot(example_tree(), show.node.label = TRUE)
4043

4144
And here is a set of taxonomic assignments for the tips of the tree,
4245
based on two hypothetical primary assignment algorithms “XTAX” and
43-
“YTAX”. Some of the tips have been assigned to two genera: “Tax1”
44-
and “Tax2”. The `phylotax` package includes the function `taxtable()`
45-
which can generate a table of this type based on the output of various
46-
primary assignment algorithms, but all that’s important is that it
47-
contains the columns “label”, “rank”, and “taxon”. The ranks need to be
48-
one of “rootrank”, “domain”, “kingdom”, “phylum”, “class”, “order”,
49-
“family”, “genus”, and “species”. Our example also has a “method”
50-
column, which PHYLOTAX uses to identify which assignments come from the
51-
same source.
46+
“YTAX”. Some of the tips have been assigned to two genera: “Tax1” and
47+
“Tax2”. The `phylotax` package includes the function `taxtable()` which
48+
can generate a table of this type based on the output of various primary
49+
assignment algorithms, but all that’s important is that it contains the
50+
columns “label”, “rank”, and “taxon”. The ranks need to be one of
51+
“rootrank”, “domain”, “kingdom”, “phylum”, “class”, “order”, “family”,
52+
“genus”, and “species”. Our example also has a “method” column, which
53+
PHYLOTAX uses to identify which assignments come from the same source.
5254

5355
``` r
5456
example_taxa()
@@ -87,28 +89,46 @@ tree supports it.
8789

8890
``` r
8991
phylotax_out <- phylotax(tree = example_tree(), taxa = example_taxa())
90-
#> INFO [2020-10-14 17:35:34] Assigned node 9 and its 2 children to genus Tax2.
91-
#> INFO [2020-10-14 17:35:34] Assigned node 10 and its 2 children to genus Tax1.
92+
#> INFO [2020-10-15 15:02:50] Assigned node 3 and its 2 children to genus Tax2.
93+
#> INFO [2020-10-15 15:02:50] Assigned node 4 and its 2 children to genus Tax1.
9294
```
9395

94-
PHYLOTAX returns a list containing the tree, taxa assigned to tips, and
95-
taxa assigned to nodes. Let’s look at the tip taxa assignments.
96+
PHYLOTAX returns a list of class “`phylotax`” containing the tree, taxa
97+
assignments for tips and internal nodes, as well as tables dividing the
98+
primary assignments into those which were rejected, those which were
99+
retained, and those which were missing from the input tree.
96100

97101
``` r
98102
phylotax_out$tip_taxa
99-
#> # A tibble: 10 x 4
100-
#> label method rank taxon
101-
#> <chr> <chr> <ord> <chr>
102-
#> 1 C XTAX genus Tax2
103-
#> 2 B YTAX genus Tax2
104-
#> 3 C YTAX genus Tax2
105-
#> 4 D YTAX genus Tax1
106-
#> 5 F YTAX genus Tax1
107-
#> 6 B PHYLOTAX genus Tax2
108-
#> 7 C PHYLOTAX genus Tax2
109-
#> 8 E PHYLOTAX genus Tax1
110-
#> 9 F PHYLOTAX genus Tax1
111-
#> 10 D PHYLOTAX genus Tax1
103+
#> # A tibble: 5 x 4
104+
#> label method rank taxon
105+
#> <chr> <chr> <ord> <chr>
106+
#> 1 B PHYLOTAX genus Tax2
107+
#> 2 C PHYLOTAX genus Tax2
108+
#> 3 E PHYLOTAX genus Tax1
109+
#> 4 F PHYLOTAX genus Tax1
110+
#> 5 D PHYLOTAX genus Tax1
111+
```
112+
113+
``` r
114+
phylotax_out$retained
115+
#> # A tibble: 5 x 4
116+
#> label method rank taxon
117+
#> <chr> <chr> <ord> <chr>
118+
#> 1 C XTAX genus Tax2
119+
#> 2 B YTAX genus Tax2
120+
#> 3 C YTAX genus Tax2
121+
#> 4 D YTAX genus Tax1
122+
#> 5 F YTAX genus Tax1
123+
```
124+
125+
``` r
126+
phylotax_out$rejected
127+
#> # A tibble: 2 x 4
128+
#> label method rank taxon
129+
#> <chr> <chr> <ord> <chr>
130+
#> 1 B XTAX genus Tax1
131+
#> 2 D XTAX genus Tax2
112132
```
113133

114134
Phylotax has used the following logic:
701 Bytes
Loading

man/phylotax.Rd

+16-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/node_taxa.rds

247 Bytes
Binary file not shown.

tests/testthat/phylotax.rds

-500 Bytes
Binary file not shown.

tests/testthat/rejected.rds

256 Bytes
Binary file not shown.

tests/testthat/retained.rds

275 Bytes
Binary file not shown.

tests/testthat/test-consensus.R

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
consensus_test <- tibble::tribble(
2+
~label, ~method, ~rank, ~taxon,
3+
"A", "XTAX", "genus", "G1",
4+
"A", "XTAX", "species", "G1 s1",
5+
"A", "YTAX", "genus", "G2")
6+
7+
test_that("consensus is not assumed for lower taxa when higher taxa are inconsistent", {
8+
expect_equal(length(phylotax(taxa = consensus_test)$tip_taxa$method), 0)
9+
})

tests/testthat/test-phylotax.R

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
phylotax_out <- phylotax(tree = example_tree(), taxa = example_taxa())
2+
13
test_that("phylotax does not revert", {
2-
expect_known_value(phylotax(tree = example_tree(), taxa = example_taxa()), "phylotax.rds", update = FALSE)
4+
expect_known_value(phylotax_out$rejected, "rejected.rds", update = FALSE)
5+
expect_known_value(phylotax_out$retained, "retained.rds", update = FALSE)
6+
expect_known_value(phylotax_out$tip_taxa, "tip_taxa.rds", update = FALSE)
7+
expect_known_value(phylotax_out$node_taxa, "node_taxa.rds", update = FALSE)
38
})

tests/testthat/tip_taxa.rds

282 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)