diff --git a/NEWS.md b/NEWS.md
index 03153ef..1812e71 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,7 @@
- New function `clear_cached_resources()` to remove the session cache and force a reload.
- `load_taxonomic_resources()` now works offline when parquet files have been previously downloaded; `default_version()` falls back to the most recently cached local version when no internet connection is available.
- Internal taxonomic resource tables renamed to snake_case; `family` column added to resource tables.
+- Functions `create_species_state_origin_matrix()` and `state_diversity_counts()` now include the parameter `include_infrataxa`, allowing users to select whether just species-rank taxa or species and infra-specific taxa are output in the table. When `create_species_state_origin_matrix()` is called by `native_anywhere_in_australia()`, `include_infrataxa = TRUE` is set as the default, so infrataxa can also be checked by this function.
# APCalign 1.1.6
diff --git a/R/APCalign-package.R b/R/APCalign-package.R
index 3e83c64..7f05257 100644
--- a/R/APCalign-package.R
+++ b/R/APCalign-package.R
@@ -14,7 +14,7 @@
#' [GitHub repository](https://github.com/traitecoevo/APCalign/issues)
#' @keywords internal
#' @section Functions:
-#' **Standarise taxon names**
+#' **Standardise taxon names**
#'
#' * [load_taxonomic_resources]
#' * [create_taxonomic_update_lookup]
diff --git a/R/align_taxa.R b/R/align_taxa.R
index 1bdc086..8ddb2bd 100644
--- a/R/align_taxa.R
+++ b/R/align_taxa.R
@@ -333,12 +333,20 @@ align_taxa <- function(original_name,
dplyr::filter(original_name %in% resources$APC_accepted$canonical_name) %>%
dplyr::distinct(original_name) %>%
nrow()
-
+
+ synonym_matches <- taxa$tocheck %>%
+ dplyr::filter(original_name %in% resources$APC_synonyms$canonical_name) %>%
+ dplyr::filter(!original_name %in% resources$APC_accepted$canonical_name) %>%
+ dplyr::distinct(original_name) %>%
+ nrow()
+
if(!quiet)
message(
" -> of these ",
crayon::blue(perfect_matches),
- " names have a perfect match to a scientific name in the APC.
+ " names have a perfect match to an accepted scientific name in the APC, and ",
+ crayon::blue(synonym_matches),
+ " names have a perfect match to a synonym in the APC.
Alignments being sought for remaining names."
)
}
diff --git a/R/create_species_state_origin_matrix.R b/R/create_species_state_origin_matrix.R
index c95d0fa..5510121 100644
--- a/R/create_species_state_origin_matrix.R
+++ b/R/create_species_state_origin_matrix.R
@@ -24,7 +24,7 @@
#' @seealso \code{\link{load_taxonomic_resources}}
#'
#' @examples
-#' \donttest{create_species_state_origin_matrix()}#'
+#' \donttest{create_species_state_origin_matrix()}
#' \donttest{create_species_state_origin_matrix(include_infrataxa = TRUE)}
#'
#'
diff --git a/R/load_taxonomic_resources.R b/R/load_taxonomic_resources.R
index 77d74d6..58c8722 100644
--- a/R/load_taxonomic_resources.R
+++ b/R/load_taxonomic_resources.R
@@ -139,7 +139,7 @@ load_taxonomic_resources <-
family,
genus
) %>%
- dplyr::arrange(taxonomic_status) %>%
+ dplyr::arrange(relevel_taxonomic_status_preferred_order(taxonomic_status)) %>%
dplyr::mutate(
## strip_names removes punctuation and filler words associated with
## infraspecific taxa (subsp, var, f, ser)
@@ -233,6 +233,7 @@ load_taxonomic_resources <-
taxonomic_resources[["genera_synonym"]] <-
apc_genera %>%
dplyr::filter(!canonical_name %in% taxonomic_resources$genera_accepted$canonical_name) %>%
+ dplyr::arrange(relevel_taxonomic_status_preferred_order(taxonomic_status)) %>%
dplyr::mutate(taxonomic_dataset = "APC") %>%
dplyr::distinct(canonical_name, .keep_all = TRUE)
diff --git a/R/native_anywhere_in_australia.R b/R/native_anywhere_in_australia.R
index 0063a4c..7e4bf98 100644
--- a/R/native_anywhere_in_australia.R
+++ b/R/native_anywhere_in_australia.R
@@ -19,8 +19,6 @@
#' @param resources An optional list of taxonomic resources to use for the lookup.
#' If not provided, the function will load default taxonomic resources using the
#' `load_taxonomic_resources()` function.
-#' @param include_infrataxa option to include subspecies, varieties and forms in the output.
-#' Set to false as the default, outputting results just for species-rank taxa.
#'
#' @return A tibble with two columns: `species`, which is the same as the unique values of
#' the input `species`, and `native_anywhere_in_aus`, a vector indicating whether each
@@ -30,10 +28,10 @@
#' @examples
#' \donttest{native_anywhere_in_australia(c("Eucalyptus globulus","Pinus radiata","Banksis notaspecies"))}
-native_anywhere_in_australia <- function(species, resources = load_taxonomic_resources(), include_infrataxa = FALSE) {
+native_anywhere_in_australia <- function(species, resources = load_taxonomic_resources()) {
# Create lookup tables
- full_lookup <- create_species_state_origin_matrix(resources = resources, include_infrataxa = include_infrataxa)
+ full_lookup <- create_species_state_origin_matrix(resources = resources, include_infrataxa = TRUE)
if(is.null(resources)){
message("Not finding taxonomic resources; check internet connection?")
diff --git a/R/standardise_names.R b/R/standardise_names.R
index 1f68596..dc39b37 100644
--- a/R/standardise_names.R
+++ b/R/standardise_names.R
@@ -118,7 +118,7 @@ standardise_names <- function(taxon_names) {
f("(\\s|\\()s\\.lat\\.(\\s|\\))", "") %>%
f("(\\s|\\()s\\.str\\.(\\s|\\))", "") %>%
- ## standarise "ser"
+ ## standardise "ser"
f("\\sser(\\s|\\.\\s)", " ser. ") %>%
f("\\sseries(\\s|\\.\\s)", " ser. ") %>%
@@ -133,7 +133,7 @@ standardise_names <- function(taxon_names) {
#' the first two words of the taxon name are extracted (e.g. "x Cynochloris"),
#' while for a non-hybrid genus just the first word is extracted (e.g. "Banksia").
#'
-#' @param taxon_name
+#' @param taxon_name A character vector of scientific names.
#'
#' @return The genus for a scientific name.
#'
diff --git a/R/synonyms_for_accepted_names.R b/R/synonyms_for_accepted_names.R
index 4be1725..732fef2 100644
--- a/R/synonyms_for_accepted_names.R
+++ b/R/synonyms_for_accepted_names.R
@@ -1,8 +1,8 @@
#' @title Synonyms for Currently Accepted Names
#'
#' @description
-#' This function generates lists a string of synonyms for currently accepted names to facilitate working out past names of a taxon
-#' when the current name is known
+#' This function generates a list of synonyms for currently accepted species and infraspecific taxa to facilitate working out past names of a taxon
+#' when the current name is known.
#'
#' @param accepted_names A character vector of currently accepted taxon names to look up synonyms for.
#' @param collapse Offering the option to return a long data table with each synonym in its own row,
@@ -32,6 +32,11 @@ synonyms_for_accepted_names <- function(accepted_names, collapse = TRUE, resourc
dplyr::select(accepted_name_usage_ID, accepted_name = canonical_name) |>
dplyr::filter(accepted_name %in% accepted_names)
+ if(nrow(accepted_names_with_usageID) == 0){
+ message("None of the taxon names you submitted are accepted by the APC. Look within `resources$APC_accepted` to ensure you have a properly formatted name.")
+ return(NULL)
+ }
+
# preferred order of taxonomic updates (function from `update_taxonomy.R`)
relevel_taxonomic_status_preferred_order <- function(taxonomic_status) {
@@ -66,12 +71,12 @@ synonyms_for_accepted_names <- function(accepted_names, collapse = TRUE, resourc
)
}
- # generate list of accepted_name_usage_ID's for accepted species
+ # Generate list of accepted_name_usage_ID's for accepted species
APC_synonyms_tmp <- resources$APC |>
dplyr::filter(taxon_rank %in% c("species", "variety", "form", "subspecies")) |>
# merge currently accepted names for each taxon onto all the synonyms
dplyr::right_join(accepted_names_with_usageID, by = "accepted_name_usage_ID") |>
- dplyr::select(canonical_name, taxonomic_status, accepted_name, accepted_name_usage_ID) |>
+ dplyr::select(canonical_name, taxonomic_status, accepted_name, accepted_name_usage_ID, taxon_ID) |>
# remove the accepted names themselves
dplyr::filter(taxonomic_status != "accepted") |>
dplyr::mutate(
@@ -93,22 +98,23 @@ synonyms_for_accepted_names <- function(accepted_names, collapse = TRUE, resourc
dplyr::distinct(accepted_name_usage_ID, synonyms)
accepted_names_with_synonyms <- resources$APC |>
- dplyr::select(canonical_name, taxon_rank, name_type, genus, family, scientific_name, accepted_name_usage_ID) |>
+ dplyr::select(canonical_name, family, scientific_name, accepted_name_usage_ID) |>
dplyr::filter(canonical_name %in% accepted_names_with_usageID$accepted_name & accepted_name_usage_ID %in% accepted_names_with_usageID$accepted_name_usage_ID) |>
dplyr::distinct(canonical_name, .keep_all = TRUE) |>
dplyr::left_join(APC_synonyms, by = "accepted_name_usage_ID") |>
- dplyr::rename(taxon_name = canonical_name) |>
- dplyr::arrange(family, taxon_name)
+ dplyr::select(family, accepted_name = canonical_name, synonyms, scientific_name, accepted_name_usage_ID) |>
+ dplyr::arrange(family, accepted_name)
} else {
# Create a long list if collapse = F, with one row per synonym
accepted_names_with_synonyms <- resources$APC |>
- dplyr::select(canonical_name, taxon_rank, name_type, genus, family, scientific_name, accepted_name_usage_ID) |>
+ dplyr::select(canonical_name, family, scientific_name, accepted_name_usage_ID) |>
dplyr::filter(canonical_name %in% accepted_names_with_usageID$accepted_name & accepted_name_usage_ID %in% accepted_names_with_usageID$accepted_name_usage_ID) |>
dplyr::distinct(canonical_name, .keep_all = TRUE) |>
dplyr::select(-canonical_name) |>
dplyr::left_join(APC_synonyms_tmp, by = "accepted_name_usage_ID") |>
+ dplyr::select(family, accepted_name, synonym = canonical_name, taxonomic_status, scientific_name, accepted_name_usage_ID, taxon_ID) |>
dplyr::arrange(family, accepted_name)
}
diff --git a/R/update_taxonomy.R b/R/update_taxonomy.R
index a09f935..4dd8f8b 100644
--- a/R/update_taxonomy.R
+++ b/R/update_taxonomy.R
@@ -600,7 +600,7 @@ update_taxonomy_APC_species_and_infraspecific_taxa <- function(data, resources,
suggested_name = ifelse(!is.na(suggested_collapsed_name), suggested_collapsed_name, suggested_name),
## these are occasionally taxa where the `accepted_name_usage_ID` links to a taxon that is "known" by APC, but doesn't have taxonomic_status = "accepted"
## for these taxa, the suggested name is the `canonical_name` associated with the particular `accepted_name_usage_ID`
- suggested_name = ifelse(is.na(suggested_name) & !is.na(taxon_ID), resources$APC_synonyms$canonical_name[match(taxon_ID,resources$APC_synonyms$accepted_name_usage_ID)], suggested_name),
+ suggested_name = ifelse(is.na(suggested_name) & !is.na(taxon_ID), resources$APC$canonical_name[match(taxon_ID, resources$APC$taxon_ID)], suggested_name),
## if there are no "accepted names" (or similar), the aligned name becomes the suggested name
suggested_name = ifelse(is.na(suggested_name), aligned_name, suggested_name),
taxonomic_status = ifelse(is.na(accepted_name), taxonomic_status_aligned, "accepted"),
diff --git a/README.Rmd b/README.Rmd
index 0bc1a64..04530a2 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -26,16 +26,26 @@ library(APCalign)
# APCalign
-`APCalign` uses the [Australian Plant Census (APC)](https://biodiversity.org.au/nsl/services/search/taxonomy) and [Australian Plant Name Index](https://biodiversity.org.au/nsl/services/search/names) to align and update Australian plant taxon names. 'APCalign' also supplies information about the established status (i.e., native/introduced) of plant taxa within different states/territories as compiled by the APC. It's useful for updating species list and intersecting them with the APC consensus for both taxonomy and establishment status.
+When working with biodiversity data, it is important to verify taxonomic names with an authoritative list and correct any out-of-date names or names with typos.
-DOI: https://doi.org/10.1071/BT24014
+The 'APCalign' package simplifies this process by:
+
+- Accessing up-to-date taxonomic information from the [Australian Plant Census](https://biodiversity.org.au/nsl/services/search/taxonomy) and the [Australian Plant Name Index](https://biodiversity.org.au/nsl/services/search/names).
+- Aligning authoritative names to your taxonomic names using our [fuzzy matching algorithm](https://traitecoevo.github.io/APCalign/articles/updating-taxon-names.html)
+- Updating your taxonomic names in a transparent, reproducible manner
+- Handling phrase names and aligning disparate phrase name syntax, as APCalign was developed explicitly for the Australian flora
+- Indicating when a split leads to uncertainty in a name alignment
+
+'APCalign' also supplies information about the established status (i.e., native/introduced) of plant taxa within different states/territories as compiled by the APC. It's useful for updating species lists and intersecting them with the APC consensus for both taxonomy and establishment status.
+
+Read the [APCalign paper](https://doi.org/10.1071/BT24014) to learn more about the motivations for this project and our fuzzy matching and aligning algorithms.
## Installation 🛠️
From CRAN:
```{r install, eval= FALSE}
- install.packages("APCalign")
+install.packages("APCalign")
library(APCalign)
```
@@ -47,6 +57,8 @@ install.packages("remotes")
remotes::install_github("traitecoevo/APCalign")
```
+Or for the ShinyApp head to [unsw.shinyapps.io/APCalign-app](https://unsw.shinyapps.io/APCalign-app/)
+
## A quick demo
Generating a look-up table can be done with just one function:
@@ -61,7 +73,7 @@ create_taxonomic_update_lookup(
)
```
-If you're going to use `APCalign` more than once, it will save you time to load the taxonomic resources into memory first:
+You can alternatively load the taxonomic resources into memory first:
```{r,message=FALSE}
tax_resources <- load_taxonomic_resources()
@@ -69,8 +81,20 @@ tax_resources <- load_taxonomic_resources()
create_taxonomic_update_lookup(
taxa = c(
"Banksia integrifolia",
+ "Banksya integrifolla",
+ "Banksya integriifolla",
+ "Banksyya integriifolla",
+ "Banksia red flowers",
+ "Banksia sp.",
+ "Banksia catoglypta",
+ "Dryandra catoglypta",
+ "Dryandra cataglypta",
+ "Dryandra australis",
"Acacia longifolia",
"Commersonia rosea",
+ "Panicum sp. Hairy glumes (C.R.Michell 4192)",
+ "Panicum sp. Hairy glumes (Michell)",
+ "Panicum sp. Hairy glumes",
"not a species"
),
resources = tax_resources
@@ -81,9 +105,16 @@ Checking for a list of species to see if they are classified as Australian nativ
```{r, message=FALSE}
native_anywhere_in_australia(c("Eucalyptus globulus","Pinus radiata"), resources = tax_resources)
+```
+Determining the number of species present in NSW and their establishment means:
+```{r, message=FALSE}
+state_diversity_counts("NSW", resources = tax_resources)
```
+The related function `create_species_state_origin_matrix()` generates a table for all taxa in Australia, indicating their distribution and establishment means, by state.
+
+
Getting a family lookup table for genera from the specified taxonomy:
```{r, message=FALSE}
@@ -96,6 +127,14 @@ get_apc_genus_family_lookup(c("Eucalyptus",
resources = tax_resources)
```
+Compiling a list of outdated synonyms for currently accepted names:
+
+```{r, message=FALSE}
+names_to_check <- c("Acacia aneura", "Banksia nivea", "Cardamine gunnii", "Stenocarpus sinuatus")
+synonyms_for_accepted_names(resources = tax_resources, accepted_names = names_to_check, collapse = T)
+```
+
+
## Cheatsheet
diff --git a/README.md b/README.md
index 3ece52b..442023a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
+
[](https://lifecycle.r-lib.org/articles/stages.html#stable)
@@ -11,24 +12,40 @@ coverage](https://codecov.io/gh/traitecoevo/APCalign/branch/master/graph/badge.s
# APCalign
-`APCalign` uses the [Australian Plant Census
-(APC)](https://biodiversity.org.au/nsl/services/search/taxonomy) and
-[Australian Plant Name
-Index](https://biodiversity.org.au/nsl/services/search/names) to align
-and update Australian plant taxon names. ‘APCalign’ also supplies
-information about the established status (i.e., native/introduced) of
-plant taxa within different states/territories as compiled by the APC.
-It’s useful for updating species list and intersecting them with the APC
-consensus for both taxonomy and establishment status.
-
-DOI:
+When working with biodiversity data, it is important to verify taxonomic
+names with an authoritative list and correct any out-of-date names or
+names with typos.
+
+The ‘APCalign’ package simplifies this process by:
+
+- Accessing up-to-date taxonomic information from the [Australian Plant
+ Census](https://biodiversity.org.au/nsl/services/search/taxonomy) and
+ the [Australian Plant Name
+ Index](https://biodiversity.org.au/nsl/services/search/names).
+- Aligning authoritative names to your taxonomic names using our [fuzzy
+ matching
+ algorithm](https://traitecoevo.github.io/APCalign/articles/updating-taxon-names.html)
+- Updating your taxonomic names in a transparent, reproducible manner
+- Handling phrase names and aligning disparate phrase name syntax, as
+ APCalign was developed explicitly for the Australian flora
+- Indicating when a split leads to uncertainty in a name alignment
+
+‘APCalign’ also supplies information about the established status (i.e.,
+native/introduced) of plant taxa within different states/territories as
+compiled by the APC. It’s useful for updating species lists and
+intersecting them with the APC consensus for both taxonomy and
+establishment status.
+
+Read the [APCalign paper](https://doi.org/10.1071/BT24014) to learn more
+about the motivations for this project and our fuzzy matching and
+aligning algorithms.
## Installation 🛠️
From CRAN:
``` r
- install.packages("APCalign")
+install.packages("APCalign")
library(APCalign)
```
@@ -40,6 +57,9 @@ install.packages("remotes")
remotes::install_github("traitecoevo/APCalign")
```
+Or for the ShinyApp head to
+[unsw.shinyapps.io/APCalign-app](https://unsw.shinyapps.io/APCalign-app/)
+
## A quick demo
Generating a look-up table can be done with just one function:
@@ -65,34 +85,55 @@ create_taxonomic_update_lookup(
#> # scientific_name , aligned_reason , update_reason ,
#> # number_of_collapsed_taxa
-If you’re going to use `APCalign` more than once, it will save you time
-to load the taxonomic resources into memory first:
+You can alternatively load the taxonomic resources into memory first:
``` r
tax_resources <- load_taxonomic_resources()
-```
- #> ================================================================================================================================================================
-
- create_taxonomic_update_lookup(
- taxa = c(
- "Banksia integrifolia",
- "Acacia longifolia",
- "Commersonia rosea",
- "not a species"
- ),
- resources = tax_resources
- )
- #> # A tibble: 4 × 12
- #> original_name aligned_name accepted_name suggested_name genus taxon_rank
- #>
- #> 1 Banksia integrifol… Banksia int… Banksia inte… Banksia integ… Bank… species
- #> 2 Acacia longifolia Acacia long… Acacia longi… Acacia longif… Acac… species
- #> 3 Commersonia rosea Commersonia… Androcalva r… Androcalva ro… Andr… species
- #> 4 not a species
- #> # ℹ 6 more variables: taxonomic_dataset , taxonomic_status ,
- #> # scientific_name , aligned_reason , update_reason ,
- #> # number_of_collapsed_taxa
+create_taxonomic_update_lookup(
+ taxa = c(
+ "Banksia integrifolia",
+ "Banksya integrifolla",
+ "Banksya integriifolla",
+ "Banksyya integriifolla",
+ "Banksia red flowers",
+ "Banksia sp.",
+ "Banksia catoglypta",
+ "Dryandra catoglypta",
+ "Dryandra cataglypta",
+ "Dryandra australis",
+ "Acacia longifolia",
+ "Commersonia rosea",
+ "Panicum sp. Hairy glumes (C.R.Michell 4192)",
+ "Panicum sp. Hairy glumes (Michell)",
+ "Panicum sp. Hairy glumes",
+ "not a species"
+ ),
+ resources = tax_resources
+)
+#> # A tibble: 16 × 12
+#> original_name aligned_name accepted_name suggested_name genus taxon_rank
+#>
+#> 1 Banksia integrifo… Banksia int… Banksia inte… Banksia integ… Bank… species
+#> 2 Banksya integrifo… Banksia int… Banksia inte… Banksia integ… Bank… species
+#> 3 Banksya integriif… Banksia int… Banksia inte… Banksia integ… Bank… species
+#> 4 Banksyya integrii… Banksia sp.… Banksia sp. [… Bank… genus
+#> 5 Banksia red flowe… Banksia sp.… Banksia sp. [… Bank… genus
+#> 6 Banksia sp. Banksia sp. Banksia sp. Bank… genus
+#> 7 Banksia catoglypta Banksia cat… Banksia cato… Banksia catog… Bank… species
+#> 8 Dryandra catoglyp… Dryandra ca… Banksia cato… Banksia catog… Bank… species
+#> 9 Dryandra cataglyp… Dryandra ca… Banksia cato… Banksia catog… Bank… species
+#> 10 Dryandra australis Dryandra sp… Dryandra sp. … Drya… genus
+#> 11 Acacia longifolia Acacia long… Acacia longi… Acacia longif… Acac… species
+#> 12 Commersonia rosea Commersonia… Androcalva r… Androcalva ro… Andr… species
+#> 13 Panicum sp. Hairy… Panicum sp.… Panicum sp. … Panicum sp. H… Pani… species
+#> 14 Panicum sp. Hairy… Panicum sp.… Panicum sp. … Panicum sp. H… Pani… species
+#> 15 Panicum sp. Hairy… Panicum sp.… Panicum sp. … Panicum sp. H… Pani… species
+#> 16 not a species
+#> # ℹ 6 more variables: taxonomic_dataset , taxonomic_status ,
+#> # scientific_name , aligned_reason , update_reason ,
+#> # number_of_collapsed_taxa
+```
Checking for a list of species to see if they are classified as
Australian natives:
@@ -106,6 +147,27 @@ native_anywhere_in_australia(c("Eucalyptus globulus","Pinus radiata"), resources
#> 2 Pinus radiata introduced
```
+Determining the number of species present in NSW and their establishment
+means:
+
+``` r
+state_diversity_counts("NSW", resources = tax_resources)
+#> # A tibble: 7 × 3
+#> origin state num_species
+#>
+#> 1 doubtfully naturalised NSW 94
+#> 2 formerly naturalised NSW 8
+#> 3 native NSW 5980
+#> 4 native and doubtfully naturalised NSW 2
+#> 5 native and naturalised NSW 34
+#> 6 naturalised NSW 1584
+#> 7 presumed extinct NSW 9
+```
+
+The related function `create_species_state_origin_matrix()` generates a
+table for all taxa in Australia, indicating their distribution and
+establishment means, by state.
+
Getting a family lookup table for genera from the specified taxonomy:
``` r
@@ -127,6 +189,20 @@ get_apc_genus_family_lookup(c("Eucalyptus",
#> 6 Triodia Poaceae
```
+Compiling a list of outdated synonyms for currently accepted names:
+
+``` r
+names_to_check <- c("Acacia aneura", "Banksia nivea", "Cardamine gunnii", "Stenocarpus sinuatus")
+synonyms_for_accepted_names(resources = tax_resources, accepted_names = names_to_check, collapse = T)
+#> # A tibble: 4 × 5
+#> family accepted_name synonyms scientific_name accepted_name_usage_ID
+#>
+#> 1 Brassicaceae Cardamine gunnii Cardami… Cardamine gunn… https://id.biodiversi…
+#> 2 Fabaceae Acacia aneura Acacia … Acacia aneura … https://id.biodiversi…
+#> 3 Proteaceae Banksia nivea Dryandr… Banksia nivea … https://id.biodiversi…
+#> 4 Proteaceae Stenocarpus sinu… Stenoca… Stenocarpus si… https://id.biodiversi…
+```
+
## Cheatsheet
@@ -150,8 +226,8 @@ citation("APCalign")
#> Wenk E, Cornwell W, Fuchs A, Kar F, Monro A, Sauquet H, Stephens R,
#> Falster D (2024). "APCalign: an R package workflow and app for
#> aligning and updating flora names to the Australian Plant Census."
-#> _Australian Journal of Botany_. R package version: 1.0.1,
-#> .
+#> _Australian Journal of Botany_, *72*(4). R package version: 1.1.4,
+#> .
#>
#> A BibTeX entry for LaTeX users is
#>
@@ -159,9 +235,12 @@ citation("APCalign")
#> title = {APCalign: an R package workflow and app for aligning and updating flora names to the Australian Plant Census},
#> journal = {Australian Journal of Botany},
#> author = {Elizabeth Wenk and Will Cornwell and Ann Fuchs and Fonti Kar and Anna Monro and Herve Sauquet and Ruby Stephens and Daniel Falster},
+#> volume = {72},
+#> number = {4},
#> year = {2024},
-#> note = {R package version: 1.0.1},
-#> url = {https://www.biorxiv.org/content/10.1101/2024.02.02.578715v1},
+#> publisher = {CSIRO Publishing},
+#> note = {R package version: 1.1.4},
+#> url = {https://doi.org/10.1071/BT24014},
#> }
```
diff --git a/man/APCalign.Rd b/man/APCalign.Rd
index e9b9e7e..fe32d2f 100644
--- a/man/APCalign.Rd
+++ b/man/APCalign.Rd
@@ -14,7 +14,7 @@ the established status of plant taxa across different states/territories.
}
\section{Functions}{
-\strong{Standarise taxon names}
+\strong{Standardise taxon names}
\itemize{
\item \link{load_taxonomic_resources}
\item \link{create_taxonomic_update_lookup}
diff --git a/man/create_species_state_origin_matrix.Rd b/man/create_species_state_origin_matrix.Rd
index 518ca00..f609e4d 100644
--- a/man/create_species_state_origin_matrix.Rd
+++ b/man/create_species_state_origin_matrix.Rd
@@ -29,7 +29,7 @@ This function processes the geographic data available in the APC and
returns state level native, introduced and more complicated origins status for all taxa.
}
\examples{
-\donttest{create_species_state_origin_matrix()}#'
+\donttest{create_species_state_origin_matrix()}
\donttest{create_species_state_origin_matrix(include_infrataxa = TRUE)}
diff --git a/man/native_anywhere_in_australia.Rd b/man/native_anywhere_in_australia.Rd
index 73687c2..f4e63c2 100644
--- a/man/native_anywhere_in_australia.Rd
+++ b/man/native_anywhere_in_australia.Rd
@@ -4,11 +4,7 @@
\alias{native_anywhere_in_australia}
\title{Native anywhere in Australia}
\usage{
-native_anywhere_in_australia(
- species,
- resources = load_taxonomic_resources(),
- include_infrataxa = FALSE
-)
+native_anywhere_in_australia(species, resources = load_taxonomic_resources())
}
\arguments{
\item{species}{A character string typically representing the binomial for the species.}
@@ -16,9 +12,6 @@ native_anywhere_in_australia(
\item{resources}{An optional list of taxonomic resources to use for the lookup.
If not provided, the function will load default taxonomic resources using the
\code{load_taxonomic_resources()} function.}
-
-\item{include_infrataxa}{option to include subspecies, varieties and forms in the output.
-Set to false as the default, outputting results just for species-rank taxa.}
}
\value{
A tibble with two columns: \code{species}, which is the same as the unique values of
diff --git a/man/synonyms_for_accepted_names.Rd b/man/synonyms_for_accepted_names.Rd
index d230cdf..5ccdbac 100644
--- a/man/synonyms_for_accepted_names.Rd
+++ b/man/synonyms_for_accepted_names.Rd
@@ -22,13 +22,15 @@ versus collapsed into a vector for each accepted name}
A table with the currently accepted name and columns documenting all synonyms and all synonyms with taxonomic status.
}
\description{
-This function generates lists a string of synonyms for currently accepted names to facilitate working out past names of a taxon
-when the current name is known
+This function generates a list of synonyms for currently accepted species and infraspecific taxa to facilitate working out past names of a taxon
+when the current name is known.
}
\examples{
+\donttest{
synonyms_for_accepted_names(
accepted_names = c("Justicia tenella", "Acacia aneura"),
collapse = TRUE
)
+}
}
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
index 794a2e1..2619e5e 100644
--- a/tests/testthat/helper.R
+++ b/tests/testthat/helper.R
@@ -1,6 +1 @@
-# hack to enable faster load times for tests
-if(exists("resources", envir = globalenv())) {
- resources <- get("resources", envir = globalenv())
-} else {
- resources <- load_taxonomic_resources(stable_or_current_data = "stable", version = "2024-10-11")
-}
+resources <- load_taxonomic_resources(stable_or_current_data = "stable", version = "2024-10-11")
diff --git a/tests/testthat/test-operation_outputs.R b/tests/testthat/test-operation_outputs.R
index 7a43135..54cd820 100644
--- a/tests/testthat/test-operation_outputs.R
+++ b/tests/testthat/test-operation_outputs.R
@@ -300,31 +300,70 @@ test_that("No warnings if trying to match input name to empty accepted name set.
test_that("synonyms_for_accepted_names outputs expected number of rows", {
expect_silent(
- x <- synonyms_for_accepted_names(
+ synonyms_T <- synonyms_for_accepted_names(
accepted_names = c("Justicia tenella", "Acacia aneura"),
collapse = TRUE, resources = resources
)
)
+ expect_silent(
+ synonyms_F <- synonyms_for_accepted_names(
+ accepted_names = c("Justicia tenella", "Acacia aneura", "Cardamine gunnii"),
+ collapse = FALSE, resources = resources
+ )
+ )
+
expect_equal(
- nrow(x),
+ nrow(synonyms_T),
2
)
expect_silent(
- x <- synonyms_for_accepted_names(
- accepted_names = c("Justicia tenella", "Acacia aneura"),
+ synonyms_F <- synonyms_for_accepted_names(
+ accepted_names = c("Justicia tenella", "Acacia aneura", "Cardamine gunnii"),
collapse = FALSE, resources = resources
)
)
- # currently there are 9 rows, but this can increase with additional synonyms being added
+ # currently there are 15 rows, but this can increase with additional synonyms being added
expect_gte(
- nrow(x),
- 8
+ nrow(synonyms_F),
+ 14
)
- expect_contains(x$canonical_name, "Racosperma aneurum")
+ expect_gte(
+ nrow(synonyms_F),
+ nrow(synonyms_T)
+ )
+
+ expect_contains(synonyms_F$synonym, "Racosperma aneurum")
+ expect_contains(synonyms_F$synonym, "Cardamine heterophylla")
+ expect_no_error(stringr::str_detect(synonyms_T$synonyms, "Racosperma aneurum"))
+ expect_no_error(stringr::str_detect(synonyms_T$synonyms, "Cardamine heterophylla"))
+
+
+ expect_equal(
+ names(synonyms_T),
+ c("family", "accepted_name", "synonyms", "scientific_name", "accepted_name_usage_ID"))
+ expect_equal(
+ names(synonyms_F),
+ c("family", "accepted_name", "synonym", "taxonomic_status", "scientific_name", "accepted_name_usage_ID", "taxon_ID"))
+
+ expect_equal(
+ nrow(synonyms_F),
+ length(unique(synonyms_F$taxon_ID))
+ )
+
+ expect_equal(
+ length(unique(synonyms_F$synonym)),
+ length(unique(synonyms_F$taxon_ID))
+ )
+
+ expect_equal(
+ length(unique(synonyms_F$accepted_name)),
+ length(unique(synonyms_F$accepted_name_usage_ID))
+ )
+
}
)
diff --git a/vignettes/APCalign.Rmd b/vignettes/APCalign.Rmd
index 06e4c6b..f723e13 100644
--- a/vignettes/APCalign.Rmd
+++ b/vignettes/APCalign.Rmd
@@ -9,7 +9,7 @@ vignette: >
-When working with biodiversity data, it is important to verify taxonomic names with an authoritative list and correct any out-of-date names. The 'APCalign' package simplifies this process by:
+When working with biodiversity data, it is important to verify taxonomic names with an authoritative list and correct any out-of-date names or names with typos. The 'APCalign' package simplifies this process by:
- Accessing up-to-date taxonomic information from the [Australian Plant Census](https://biodiversity.org.au/nsl/services/search/taxonomy) and the [Australia Plant Name Index](https://biodiversity.org.au/nsl/services/search/names).
- Aligning authoritative names to your taxonomic names using our [fuzzy matching algorithm](https://traitecoevo.github.io/APCalign/articles/updating-taxon-names.html)
@@ -17,8 +17,9 @@ When working with biodiversity data, it is important to verify taxonomic names
## Installation
-'APCalign' is currently not on CRAN. You can install its current developmental version using
+The latest version of 'APCalign' should be available on CRAN and can be installed using `install.packages("APCalign")`.
+You can alternatively install the current developmental version using
``` r
@@ -28,8 +29,9 @@ remotes::install_github("traitecoevo/APCalign")
library(APCalign)
```
-To demonstrate how to use 'APCalign', we will use an example dataset `gbif_lite` which is documented in `?gbif_lite`
+Alternatively, our ShinyApp is available at [unsw.shinyapps.io/APCalign-app](https://unsw.shinyapps.io/APCalign-app/).
+To demonstrate how to use 'APCalign', we will use an example dataset `gbif_lite` which is documented in `?gbif_lite`
``` r
@@ -58,7 +60,7 @@ There are two versions of the databases that you can retrieve with the `stable_o
- `stable` will retrieve the most recent, archived version of the databases from our [GitHub releases](https://github.com/traitecoevo/APCalign/releases). This is set as the default option.
- `current` will retrieve the up-to-date databases directly from the APC and APNI website.
-Note that the databases are quite large so the initial retrieval of `stable` versions will take a few minutes. Once the taxonomic resources have been stored locally, subsequent retrievals will take less time. Retrieving `current` resources will always take longer since it is accessing the latest information from the website. Check out our [Resource Caching](https://traitecoevo.github.io/APCalign/articles/caching.html) article to learn more about how the APC and APNIC databases are accessed, stored and retrieved.
+Note that the databases are quite large so the initial retrieval of `stable` versions will take a few minutes. Once the taxonomic resources have been stored locally, subsequent retrievals will take less time. Retrieving `current` resources will always take longer since it is accessing the latest information from the website. Check out our [Resource Caching](https://traitecoevo.github.io/APCalign/articles/caching.html) article to learn more about how the APC and APNI databases are accessed, stored and retrieved.
``` r
@@ -67,11 +69,14 @@ stable_start_time <- Sys.time()
stable_resources <- load_taxonomic_resources(stable_or_current_data = "stable")
```
-
+
```
#> Loading resources into memory...
-#>
=========================================
===================================================================================
============================================================================================================================
+#>
+=========================================
+===================================================================================
+============================================================================================================================
#> ...done
stable_end_time <- Sys.time()
@@ -79,11 +84,14 @@ current_start_time <- Sys.time()
current_resources <- load_taxonomic_resources(stable_or_current_data = "current")
```
-
+
```
#> Loading resources into memory...
-#>
=========================================
===================================================================================
============================================================================================================================
+#>
+=========================================
+===================================================================================
+============================================================================================================================
#> ...done
current_end_time <- Sys.time()
@@ -99,11 +107,14 @@ For a more reproducible workflow, we recommend specifying the exact `stable` ver
resources <- load_taxonomic_resources(stable_or_current_data = "stable", version = "2024-10-11")
```
-
+
```
#> Loading resources into memory...
-#>
=========================================
===================================================================================
============================================================================================================================
+#>
+=========================================
+===================================================================================
+============================================================================================================================
#> ...done
```
@@ -166,16 +177,16 @@ The `suggested_name` is the best possible name option for the `original_name`.
We can access the established status data by state/territory using `create_species_state_origin_matrix()`
-
+You can include infrataxa (subspecies, varieties and forms) by setting `include_infrataxa = TRUE`, or restrict the output
+to species-rank taxa only by setting `include_infrataxa = FALSE`.
``` r
# Retrieve status data by state/territory
-status_matrix <- create_species_state_origin_matrix(resources = resources)
+status_matrix <- create_species_state_origin_matrix(resources = resources, include_infrataxa = TRUE)
```
Here is a breakdown of all possible values for `origin`
-
``` r
library(purrr)
#>
@@ -190,29 +201,32 @@ library(janitor)
#>
#> chisq.test, fisher.test
-# Obtain unique values
+# Obtain unique values for establishment status
status_matrix |>
- select(-species) |>
- flatten_chr() |>
- tabyl()
-#> flatten_chr(select(status_matrix, -species)) n percent
-#> doubtfully naturalised 1129 2.387326e-03
-#> formerly naturalised 277 5.857302e-04
-#> native 40363 8.534956e-02
-#> native and doubtfully naturalised 9 1.903094e-05
-#> native and naturalised 137 2.896933e-04
-#> native and uncertain origin 2 4.229099e-06
-#> naturalised 8769 1.854248e-02
-#> not present 422103 8.925576e-01
-#> presumed extinct 102 2.156840e-04
-#> uncertain origin 23 4.863464e-05
+ tidyr::pivot_longer(4:21) |>
+ filter(value != "not present") |>
+ distinct(value)
+
+# value
+#
+# 1 native
+# 2 naturalised
+# 3 doubtfully naturalised
+# 4 native and naturalised
+# 5 formerly naturalised
+# 6 presumed extinct
+# 7 uncertain origin
+# 8 native and uncertain origin
+# 9 native and doubtfully naturalised
```
-
-
-You can also obtain the breakdown of species by established status for a particular state/territory using `state_diversity_counts()`
+
+``` r
+status_matrix |> select(family, species, ACT:WA) |> print(n=20)
+```
+You can also obtain the breakdown of species by established status for a particular state/territory using `state_diversity_counts()`
``` r
state_diversity_counts("NSW", resources = resources)
@@ -331,7 +345,6 @@ There are arguments in `align_taxa` that allows you to select which of the 50 ma
`update_taxonomy()` uses the information generated by `align_taxa()` to, whenever possible, update names to APC-accepted names.
-
``` r
updated_gbif_taxa <- aligned_gbif_taxa |>
update_taxonomy(resources = resources)
@@ -428,3 +441,41 @@ aligned_gbif_taxa |>
#> # taxon_distribution , scientific_name , taxon_ID , taxon_ID_genus , scientific_name_ID ,
#> # canonical_name , row_number , number_of_collapsed_taxa
```
+
+### Generating lists of synonyms {#synonyms}
+
+`synonyms_for_accepted_names()` compiles a table of all outdated and misapplied names which once applied to an accepted taxon name. The output can be in a condensed format, with the synonyms for an accepted name appearing in a single cell, or in a long format, with a separate row for each synonym. Both formats indicate each synonym's "type" - `nomenclatural_synonym`, `taxonomic_synonym`, `orthographic_variant`, `misapplied`, etc. This function is particularly useful for research applications where you know a currently accepted taxon name and want to indicate the past names that apply, to document the links or to efficiently search the literature.
+
+```r
+names_to_check <- c("Acacia aneura", "Banksia nivea", "Cardamine gunnii", "Stenocarpus sinuatus")
+synonyms_for_accepted_names(resources = resources, accepted_names = names_to_check, collapse = TRUE)
+
+# A tibble: 4 × 8
+# family accepted_name synonyms taxon_rank name_type scientific_name accepted_name_usage_ID genus
+#
+#1 Brassicaceae Cardamine gunnii Cardamine hirsuta var. heterophylla (taxonomic synonym); Cardamine hirsuta var. debilis (taxonomic synonym); Cardamine gunnii, ty… species scientif… Cardamine gunn… https://id.biodiversi… Card…
+#2 Fabaceae Acacia aneura Acacia aneura var. intermedia (taxonomic synonym); Racosperma aneurum var. intermedium (taxonomic synonym); Acacia aneura var. (N… species scientif… Acacia aneura … https://id.biodiversi… Acac…
+#3 Proteaceae Banksia nivea Dryandra nivea var. venosa (taxonomic synonym); Josephia rachidifolia (taxonomic synonym); Dryandra nivea var. adscendens (taxono… species scientif… Banksia nivea … https://id.biodiversi… Bank…
+#4 Proteaceae Stenocarpus sinuatus Agnostus sinuatus (taxonomic synonym); Agnostus sinuata (taxonomic synonym); Stenocarpus sinuosus var. intergrifolius (taxonomic … species scientif… Stenocarpus si… https://id.biodiversi… Sten…
+
+```
+
+```r
+names_to_check <- c("Acacia aneura", "Banksia nivea", "Cardamine gunnii", "Stenocarpus sinuatus")
+synonyms_for_accepted_names(resources = resources, accepted_names = names_to_check, collapse = FALSE)
+# A tibble: 25 × 9
+# family accepted_name synonym taxonomic_status taxon_rank name_type scientific_name accepted_name_usage_ID genus
+#
+# 1 Brassicaceae Cardamine gunnii Cardamine hirsuta var. heterophylla taxonomic synonym species scientific Cardamine gunnii Hewson https://id.biodiversity.org.au/node/apni/2886061 Cardamine
+# 2 Brassicaceae Cardamine gunnii Cardamine hirsuta var. debilis taxonomic synonym species scientific Cardamine gunnii Hewson https://id.biodiversity.org.au/node/apni/2886061 Cardamine
+# 3 Brassicaceae Cardamine gunnii Cardamine gunnii, type variant nomenclatural synonym species scientific Cardamine gunnii Hewson https://id.biodiversity.org.au/node/apni/2886061 Cardamine
+# 4 Brassicaceae Cardamine gunnii Cardamine heterophylla var. heterophylla nomenclatural synonym species scientific Cardamine gunnii Hewson https://id.biodiversity.org.au/node/apni/2886061 Cardamine
+# 5 Brassicaceae Cardamine gunnii Cardamine heterophylla nomenclatural synonym species scientific Cardamine gunnii Hewson https://id.biodiversity.org.au/node/apni/2886061 Cardamine
+# 6 Brassicaceae Cardamine gunnii Cardamine debilis misapplied species scientific Cardamine gunnii Hewson https://id.biodiversity.org.au/node/apni/2886061 Cardamine
+# 7 Fabaceae Acacia aneura Acacia aneura var. intermedia taxonomic synonym species scientific Acacia aneura F.Muell. ex Benth. https://id.biodiversity.org.au/node/apni/6707550 Acacia
+# 8 Fabaceae Acacia aneura Racosperma aneurum var. intermedium taxonomic synonym species scientific Acacia aneura F.Muell. ex Benth. https://id.biodiversity.org.au/node/apni/6707550 Acacia
+# 9 Fabaceae Acacia aneura Acacia aneura var. (Napperby S.L.Everist 4226) taxonomic synonym species scientific Acacia aneura F.Muell. ex Benth. https://id.biodiversity.org.au/node/apni/6707550 Acacia
+# 10 Fabaceae Acacia aneura Acacia aneura var. (Thargomindah D.E.Boyland 8099) taxonomic synonym species scientific Acacia aneura F.Muell. ex Benth. https://id.biodiversity.org.au/node/apni/6707550 Acacia
+# ℹ 15 more rows
+# ℹ Use `print(n = ...)` to see more rows
+```
\ No newline at end of file
diff --git a/vignettes/updating-taxon-names.Rmd b/vignettes/updating-taxon-names.Rmd
index e3354ff..77ba4ab 100644
--- a/vignettes/updating-taxon-names.Rmd
+++ b/vignettes/updating-taxon-names.Rmd
@@ -21,7 +21,6 @@ options(rmarkdown.html_vignette.check_title = FALSE)
library(APCalign)
library(readr)
library(dplyr)
-library(here)
#' Format table with kable and default styling for html
#'