Skip to content

Commit

Permalink
Algorithm fixes (#174)
Browse files Browse the repository at this point in the history
small algorithm fixes

   * export strip_names_2
   * for genus-resolution names where the genus is outdated, the genus-column was being updated, but the new genus wasn't being "inserted" into the suggested name due to a typo
   * leave suggested_name blank if not even genus/family can be aligned
  • Loading branch information
ehwenk authored Jan 22, 2024
1 parent fd20bdd commit 3173a85
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 34 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(native_anywhere_in_australia)
export(standardise_names)
export(state_diversity_counts)
export(strip_names)
export(strip_names_2)
export(update_taxonomy)
import(dplyr)
import(stringr)
Expand Down
20 changes: 4 additions & 16 deletions R/match_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes '--' (double dash) indicating an intergrade between two taxa, but exact and fuzzy matches fail to align to a genus in the APC or APNI (",
Sys.Date(),
Expand Down Expand Up @@ -724,10 +721,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes '/' (slash) indicating an uncertain species identification but an accepted genus and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI (",
Sys.Date(),
Expand Down Expand Up @@ -1032,10 +1026,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " sp. [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes 'affinis' or 'aff' indicating an unknown taxon that bears an affinity to a different taxon in the same genus and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI ",
Sys.Date(),
Expand Down Expand Up @@ -1299,10 +1290,7 @@ match_taxa <- function(
taxonomic_dataset = NA_character_,
taxon_rank = "genus",
aligned_name_tmp = paste0(stringr::word(cleaned_name,1), " x [", cleaned_name),
aligned_name = ifelse(is.na(identifier_string2),
paste0(aligned_name_tmp, "]"),
paste0(aligned_name_tmp, identifier_string2, "]")
),
aligned_name = NA,
aligned_reason = paste0(
"Taxon name includes ' x ' indicating a hybrid taxon and taxon can only be aligned to genus-rank. Exact and fuzzy matches fail to align to a genus in the APC or APNI (",
Sys.Date(),
Expand Down
4 changes: 2 additions & 2 deletions R/strip_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ strip_names <- function(taxon_names) {
#' Strip taxonomic names of subtaxa designations, filler words and special characters
#'
#' Given a vector of taxonomic names, this function removes subtaxa designations ("subsp.", "var.", "f.", and "ser"),
#' additional filler words and characters (" x " [hybrid taxa], "sp.", "cf"),
#' additional filler words and characters (" x " for hybrid taxa, "sp.", "cf"),
#' special characters (e.g., "-", ".", "(", ")", "?"), and extra whitespace. The resulting vector
#' of names is also converted to lowercase.
#'
Expand All @@ -54,7 +54,7 @@ strip_names <- function(taxon_names) {
#' "Acacia sp.",
#' "Lepidium sp. Tanguin Hill (K.R.Newbey 10501)"))
#'
#' @noRd
#' @export
strip_names_2 <- function(taxon_names) {
taxon_names %>%
stringr::str_replace_all("\\.", "") %>%
Expand Down
11 changes: 5 additions & 6 deletions R/update_taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,13 @@ update_taxonomy <- function(aligned_data,
dplyr::bind_rows(taxa_blank) %>%
dplyr::mutate(
suggested_name = ifelse(is.na(suggested_name), aligned_name, suggested_name),
suggested_name = ifelse(is.na(suggested_name), original_name, suggested_name),
update_reason = ifelse(taxonomic_status_aligned == "accepted", "aligned name accepted by APC", update_reason),
taxonomic_status = ifelse(is.na(taxonomic_status), "unknown", taxonomic_status),
taxonomic_dataset = ifelse(stringr::str_detect(taxonomic_dataset, "APC"), "APC", taxonomic_dataset),
## `genus` was the first word of the `aligned_name` in the input table; now needs to be set to NA for unknown taxa
genus = ifelse(taxonomic_status == "unknown", NA_character_, genus),
taxon_rank = ifelse(taxonomic_status == "unknown", NA_character_, taxon_rank),
# the next line makes everythign incosistent. If we want low, should do on loading APC
# the next line makes everything inconsistent. If we want low, should do on loading APC
taxon_rank = stringr::str_to_lower(taxon_rank),
canonical_name = suggested_name,
taxonomic_status_aligned = ifelse(is.na(taxonomic_status_aligned), NA_character_, taxonomic_status_aligned)
Expand Down Expand Up @@ -244,14 +243,14 @@ relevel_taxonomic_status_preferred_order <- function(taxonomic_status) {
update_taxonomy_APC_genus <- function(data, resources) {

if(is.null(data)) return(NULL)

data %>%
# merge in columns from APC, at the genus-level
dplyr::left_join(
by = "genus",
resources$genera_all %>%
dplyr::filter(stringr::str_detect(taxonomic_dataset, "APC")) %>%
dplyr::arrange(canonical_name, taxonomic_status) %>% ### how do I specify that I want to arrange by `preferred order`
dplyr::arrange(canonical_name, taxonomic_status) %>%
dplyr::distinct(canonical_name, .keep_all = TRUE) %>%
dplyr::mutate(
genus = canonical_name,
Expand All @@ -276,8 +275,8 @@ update_taxonomy_APC_genus <- function(data, resources) {
taxon_ID_genus = resources$genera_all$taxon_ID[match(accepted_name_usage_ID, resources$genera_all$accepted_name_usage_ID)],
# genus names in `aligned_name` that are not APC-accepted need to be updated to their current name in `suggested_name`
aligned_minus_genus = ifelse(is.na(genus_accepted), NA, stringr::str_replace(aligned_name, extract_genus(aligned_name), "")),
suggested_name = ifelse(taxonomic_status == "accepted", paste0(genus_accepted, aligned_minus_genus), NA),
suggested_name = ifelse(taxonomic_status != "accepted", aligned_name, suggested_name),
# if there is an APC-accepted genus, replace whatever the initial genus was with the accepted genus, otherwise the suggested name is the aligned name
suggested_name = ifelse(taxonomic_status == "genus accepted", paste0(genus_accepted, aligned_minus_genus), aligned_name),
# indicate taxonomic_status of the genus name in `aligned_name` and why it needed to be updated for the `suggested_name`
genus_update_reason = as.character(my_order),
genus = genus_accepted,
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ reference:
- update_taxonomy
- standardise_names
- strip_names
- strip_names_2
- subtitle: Established status across states/territories
- contents:
- create_species_state_origin_matrix
Expand Down
29 changes: 29 additions & 0 deletions man/strip_names_2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions tests/testthat/benchmarks/test_matches_alignments_updates.csv
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ Aporuelliaa abc--def,match_03c,Aporuellia sp. [Aporuelliaa abc--def; test_all_ma
Driandra abc--def,match_03c,Dryandra sp. [Driandra abc--def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium abc--def,match_03d,Xystidium sp. [Xyystidium abc--def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa abc--def,match_03d,Zygia sp. [Zygiaa abc--def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde fgh -- ijk,match_03e,Abcde sp. [Abcde fgh -- ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Ryandra abc--def,match_03e,Ryandra sp. [Ryandra abc--def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde fgh -- ijk,match_03e,NA,NA,genus,NA,TRUE,NA,NA,NA
Ryandra abc--def,match_03e,NA,NA,genus,NA,TRUE,NA,NA,NA
Abildgaardia odontocarpa / Abildgaardia oxystachya,match_04a,Abildgaardia sp. [Abildgaardia odontocarpa / Abildgaardia oxystachya; test_all_matches_TRUE],APC,genus,Abildgaardia,FALSE,https://id.biodiversity.org.au/node/apni/2905759,https://id.biodiversity.org.au/name/apni/55984,Abildgaardia Vahl
Acanthocarpus fimbriatus / Acanthocarpus mucronatus,match_04a,Acanthocarpus sp. [Acanthocarpus fimbriatus / Acanthocarpus mucronatus; test_all_matches_TRUE],APC,genus,Acanthocarpus,FALSE,https://id.biodiversity.org.au/node/apni/2899190,https://id.biodiversity.org.au/name/apni/72610,Acanthocarpus Lehm.
Acanthocarpus fimbriatus / mucronatus,match_04a,Acanthocarpus sp. [Acanthocarpus fimbriatus / mucronatus; test_all_matches_TRUE],APC,genus,Acanthocarpus,FALSE,https://id.biodiversity.org.au/node/apni/2899190,https://id.biodiversity.org.au/name/apni/72610,Acanthocarpus Lehm.
Expand All @@ -51,8 +51,8 @@ Aporuelliaa abc / def,match_04c,Aporuellia sp. [Aporuelliaa abc / def; test_all_
Drrandra abc / def,match_04c,Dryandra sp. [Drrandra abc / def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium abc/def,match_04d,Xystidium sp. [Xyystidium abc/def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa abc / def,match_04d,Zygia sp. [Zygiaa abc / def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde fgh / ijk,match_04e,Abcde sp. [Abcde fgh / ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Ryandra abc / def,match_04e,Ryandra sp. [Ryandra abc / def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde fgh / ijk,match_04e,NA,NA,genus,NA,TRUE,NA,NA,NA
Ryandra abc / def,match_04e,NA,NA,genus,NA,TRUE,NA,NA,NA
Cycas candida K.D.Hill,match_05a,Cycas candida,APC,species,Cycas candida,TRUE,https://id.biodiversity.org.au/node/apni/2893335,https://id.biodiversity.org.au/name/apni/188177,Cycas candida K.D.Hill
Eremophila papillata Chinnock,match_05a,Eremophila papillata,APC,species,Eremophila papillata,TRUE,https://id.biodiversity.org.au/node/apni/2910890,https://id.biodiversity.org.au/name/apni/207453,Eremophila papillata Chinnock
Acalypha indica var. australis F.M.Bailey,match_05b,Acalypha indica var. australis,APC,variety,Acalypha lanceolata,TRUE,https://id.biodiversity.org.au/instance/apni/889946,https://id.biodiversity.org.au/name/apni/72588,Acalypha indica var. australis F.M.Bailey
Expand Down Expand Up @@ -125,8 +125,8 @@ Aporuelliaa aff def,match_09c,Aporuellia sp. [Aporuelliaa aff. def; test_all_mat
Drrandra affinis def,match_09c,Dryandra sp. [Drrandra aff. def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium aff. abc,match_09d,Xystidium sp. [Xyystidium aff. abc; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa aff. abc,match_09d,Zygia sp. [Zygiaa aff. abc; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde affinis fgh,match_09e,Abcde sp. [Abcde aff. fgh; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Rryandra aff def,match_09e,Rryandra sp. [Rryandra aff. def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde affinis fgh,match_09e,NA,NA,genus,NA,TRUE,NA,NA,NA
Rryandra aff def,match_09e,NA,NA,genus,NA,TRUE,NA,NA,NA
Aceeena x ovinaaa,match_10a,Acaena x ovina,APC,species,Acaena x ovina,FALSE,https://id.biodiversity.org.au/taxon/apni/51446291,https://id.biodiversity.org.au/name/apni/72209,Acaena x ovina A.Cunn.
Banksiia serrratte,match_10a,Banksia serrata,APC,species,Banksia serrata,TRUE,https://id.biodiversity.org.au/taxon/apni/51293610,https://id.biodiversity.org.au/name/apni/109014,Banksia serrata L.f.
Eremoophila opppositifolia ssp. rubraaa,match_10a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock
Expand Down Expand Up @@ -154,8 +154,8 @@ Aporuelliaa abc x def,match_11c,Aporuellia x [Aporuelliaa abc x def; test_all_ma
Drrandra x def,match_11c,Dryandra x [Drrandra x def; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Xyystidium x def,match_11d,Xystidium x [Xyystidium x def; test_all_matches_TRUE],APNI,genus,Xystidium,FALSE,NA,https://id.biodiversity.org.au/name/apni/244613,Xystidium Trin.
Zygiaa abc x Zygia def,match_11d,Zygia x [Zygiaa abc x Zygia def; test_all_matches_TRUE],APNI,genus,Zygia,FALSE,NA,https://id.biodiversity.org.au/name/apni/65077,Zygia P.Browne
Abcde fgh x ijk,match_11e,Abcde x [Abcde fgh x ijk; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Ryandra abc x def,match_11e,Ryandra x [Ryandra abc x def; test_all_matches_TRUE],NA,genus,NA,FALSE,NA,NA,NA
Abcde fgh x ijk,match_11e,NA,NA,genus,NA,TRUE,NA,NA,NA
Ryandra abc x def,match_11e,NA,NA,genus,NA,TRUE,NA,NA,NA
Baeckea sp. murchison river,match_12a,Baeckea sp. Murchison River (M.E.Trudgen 12009),APC,species,Baeckea sp. Murchison River (M.E.Trudgen 12009),TRUE,https://id.biodiversity.org.au/node/apni/2888052,https://id.biodiversity.org.au/name/apni/191267,Baeckea sp. Murchison River (M.E.Trudgen 12009) WA Herbarium
Eremophila oppositifolia rubra (needle leaves),match_12a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock
Eremophila oppositifolia rubra early collection,match_12a,Eremophila oppositifolia subsp. rubra,APC,subspecies,Eremophila oppositifolia subsp. rubra,TRUE,https://id.biodiversity.org.au/node/apni/7951458,https://id.biodiversity.org.au/name/apni/117903,Eremophila oppositifolia subsp. rubra (C.T.White & W.D.Francis) Chinnock
Expand Down Expand Up @@ -220,5 +220,5 @@ Achneriia crevifoliaa,match_22b,Achneria sp. [Achneriia crevifoliaa; test_all_ma
Actinocarpos,match_22b,Actinocarpus sp. [Actinocarpos; test_all_matches_TRUE],APC,genus,Damasonium,FALSE,https://id.biodiversity.org.au/instance/apni/884226,https://id.biodiversity.org.au/name/apni/74816,Actinocarpus R.Br.
Drryandra,match_22b,Dryandra sp. [Drryandra; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Dryandraa,match_22b,Dryandra sp. [Dryandraa; test_all_matches_TRUE],APC,genus,Banksia,FALSE,https://id.biodiversity.org.au/instance/apni/865048,https://id.biodiversity.org.au/name/apni/77744,Dryandra R.Br.
Actiniladum sp.,NA,NA,NA,NA,NA,FALSE,NA,NA,NA
Ecalypha indica australis,NA,NA,NA,NA,NA,FALSE,NA,NA,NA
Actiniladum sp.,NA,NA,NA,NA,NA,TRUE,NA,NA,NA
Ecalypha indica australis,NA,NA,NA,NA,NA,TRUE,NA,NA,NA

0 comments on commit 3173a85

Please sign in to comment.