Skip to content

Commit

Permalink
Support GBIF name matching for international atlases
Browse files Browse the repository at this point in the history
- `galah_identify` now works for non-Australian atlases
- misc notes and bug fixes
  • Loading branch information
mjwestgate committed Apr 28, 2022
1 parent 1244875 commit a0e0b2e
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 39 deletions.
7 changes: 1 addition & 6 deletions R/atlas_GET.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,7 @@ internal_GET <- function(url, path, params = list(), on_error = NULL,
}

data <- res$parse("UTF-8")
result <- fromJSON(data, flatten = TRUE)
if(is.list(result)){
as.data.frame(result) # necessary as GBIF returns a list
}else{
result
}
fromJSON(data, flatten = TRUE)
}

cache_filename <- function(args, ext) {
Expand Down
11 changes: 11 additions & 0 deletions R/atlas_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,17 @@ atlas_counts_lookup <- function(identify = NULL,
as_tibble(counts_final)
}

# Supporting GBIF uses one of two APIs
# To use group by, but no filters:
# group by -> `path <- /occurrence/counts`
# this facets by basisOfRecord or year, but no filtering

# To use filters but not group by:
# path <- /occurrence/search then set limit = 0 to only get counts
# https://api.gbif.org/v1/occurrence/search?taxonKey=1&limit=0
# This option requires some hard-coding of what values are allowable by gbif
# i.e. show_all_values() would not call an API for atlas == "Global"

# get just the record count for a query
# handle too long queries in here?
record_count <- function(query) {
Expand Down
2 changes: 1 addition & 1 deletion R/galah_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ validate_option <- function(name, value, error_call = caller_env()) {
abort(bullets, call = error_call)
}
} else if (name == "atlas") {
if (!value %in% show_all_atlases()$region) {
if (!value %in% show_all_atlases()$atlas) {
bullets <- c(
"Unsupported atlas provided.",
i = glue("Use `show_all_atlases()` to see supported atlases."),
Expand Down
26 changes: 23 additions & 3 deletions R/galah_group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ galah_group_by <- function(..., expand = TRUE){
provided_variables <- dequote(unlist(lapply(dots, as_label)))
if (getOption("galah_config")$run_checks){
validate_fields(provided_variables)
}
available_variables <- provided_variables[provided_variables %in% show_all_fields()$id]
}else{
available_variables <- provided_variables[
provided_variables %in% show_all_fields()$id]
}
if(length(available_variables) > 0){
df <- tibble(name = available_variables)
df$type <- ifelse(str_detect(df$name, "[[:lower:]]"), "field", "assertions")
Expand All @@ -106,4 +108,22 @@ galah_group_by <- function(..., expand = TRUE){
}else{
df
}
}
}

# for passing to atlas_counts, see rgbif::count_facet
# in practice, the only fields allowable by `path <- /occurrence/counts`
# are `year` (with optional year range);
# https://api.gbif.org/v1/occurrence/counts/year?year=1981,2012
# NOTE: range query is optional

# galah_call() |>
# galah_group_by(year) |>
# galah_filter(year >= 1981 & year <= 2012) |>
# atlas_counts()

# ...and `basisOfRecord` (no filters)
# https://api.gbif.org/v1/occurrence/counts/basisOfRecord

# galah_call() |>
# galah_group_by(basisOfRecord) |>
# atlas_counts()
27 changes: 12 additions & 15 deletions R/galah_identify.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ galah_identify <- function(..., search = TRUE) {
verbose <- getOption("galah_config")$verbose

# check for types first
if (inherits(input_query, "ala_id")) {
if (!is.null(attr(input_query, "call"))) {
query <- input_query$taxon_concept_id
} else if (inherits(input_query, c("gbifid+", "nbnid+"))) { # from taxize
query <- as.character(input_query)
# } else if (inherits(input_query, c("gbifid+", "nbnid+"))) { # from taxize
# query <- as.character(input_query)
} else { # if the input isn't of known type, try to find IDs
if (search) {
check_atlas(atlas)
Expand Down Expand Up @@ -166,18 +166,15 @@ check_number_returned <- function(n_in, n_out, error_call = caller_env()) {
}

check_atlas <- function(atlas, error_call = caller_env()) {
if(atlas != "Australia"){
atlas_origin <- switch(atlas,
"UK" = "UK",
"Sweden" = "Swedish",
"Austria" = "Austrian",
"Guatemala" = "Guatemalan",
"Spain" = "Spanish"
)
bullets <- c(glue("Searching is not supported for the {atlas_origin} atlas."),
i = "try using the `taxize` package to search instead",
i = "taxonomic identifiers can be passed to `galah_identify` by setting `search = FALSE`"
)
if(atlas == "UK"){
# atlas_origin <- switch(atlas,
# "UK" = "UK",
# "Sweden" = "Swedish",
# "Austria" = "Austrian",
# "Guatemala" = "Guatemalan",
# "Spain" = "Spanish"
# )
bullets <- c("Searching is not supported for the NBN.")
abort(bullets, call = error_call)
}
}
Expand Down
1 change: 1 addition & 0 deletions R/galah_select.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ galah_select <- function(...,
}
}

# NOTE: gbif doesn't appear to support column specification in downloads

preset_cols <- function(type) {
cols <- switch(type,
Expand Down
3 changes: 1 addition & 2 deletions R/search_identifiers.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ search_identifiers <- function(identifier) {
international_atlas <- getOption("galah_config")$atlas
bullets <- c(
"`search_identifiers` only provides information on Australian taxonomy.",
i = glue("To search taxonomy for {international_atlas} use `taxize`."),
i = "See vignette('international_atlases' for more information."
i = glue("To search for a species name, use `search_taxa()` instead.")
)
abort(bullets, call = caller_env())
}
Expand Down
8 changes: 8 additions & 0 deletions R/search_profile_attributes.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
#' @export search_profile_attributes

search_profile_attributes <- function(profile) {

if (getOption("galah_config")$atlas != "Australia") {
bullets <- c(
"Data profiles are only available for the Australian atlas"
)
abort(bullets, call = caller_env())
}

# check if is numeric or can be converted to numeric
short_name <- profile_short_name(profile)
if (is.na(short_name)) {
Expand Down
17 changes: 13 additions & 4 deletions R/search_taxa.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ name_query <- function(query) {
name_lookup <- function(name) {
url <- server_config("name_matching_base_url")

if(getOption("galah_config")$atlas == "Global"){
atlas_lookup <- show_all_atlases()
taxonomy <- atlas_lookup$taxonomy_source[
atlas_lookup$atlas == getOption("galah_config")$atlas
]
if(taxonomy == "GBIF"){
path <- "species/match"
query <- list(
verbose = FALSE,
name = name[[1]])
query <- list(verbose = FALSE, name = name[[1]])
}else{
if (is.null(names(name)) || isTRUE(names(name) == "")) {
# search by scientific name
Expand Down Expand Up @@ -110,6 +112,13 @@ name_lookup <- function(name) {
return(as.data.frame(list(search_term = name), stringsAsFactors = FALSE))
}
names(result) <- rename_columns(names(result), type = "taxa")

# rename `usage_key` to `taxon_concept_id`
usage_key_check <- names(result) == "usage_key"
if(any(usage_key_check)){
names(result)[usage_key_check] <- "taxon_concept_id"
result$taxon_concept_id <- as.character(result$taxon_concept_id)
}

# if search term includes more than one rank, how to include in output?
if (length(name) > 1) {
Expand Down
4 changes: 3 additions & 1 deletion R/search_values.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ search_values <- function(field, query){

field_text <- show_all_values(field)
field_text[grepl(query, tolower(field_text$category)), ]
}
}

# equivalents in rgbif: isocodes (for countries)
18 changes: 11 additions & 7 deletions R/show_all_atlases.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ show_all_atlases <- function() {
ala_info <- "https://bie.ala.org.au/"
nbn_info <- "https://www.nhm.ac.uk/our-science/data/uk-species.html"
tibble(
atlas = c("Australia", "Austria", "Global", "Guatemala",
"Spain", "Sweden", "UK"),
acronym = c("ALA", "BAO", "GBIF", "SNIBgt",
"GBIF.es", "SBDI", "NBN"),
region = c("Australia", "Austria", "Global", "Guatemala",
"Spain", "Sweden", "UK"),
url = c(
"https://www.ala.org.au",
"https://biodiversityatlas.at",
Expand Down Expand Up @@ -58,7 +58,7 @@ server_config <- function(url, error_call = caller_env()) {
if (!(url %in% names(conf))) {
service <- service_name(url)
lookup <- show_all_atlases()
atlas_acronym <- lookup$acronym[lookup$region == atlas]
atlas_acronym <- lookup$acronym[lookup$atlas == atlas]
abort(
glue("{service} is not supported for {atlas_acronym}"),
call = error_call)
Expand Down Expand Up @@ -123,7 +123,8 @@ austria_config <- function() {
records_base_url = "https://biocache-ws.biodiversityatlas.at/",
spatial_base_url = "https://spatial.biodiversityatlas.at/ws",
# Occurrence download only returns the first image
images_base_url = "https://images.biodiversityatlas.at/"
images_base_url = "https://images.biodiversityatlas.at/",
name_matching_base_url = "https://api.gbif.org/v1"
)
}

Expand All @@ -139,7 +140,8 @@ guatemala_config <- function() {
list(
images_base_url = "https://imagenes.snib.conap.gob.gt/",
spatial_base_url = "https://geoespacial.snib.conap.gob.gt/ws",
records_base_url = "https://snib.conap.gob.gt/registros-ws/"
records_base_url = "https://snib.conap.gob.gt/registros-ws/",
name_matching_base_url = "https://api.gbif.org/v1"
# No species pages available
)
}
Expand All @@ -149,6 +151,7 @@ spain_config <- function() {
list(
records_base_url = "https://registros-ws.gbif.es/",
species_base_url = "https://especies-ws.gbif.es/",
name_matching_base_url = "https://api.gbif.org/v1",
images_base_url = "https://imagenes.gbif.es/",
spatial_base_url = "https://espacial.gbif.es/ws",
doi_base_url = "https://doi.gbif.es/",
Expand All @@ -161,7 +164,8 @@ sweden_config <- function() {
# Uses GBIF taxonomy
spatial_base_url = "https://spatial.biodiversitydata.se/ws/",
species_base_url = "https://species.biodiversitydata.se/ws/",
records_base_url = "https://records.biodiversitydata.se/ws/"
records_base_url = "https://records.biodiversitydata.se/ws/",
name_matching_base_url = "https://api.gbif.org/v1"
)
}

Expand All @@ -171,6 +175,6 @@ uk_config <- function() {
spatial_base_url = "https://layers.nbnatlas.org/ws",
species_base_url = "https://species-ws.nbnatlas.org",
records_base_url = "https://records-ws.nbnatlas.org",
images_base_url = "https://images.nbnatlas.org/"
images_base_url = "https://images.nbnatlas.org/",
)
}

0 comments on commit a0e0b2e

Please sign in to comment.