Skip to content

Commit

Permalink
Check behaviour of living atlases shown by test
Browse files Browse the repository at this point in the history
Now support:
- occurrence downloads for Austria
- data profiles for Sweden and Spain
- media downloads for Sweden
  • Loading branch information
mjwestgate committed Nov 11, 2024
1 parent 3d34208 commit 7eadd83
Show file tree
Hide file tree
Showing 21 changed files with 285 additions and 119 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ importFrom(sf,st_is_simple)
importFrom(sf,st_is_valid)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
importFrom(stringr,str_extract_all)
importFrom(stringr,str_remove)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
Expand Down
60 changes: 31 additions & 29 deletions R/atlas_media.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#' @rdname atlas_
#' @order 4
#' @importFrom dplyr any_of
#' @importFrom dplyr bind_rows
#' @importFrom dplyr relocate
#' @importFrom dplyr right_join
#' @importFrom dplyr join_by
#' @importFrom glue glue
#' @importFrom httr2 url_build
#' @importFrom httr2 url_parse
#' @importFrom potions pour
#' @importFrom purrr pluck
#' @importFrom rlang abort
#' @importFrom tibble tibble
#' @importFrom stringr str_remove
#' @importFrom tidyr unnest_longer
#' @export
atlas_media <- function(request = NULL,
Expand All @@ -21,6 +21,11 @@ atlas_media <- function(request = NULL,
data_profile = NULL
) {

atlas <- pour("atlas", "region", .pkg = "galah")
if(!(atlas %in% c("Austraila", "Sweden"))){
abort(glue("`atlas_media` is not supported for atlas = {atlas}"))
}

# capture supplied arguments
args <- as.list(environment())
# convert to `data_request` object
Expand All @@ -36,9 +41,10 @@ atlas_media <- function(request = NULL,
media_fields <- c("images", "videos", "sounds")
if(is.null(.query$select)){
.query <- update_data_request(.query,
select = galah_select(group = c("basic", "media")))
select = galah_select(group = c("basic", "media")))
present_fields <- media_fields
# if `select` is present, ensure that at least one 'media' field is requested
query_collapse <- collapse(.query)
# if `select` is present, ensure that at least one 'media' field is requested
}else{
x <- collapse(.query)

Expand All @@ -57,35 +63,31 @@ atlas_media <- function(request = NULL,
abort(bullets)
}else{
present_fields <- selected_fields[selected_fields %in% media_fields]
.query <- x
query_collapse <- x
}
} # end `select` checks

# `filter` to records that contain media of valid types
media_fq <- glue("({present_fields}:*)")
if(length(present_fields) > 1){
media_fq <- glue("({glue_collapse(media_fq, ' OR ')})")
}

# update .query with fields filter
# note that behaviour here depends on whether we have run compute_checks() above
if(inherits(.query, "data_request")){
.query$filter <- bind_rows(.query$filter,
tibble(variable = present_fields,
logical = "!=",
value = "\"\"",
query = as.character(media_fq)))
}else if(inherits(.query, "query")){ # i.e. if .query is already a `query`
url <- url_parse(.query$url)
url$query$fq <- paste0(url$query$fq, "AND", media_fq)
.query$url <- url_build(url)
.query <- compute_occurrences(.query)
}else{
abort("unknown object class in `atlas_media()`")
# add media content to filters
if(length(present_fields) > 0){
# do region-specific filter parsing
atlas <- pour("atlas", "region")
if(atlas == "Sweden"){
sbdi_filter_fields <- present_fields |>
str_remove("s$") |>
paste0("IDsCount")
media_fq <- glue("{sbdi_filter_fields}:[1 TO *]")
}else{ # i.e. Australia
media_fq <- glue("({present_fields}:*)")
}
# add back to source object
media_fq <- glue("({glue_collapse(media_fq, ' OR ')})")
url <- url_parse(query_collapse$url)
url$query$fq <- paste0(url$query$fq, " AND ", media_fq)
query_collapse$url <- url_build(url)
}

# get occurrences
occ <- .query |>
occ <- query_collapse |>
collect(wait = TRUE) |>
unnest_longer(col = any_of(present_fields))
occ$media_id <- build_media_id(occ)
Expand All @@ -98,4 +100,4 @@ atlas_media <- function(request = NULL,
media,
by = join_by("media_id" == "image_id"))
relocate(occ_media, "media_id", 1)
}
}
10 changes: 4 additions & 6 deletions R/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,10 @@ build_query <- function(identify = NULL,
}
}
# add profiles information (ALA only)
if(pour("atlas", "region") == "Australia"){
if(!is.null(data_profile)) {
query$qualityProfile <- data_profile
} else {
query$disableAllQualityFilters <- "true"
}
if(!is.null(data_profile)) {
query$qualityProfile <- data_profile
} else {
query$disableAllQualityFilters <- "true"
}
build_single_fq(query)
}
Expand Down
24 changes: 13 additions & 11 deletions R/collapse_media.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@ collapse_media <- function(.query){
abort("Requests for metadata of type = \"media\" must have information passed via `filter()`")
}
occ <- .query$filter$data
media_cols <- which(colnames(occ) %in% c("images", "videos", "sounds"))
media_ids <- do.call(c, occ[, media_cols]) |>
unlist()
media_ids <- media_ids[!is.na(media_ids)]
## Note: next `gsub()` calls were added specifically for SBDI parsing
## In practice, that image service treats inputs as invalid whether they are
## quoted or not. Suspected server-side problem. Tested 2024-02-27.
# media_ids <- media_ids |>
# gsub("\"", "", x = _) |> # remove quotes
# gsub("^\\[|\\]$", "", x = _) # remove leading or trailing square brackets
names(media_ids) <- NULL
if(any(colnames(occ) %in% c("images", "videos", "sounds"))){ # Australia, Sweden
media_cols <- which(colnames(occ) %in% c("images", "videos", "sounds"))
media_ids <- do.call(c, occ[, media_cols]) |>
unlist()
media_ids <- media_ids[!is.na(media_ids)]
names(media_ids) <- NULL
}else if(any(colnames(occ) == "all_image_url")){ # Austria
media_ids <- pull(occ, "all_image_url")
media_ids <- media_ids[!is.na(media_ids)]
names(media_ids) <- NULL
}else{
abort("Media metadata not found in supplied tibble")
}

result <- list(
type = "metadata/media",
Expand Down
16 changes: 10 additions & 6 deletions R/collapse_occurrences.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ collapse_occurrences_uk <- function(.query){
data_profile = .query$data_profile),
fields = "`SELECT_PLACEHOLDER`",
qa = "`ASSERTIONS_PLACEHOLDER`",
sourceTypeId = 2001,
sourceTypeId = source_type_id_lookup("United Kingdom"),
fileType = "csv",
reasonTypeId = pour("user", "download_reason_id"),
dwcHeaders = "true")
Expand All @@ -49,6 +49,7 @@ collapse_occurrences_uk <- function(.query){
}

#' calculate the query to be returned for GBIF
#' @importFrom glue glue
#' @noRd
#' @keywords Internal
collapse_occurrences_gbif <- function(.query, format = "SIMPLE_CSV"){
Expand All @@ -61,6 +62,11 @@ collapse_occurrences_gbif <- function(.query, format = "SIMPLE_CSV"){
value = "`TAXON_PLACEHOLDER`",
query = ""))
}
# get user string
username <- pour("user", "username", .pkg = "galah")
password <- pour("user", "password", .pkg = "galah")
user_string <- glue("{username}:{password}")
# build object
result <- list(
type = "data/occurrences",
url = url_lookup("data/occurrences"),
Expand All @@ -71,10 +77,7 @@ collapse_occurrences_gbif <- function(.query, format = "SIMPLE_CSV"){
Accept = "application/json"),
options = list(
httpauth = 1,
userpwd = paste0(
pour("user", "username", .pkg = "galah"),
":",
pour("user", "password", .pkg = "galah"))),
userpwd = user_string),
body = build_predicates(.query$filter,
.query$geolocate,
format = format))
Expand All @@ -100,7 +103,8 @@ collapse_occurrences_la <- function(.query){
qa = "`ASSERTIONS_PLACEHOLDER`",
facet = "false", # not tested
emailNotify = email_notify(),
sourceTypeId = 2004,
sourceTypeId = {pour("atlas", "region") |>
source_type_id_lookup()},
reasonTypeId = pour("user", "download_reason_id"),
email = pour("user", "email"),
dwcHeaders = "true")
Expand Down
5 changes: 3 additions & 2 deletions R/collect_occurrences.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
#' @importFrom tibble tibble
collect_occurrences <- function(.query, wait, file = NULL){
switch(pour("atlas", "region"),
"United Kingdom" = collect_occurrences_uk(.query, file = file),
"Austria" = collect_occurrences_direct(.query, file = file),
"United Kingdom" = collect_occurrences_direct(.query, file = file),
collect_occurrences_default(.query, wait = wait, file = file))
}

#' Internal function to `collect_occurrences()` for UK
#' @noRd
#' @keywords Internal
collect_occurrences_uk <- function(.query, file){
collect_occurrences_direct <- function(.query, file){
.query$download <- TRUE
.query$file <- check_download_filename(file)
query_API(.query)
Expand Down
8 changes: 5 additions & 3 deletions R/compute_occurrences.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
#' @keywords Internal
compute_occurrences <- function(.query){
switch(pour("atlas", "region"),
"United Kingdom" = compute_occurrences_uk(.query),
"Austria" = compute_occurrences_la_direct(.query),
"United Kingdom" = compute_occurrences_la_direct(.query),
"Global" = compute_occurrences_gbif(.query),
compute_occurrences_la(.query))
}

#' Internal function to `compute()` for `type = "occurrences"` for UK
#' Internal function to `compute()` for `type = "occurrences"` for
#' atlases that have 'direct' downloads (i.e. no true `compute` stage)
#' @noRd
#' @keywords Internal
compute_occurrences_uk <- function(.query){
compute_occurrences_la_direct <- function(.query){
result <- c(.query,
list(fields = extract_fields(.query)))
class(result) <- "computed_query"
Expand Down
8 changes: 5 additions & 3 deletions R/parse_metadata_unnest.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ parse_profile_values <- function(.query){
profile_name <- extract_profile_name(url)
short_name <- profile_short_name(profile_name)
if (!pour("atlas", "region") == "Spain") {
url$path <- paste0("/dqf-service/api/v1/data-profiles/",
short_name)
path_name <- url |>
pluck("path") |>
dirname()
url$path <- glue("{path_name}/{short_name}")
}
result <- list(type = .query$type,
url = url_build(url))
Expand Down Expand Up @@ -63,7 +65,7 @@ extract_profile_name <- function(url) {
} else {
profile_name <- url |>
pluck("path") |>
sub("/dqf-service/api/v1/data-profiles/", "", x = _)
basename()
}
return(profile_name)
}
2 changes: 1 addition & 1 deletion R/print.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ switch_slot_text <- function(x, a){
},
"select" = x[[a]]$summary,
"group_by" = glue_collapse(x[[a]]$name, sep = " | "),
"data_profile" ={x[[a]]$data_profile[1]},
"data_profile" ={x[[a]][1]},
"mint_doi" = {x[[a]][1]},
"")
}
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
13 changes: 13 additions & 0 deletions R/utilities_internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ atlas_supports_reasons_api <- function(){

## show_all_atlases / search_atlases --------------------------#

#' @noRd
#' @keywords Internal
image_fields <- function() {
atlas <- pour("atlas", "region")
switch (atlas,
Expand All @@ -142,6 +144,8 @@ image_fields <- function() {
)
}

#' @noRd
#' @keywords Internal
species_facets <- function(){
atlas <- pour("atlas", "region")
if(atlas %in% c("Australia", "France", "Spain", "Sweden")) { # i.e. those using 'pipelines'
Expand All @@ -150,3 +154,12 @@ species_facets <- function(){
"species_guid"
}
}

#' @noRd
#' @keywords Internal
source_type_id_lookup <- function(region){
switch(region,
"Austria" = 1,
"United Kingdom" = 2001,
"2004") # ALA default for galah
}
6 changes: 4 additions & 2 deletions data-raw/node_config.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Australia,metadata/reasons,https://api.ala.org.au/logger/service/logger/reasons,
Australia,metadata/taxa-multiple,https://api.ala.org.au/namematching/api/searchByClassification,TRUE
Australia,metadata/taxa-single,https://api.ala.org.au/namematching/api/search?q={name},TRUE
Australia,metadata/taxa-unnest,https://api.ala.org.au/species/childConcepts/{id},TRUE
Austria,data/occurrences,https://biocache.biodiversityatlas.at/ws/occurrences/offline/download,TRUE
Austria,data/occurrences,https://biocache.biodiversityatlas.at/ws/occurrences/download,TRUE
Austria,data/occurrences-count,https://biocache.biodiversityatlas.at/ws/occurrences/search,TRUE
Austria,data/occurrences-count-groupby,https://biocache.biodiversityatlas.at/ws/occurrence/facets,TRUE
Austria,data/species,https://biocache.biodiversityatlas.at/ws/occurrences/facets/download,TRUE
Expand Down Expand Up @@ -73,7 +73,7 @@ Estonia,metadata/fields-unnest,https://elurikkus.ee/biocache-service/occurrence/
Estonia,metadata/providers,https://elurikkus.ee/collectory/ws/dataProvider,TRUE
Estonia,metadata/taxa-single,https://elurikkus.ee/bie-index/search?q={name}&pageSize=5,TRUE
Estonia,metadata/taxa-unnest,https://elurikkus.ee/bie-index/childConcepts/{id},TRUE
France,data/occurrences,https://openobs.mnhn.fr/biocache-service/occurrences/offline/download,TRUE
France,data/occurrences,https://openobs.mnhn.fr/biocache-service/occurrences/download,TRUE
France,data/occurrences-count,https://openobs.mnhn.fr/biocache-service/occurrences/search,TRUE
France,data/occurrences-count-groupby,https://openobs.mnhn.fr/biocache-service/occurrence/facets,TRUE
France,data/species,https://openobs.mnhn.fr/biocache-service/occurrences/facets/download,TRUE
Expand Down Expand Up @@ -167,6 +167,8 @@ Sweden,metadata/licences,https://images.biodiversitydata.se/ws/licence,TRUE
Sweden,metadata/lists,https://lists.biodiversitydata.se/ws/speciesList,TRUE
Sweden,metadata/lists-unnest,https://lists.biodiversitydata.se/ws/speciesListItems/{list_id},TRUE
Sweden,metadata/media,https://images.biodiversitydata.se/ws/getImageInfoForIdList,TRUE
Sweden,metadata/profiles,https://data-quality-service.biodiversitydata.se/api/v1/data-profiles?enabled=true,TRUE
Sweden,metadata/profiles-unnest,https://data-quality-service.biodiversitydata.se/api/v1/data-profiles/{profile},TRUE
Sweden,metadata/providers,https://collections.biodiversitydata.se/ws/dataProvider,TRUE
Sweden,metadata/reasons,https://logger.biodiversitydata.se/service/logger/reasons,TRUE
Sweden,metadata/taxa-multiple,https://namematching.biodiversitydata.se/api/searchByClassification,TRUE
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-galah_apply_profile.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ test_that("galah_apply_profile allows only one profile at a time", {
skip_if_offline()
expect_error(galah_apply_profile(ALA, CSDM),
"Too many data profiles supplied.")
})
})
Loading

0 comments on commit 7eadd83

Please sign in to comment.