Ensure that check_fields() works consistently without using regex
- Fix a bug where `match.arg()` was always noisy in `request_metadata()`
- Rewire `check_fields()` to use data captured from `filter()` rather than parsing URLs (see the sketch below)
- Place @daxkellie as second author from this version
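
A minimal sketch (hypothetical URL and filter values) of the idea behind the second change: `collapse()` now stores the `filter()` tibble on the `query` object itself, so `check_fields()` can read field names directly instead of re-parsing them out of the query URL.

library(tibble)
filters <- tibble(variable = "year", logical = ">=", value = "2010")
.query <- list(type = "data/occurrences",
               url = "https://example.org/ws/occurrences",  # placeholder URL
               filter = filters)
class(.query) <- "query"
.query$filter$variable  # field names are available directly; no URL parsing
#> [1] "year"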
mjwestgate committed Nov 8, 2024
1 parent 43f4b55 commit 3d34208
Showing 15 changed files with 78 additions and 95 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -7,13 +7,13 @@ Authors@R:
family = "Westgate",
email = "martin.westgate@csiro.au",
role = c("aut", "cre")),
person(given = "Matilda",
family = "Stevenson",
role = "aut"),
person(given = "Dax",
family = "Kellie",
email = "dax.kellie@csiro.au",
role = "aut"),
person(given = "Matilda",
family = "Stevenson",
role = "aut"),
person(given = "Peggy",
family = "Newman",
email = "peggy.newman@csiro.au",
6 changes: 3 additions & 3 deletions R/atlas_media.R
@@ -71,9 +71,9 @@ atlas_media <- function(request = NULL,
# note that behaviour here depends on whether we have run compute_checks() above
if(inherits(.query, "data_request")){
.query$filter <- bind_rows(.query$filter,
tibble(variable = "media",
logical = "==",
value = paste(present_fields, collapse = "|"),
tibble(variable = present_fields,
logical = "!=",
value = "\"\"",
query = as.character(media_fq)))
}else if(inherits(.query, "query")){ # i.e. if .query is already a `query`
url <- url_parse(.query$url)
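
A sketch of the filter rows the new code builds (field names here are hypothetical, and the `query` column from `media_fq` is omitted): rather than one synthetic "media" row, there is now one row per media field, each requiring a non-empty value.

library(tibble)
present_fields <- c("images", "sounds")  # hypothetical media fields
tibble(variable = present_fields,
       logical = "!=",
       value = "\"\"")
#> a 2-row tibble: images != "" and sounds != ""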
2 changes: 1 addition & 1 deletion R/build_query_set.R
@@ -75,7 +75,7 @@ build_query_set_data <- function(x, mint_doi, ...){
if(!fields_absent[["select"]] | x$type %in% c("occurrences", "species")){
result <- list(collapse_fields(), collapse_assertions())
}else{
- result <- list()
+ result <- list()
}
}
# handle `identify()`
34 changes: 16 additions & 18 deletions R/check.R
@@ -202,7 +202,7 @@ check_field_identities <- function(df, .query){
df
}

- #' sub-function to `check_fields()` for living atlases
+ #' sub-function to `check_fields()` for GBIF
#' @importFrom jsonlite fromJSON
#' @importFrom purrr pluck
#' @noRd
@@ -241,7 +241,7 @@ check_fields_gbif_counts <- function(.query){
c(filter_invalid, group_by_invalid)
}

- #' sub-function to `check_fields()` for living atlases
+ #' sub-function to `check_fields()` for GBIF
#' @importFrom jsonlite fromJSON
#' @importFrom purrr pluck
#' @noRd
@@ -272,12 +272,6 @@ check_fields_gbif_predicates <- function(.query){
#' @noRd
#' @keywords Internal
check_fields_la <- function(.query){
- if(inherits(.query$url, "data.frame")){
-   url <- url_parse(.query$url$url[1])
- }else{
-   url <- url_parse(.query$url[1])
- }
- queries <- url$query

# set fields to check against
# NOTE: These are retrieved in collapse()
@@ -287,20 +281,18 @@

# extract fields from filter & identify
filter_invalid <- NA
- if(is.null(queries$fq)){
+ if(is.null(.query$filter)){
# note: above was previously: `exists("fq", where = queries)`
# Error in as.environment(where) : using 'as.environment(NULL)' is defunct
filters <- NULL
}else{
- if (nchar(queries$fq) > 0) {
-   provided_fields <- string_to_tibble(queries$fq)
-   filters <- provided_fields |>
-     pull("value") |>
-     gsub("\\(|\\)|\\-|\\:", "", x = _)
- } else {
-   filters <- NULL
- }

+ # extract field names
+ # note: `filter()` often concatenates field names with logical statements
+ # hence `strsplit()` step here
+ filters <- .query$filter$variable |>
+   strsplit("\\||\\&") |>
+   unlist() |>
+   unique()
if (length(filters) > 0) {
if (!all(filters %in% valid_any)) {
invalid_fields <- filters[!(filters %in% valid_any)]
@@ -311,6 +303,12 @@

# galah_group_by fields check
group_by_invalid <- NA
+ if(inherits(.query$url, "data.frame")){
+   url <- url_parse(.query$url$url[1])
+ }else{
+   url <- url_parse(.query$url[1])
+ }
+ queries <- url$query
if (!is.null(queries$facets)) {
facets <- queries[names(queries) == "facets"] |> unlist() # NOTE: arrange() is missing
if (length(facets) > 0) {
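
The rewired `check_fields_la()` reads field names straight from the stored filter tibble; because `filter()` can concatenate several field names with `|` or `&` into a single `variable` entry, they are split apart before validation. A small illustration (field names hypothetical):

variable <- c("year", "basisOfRecord|occurrenceStatus")  # as stored by `filter()`
filters <- variable |>
  strsplit("\\||\\&") |>  # split names joined by OR (`|`) or AND (`&`)
  unlist() |>
  unique()
filters
#> [1] "year"             "basisOfRecord"    "occurrenceStatus"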
3 changes: 2 additions & 1 deletion R/collapse_media.R
@@ -30,8 +30,9 @@ collapse_media <- function(.query){
result <- list(
type = "metadata/media",
url = url_lookup("metadata/media"),
+ headers = build_headers(),
body = toJSON(list(imageIds = media_ids)),
- headers = build_headers())
+ filter = .query$filter)

class(result) <- "query"
return(result)
2 changes: 2 additions & 0 deletions R/collapse_occurrences.R
@@ -42,6 +42,7 @@ collapse_occurrences_uk <- function(.query){
type = "data/occurrences",
url = url_build(url),
headers = build_headers(),
+ filter = .query$filter,
select = .query$select)
class(result) <- "query"
return(result)
@@ -117,6 +118,7 @@ collapse_occurrences_la <- function(.query){
type = "data/occurrences",
url = url_build(url),
headers = build_headers(),
+ filter = .query$filter,
select = .query$select)
class(result) <- "query"
return(result)
24 changes: 12 additions & 12 deletions R/collapse_occurrences_count.R
@@ -34,15 +34,16 @@ collapse_occurrences_count_atlas <- function(identify = NULL,
data_profile = data_profile)
# set behaviour depending on `group_by()`
if(is.null(group_by)){
result <- list(type = "data/occurrences-count")
url <- url_lookup("data/occurrences-count") |>
url_parse()
url$query <- c(query, pageSize = 0)
- result$url <- url_build(url)
- result$slot_name <- "totalRecords"
- result$expand <- FALSE
+ result <- list(type = "data/occurrences-count",
+                url = url_build(url),
+                headers = build_headers(),
+                filter = filter,
+                slot_name = "totalRecords",
+                expand = FALSE)
}else{
result <- list(type = "data/occurrences-count-groupby")
url <- url_lookup("data/occurrences-count-groupby") |>
url_parse()
facets <- as.list(group_by$name)
@@ -56,17 +57,16 @@
if(is.null(arrange)){
arrange <- tibble(variable = "count", direction = "descending")
}

slice_arrange <- bind_cols(slice, arrange)
arrange_list <- check_slice_arrange(slice_arrange)
url$query <- c(query, facets, arrange_list)
- result$url <- url_build(url)
- result$expand <- ifelse(length(facets) > 1, TRUE, FALSE)
- result$arrange <- slice_arrange
+ result <- list(type = "data/occurrences-count-groupby",
+                url = url_build(url),
+                headers = build_headers(),
+                filter = filter,
+                expand = ifelse(length(facets) > 1, TRUE, FALSE),
+                arrange = slice_arrange)
}

# aggregate and return
- result$headers <- build_headers()
class(result) <- "query"
return(result)
}
1 change: 1 addition & 0 deletions R/collapse_species.R
@@ -42,6 +42,7 @@ collapse_species_atlas <- function(.query){
type = "data/species",
url = url_build(url),
headers = build_headers(),
+ filter = .query$filter,
download = TRUE)
class(result) <- "query"
result
19 changes: 11 additions & 8 deletions R/collapse_species_count.R
@@ -34,14 +34,16 @@ collapse_species_count_atlas <- function(identify = NULL,
filter,
geolocate,
data_profile = data_profile)
result <- list(type = "data/species-count")
# set behaviour depending on `group_by()`
if(is.null(group_by)){
url$query <- c(query,
list(flimit = 1,
facets = species_facets()))
- result$url <- url_build(url)
- result$expand <- FALSE
+ result <- list(type = "data/species-count",
+                url = url_build(url),
+                headers = build_headers(),
+                filter = filter,
+                expand = FALSE)
}else{
facets <- c(as.list(group_by$name), species_facets())
names(facets) <- rep("facets", length(facets))
@@ -54,12 +56,13 @@
slice_arrange <- bind_cols(slice, arrange)
arrange_list <- check_slice_arrange(slice_arrange)
url$query <- c(query, facets, arrange_list)
- result$url <- url_build(url)
- result$expand <- TRUE
- result$arrange <- slice_arrange
+ result <- list(type = "data/species-count",
+                url = url_build(url),
+                headers = build_headers(),
+                filter = filter,
+                expand = TRUE,
+                arrange = slice_arrange)
}
# aggregate and return
- result$headers <- build_headers()
class(result) <- "query"
return(result)
}
7 changes: 4 additions & 3 deletions R/galah_call.R
@@ -173,16 +173,17 @@ request_metadata <- function(type = c("fields",
"reasons",
"taxa",
"identifiers")){
- type <- try(match.arg(type))
- if(inherits(type, "try-error")){
+ type_checked <- try(match.arg(type),
+                     silent = TRUE)
+ if(inherits(type_checked, "try-error")){
bullets <- c(
glue("Unrecognised metadata requested."),
i = "See `?show_all()` for a list of valid metadata types.",
x = glue("Can't find metadata type `{type}`.")
)
abort(bullets)
}
- x <- list(type = type)
+ x <- list(type = type_checked)
class(x) <- "metadata_request"
return(x)
}
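
This is the fix for the first bullet in the commit message: `try()` prints the error it catches unless `silent = TRUE`, so every failed `match.arg()` call was noisy even though the failure was handled. A quick illustration (the choices vector here is illustrative, not the function's real formals):

type <- "not-a-type"
try(match.arg(type, c("fields", "apis")))  # prints the error despite catching it
checked <- try(match.arg(type, c("fields", "apis")), silent = TRUE)  # quiet
inherits(checked, "try-error")
#> [1] TRUE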
38 changes: 0 additions & 38 deletions R/utilities_internal.R
@@ -85,44 +85,6 @@ camel_to_snake_case <- function(x){
tolower()
}

- #' Simple internal function to split strings
- #' @importFrom stringr str_extract
- #' @importFrom stringr str_extract_all
- #' @importFrom stringr str_trim
- #' @importFrom stringr str_remove
- #' @importFrom tibble as_tibble
- #' @noRd
- #' @keywords Internal
- string_to_tibble <- function(string, split_by = c(":")){
-   # ChatGPT created the reprex in this function.
-   # Possibly over-engineered, but it works
-
-   # The pattern logic is:
-   # (?<=\\(|OR|AND|-): A positive lookbehind to ensure the match is preceded by (, OR, AND, or -.
-   # \\s*: Matches any whitespace characters.
-   # ([-\\w\\(\\)]+): Captures one or more word characters, hyphens, or parentheses.
-   # \\s*(?=:): Matches any whitespace characters followed by a colon, using a positive lookahead to ensure the match is followed by a colon.
-   extracted_strings <-
-     # new
-     stringr::str_extract_all(
-       string,
-       "(?<=\\(|OR|AND)\\s*([-\\w\\(\\)]+)\\s*(?=:)"
-     ) |>
-     # old (v2.0.2)
-     # stringr::str_extract_all(
-     #   string,
-     #   "(?<=\\()[^\\*]*?(?=\\:)|(?<=OR\\s)[^\\*]*?(?=\\:)"
-     # ) |>
-     unlist() |>
-     str_remove("-|\\(") |>
-     str_extract("[:alnum:]+") |>
-     str_trim() |>
-     as_tibble() |>
-     unique()
-   return(extracted_strings)
-
- }

##---------------------------------------------------------------
## Other helpful functions --
##---------------------------------------------------------------
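
For context on why this helper could be dropped entirely: its lookbehind only matched field names preceded by "(", "OR", or "AND", so a query string without one of those prefixes could lose fields. A small demonstration (query strings are hypothetical):

library(stringr)
pattern <- "(?<=\\(|OR|AND)\\s*([-\\w\\(\\)]+)\\s*(?=:)"
str_extract_all("(year:2010)", pattern)
#> [[1]] "year"
str_extract_all("year:2010 AND month:6", pattern)
#> [[1]] " month"   # the leading "year" is silently dropped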
2 changes: 1 addition & 1 deletion man/galah.Rd

Some generated files are not rendered by default.

18 changes: 14 additions & 4 deletions tests/testthat/test-atlas_media.R
@@ -56,14 +56,24 @@ test_that("`collapse()` and `collect()` work for `type = 'media'`", {
filter(media == occ_collect) |>
collapse()
expect_true(inherits(media_collapse, "query"))
- expect_equal(length(media_collapse), 4)
- expect_equal(names(media_collapse), c("type", "url", "body", "headers"))
+ expect_equal(length(media_collapse), 5)
+ expect_equal(names(media_collapse),
+              c("type",
+                "url",
+                "headers",
+                "body",
+                "filter"))
expect_true(media_collapse$type == "metadata/media")
# compute
media_compute <- compute(media_collapse)
expect_true(inherits(media_compute, "computed_query"))
- expect_equal(length(media_compute), 4)
- expect_equal(names(media_compute), c("type", "url", "body", "headers"))
+ expect_equal(length(media_compute), 5)
+ expect_equal(names(media_compute),
+              c("type",
+                "url",
+                "headers",
+                "body",
+                "filter"))
# collect
media_collect <- collect(media_compute)
expect_s3_class(media_collect, c("tbl_df", "tbl", "data.frame"))
2 changes: 1 addition & 1 deletion tests/testthat/test-atlas_occurrences.R
@@ -41,7 +41,7 @@ test_that("`compute(type = 'occurrences')` works", {
# collapse
query_collapse <- collapse(base_query)
expect_true(inherits(query_collapse, "query"))
- expect_equal(length(query_collapse), 3)
+ expect_equal(length(query_collapse), 4)
expect_equal(query_collapse$type, "data/occurrences")
# compute
response <- compute(base_query)
9 changes: 7 additions & 2 deletions tests/testthat/test-galah_filter.R
@@ -256,8 +256,13 @@ test_that("galah_filter handles lsid as an input", {
collapse()
# number of taxa searches is 3, not 4
expect_s3_class(query, "query")
- expect_equal(length(query), 5)
- expect_equal(names(query), c("type", "url", "slot_name", "expand", "headers"))
+ expect_equal(length(query), 6)
+ expect_equal(names(query), c("type",
+                              "url",
+                              "headers",
+                              "filter",
+                              "slot_name",
+                              "expand"))
})

test_that("galah_filter handles different fields separated by OR", {
