ideas-lab-nus · hongyuanjia · Oct 9, 2022 · Sep 23, 2022 · Oct 4, 2022 · Oct 8, 2022
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: epwshiftr
 Title: Create Future 'EnergyPlus' Weather Files using 'CMIP6' Data
-Version: 0.1.3.9011
+Version: 0.1.3.9012
 Authors@R: c(
     person(given = "Hongyuan",
            family = "Jia",
@@ -48,7 +48,7 @@ Encoding: UTF-8
 URL: https://github.com/ideas-lab-nus/epwshiftr
 BugReports: https://github.com/ideas-lab-nus/epwshiftr/issues
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.0
+RoxygenNote: 7.2.1
 Collate:
     'coord.R'
     'dict.R'
@@ -58,5 +58,7 @@ Collate:
     'gh.R'
     'morph.R'
     'netcdf.R'
+    'query.R'
 VignetteBuilder: knitr
 Config/testthat/start-first: utils, esgf, coord, netcdf, morph
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,7 +2,9 @@
 
 S3method(print,CMIP6CV)
 S3method(print,CMIP6DReq)
-export(CMIP6Dict)
+S3method(print,EsgfQueryParam)
+export(Cmip6Dict)
+export(EsgfQuery)
 export(cmip6_dict)
 export(esgf_query)
 export(extract_data)
@@ -13,6 +15,7 @@ export(init_cmip6_index)
 export(load_cmip6_index)
 export(match_coord)
 export(morphing_epw)
+export(query_esgf)
 export(set_cmip6_index)
 export(summary_database)
 importFrom(PCICt,as.PCICt)

diff --git a/NEWS.md b/NEWS.md
@@ -72,7 +72,7 @@
   be able to capture the average of future climate (#41).
 * Now epwshiftr is able to download, parse and store CMIP6 Controlled
   Vocabularies (CVs) and Data Request data using the newly introduced class
-  `CMIP6Dict`. Please see `?CMIP6Dict` for details (#53).
+  `Cmip6Dict`. Please see `?Cmip6Dict` for details (#53).
 * A new option `epwshiftr.threshold_alpha` has been added to set the threshold
   of the absolute value for alpha, i.e. monthly-mean fractional change when
   performing morphing operations. The default value is set to `3`. If the
@@ -82,6 +82,8 @@
 * Now HDF5 format is supported (#60).
 * Now `replica` can be `NULL` in `esgf_query()` and `init_cmip6_index()`. In
   this case, both the master record and replicas are all returned (#61).
+* New class `EsgfQuery` is added to support more flexible. Please see
+  `?EsgfQuery` for details (#63).
 
 ## Bug fixes
 
@@ -96,7 +98,9 @@
   `morphing_epw()` (#25).
 
 ## Internal refactor
+
 * `fields` parameter is used to directly filter the ESGF query responses (#66).
+* Improve URL encoding (#62).
 
 # epwshiftr 0.1.3
 

diff --git a/R/dict.R b/R/dict.R
@@ -1,6 +1,6 @@
 #' CMIP6 Controlled Vocabularies (CVs) and Data Request Dictionary
 #'
-#' The `CMIP6Dict` object provides functionalities to fetch the latested CMIP6
+#' The `Cmip6Dict` object provides functionalities to fetch the latested CMIP6
 #' Controlled Vocabularies (CVs) and Data Request (DReq) information.
 #'
 #' The CMIP6 CVs gives a well-defined set of global attributes that are recorded
@@ -15,7 +15,7 @@
 #' endorsed MIP. The raw data of DReq is stored a Microsoft Excel file
 #' (`CMIP6_MIP_tables.xlsx`) in a
 #' [Subversion repo](http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreq/trunk).
-#' The `CMIP6Dict` object uses the parsed DReq data that is stored in the
+#' The `Cmip6Dict` object uses the parsed DReq data that is stored in the
 #' [GitHub Repo](https://github.com/PCMDI/cmip6-cmor-tables).
 #'
 #' For more information, please see:
@@ -26,10 +26,10 @@
 #' @examples
 #' \dontrun{
 #'
-#' # create a new CMIP6Dict object
+#' # create a new Cmip6Dict object
 #' dict <- cmip6_dict()
 #'
-#' # by default, there is no data when the CMIP6Dict was created
+#' # by default, there is no data when the Cmip6Dict was created
 #' dict$is_empty()
 #'
 #' # fetch and parse all CVs and Data Request data
@@ -71,16 +71,16 @@
 #' @author Hongyuan Jia
 #'
 #' @importFrom R6 R6Class
-#' @name CMIP6Dict
+#' @name Cmip6Dict
 #' @export
 cmip6_dict <- function() {
-    EPWSHIFTR_ENV$dict <- CMIP6Dict$new()
-    EPWSHIFTR_ENV$dict
+    this$dict <- Cmip6Dict$new()
+    this$dict
 }
 
-#' @name CMIP6Dict
+#' @name Cmip6Dict
 #' @export
-CMIP6Dict <- R6::R6Class("CMIP6Dict",
+Cmip6Dict <- R6::R6Class("Cmip6Dict",
     cloneable = FALSE, lock_class = TRUE,
     public = list(
         #' @description
@@ -96,9 +96,9 @@ CMIP6Dict <- R6::R6Class("CMIP6Dict",
         },
 
         #' @description
-        #' Is it an empty CMIP6Dict?
+        #' Is it an empty Cmip6Dict?
         #'
-        #' `$is_empty()` checks if this `CMIP6Dict` is empty, i.e. the `$build()
+        #' `$is_empty()` checks if this `Cmip6Dict` is empty, i.e. the `$build()
         #' ` or `$load()` method hasn't been called yet and there is no data of
         #' CVs and Data Request.
         #'
@@ -144,8 +144,17 @@ CMIP6Dict <- R6::R6Class("CMIP6Dict",
         #'        REST APIs. If `NULL`, `GITHUB_PAT` or `GITHUB_TOKEN`
         #'        environment variable will be used if exists. Default: `NULL`.
         #'
-        #' @return The updated `CMIP6Dict` itself.
-        build = function(token = NULL) {
+        #' @param force Whether to force to rebuild the dict when it has been
+        #'        already built before. Default: `FALSE`.
+        #'
+        #' @return The updated `Cmip6Dict` itself.
+        build = function(token = NULL, force = FALSE) {
+            assert_flag(force)
+
+            if (self$is_empty()) force <- TRUE
+
+            if (!force) return(self)
+
             dict <- cmip6dict_build(cmip6dict_fetch())
             for (nm in names(dict)) private[[paste0("m_", nm)]] <- dict[[nm]]
             self
@@ -192,12 +201,12 @@ CMIP6Dict <- R6::R6Class("CMIP6Dict",
         },
 
         #' @description
-        #' Save the CMIP6Dict object
+        #' Save the Cmip6Dict object
         #'
-        #' `$save()` stores all the core data of current `CMIP6Dict` object into
+        #' `$save()` stores all the core data of current `Cmip6Dict` object into
         #' an [RDS][saveRDS()] file named `CMIP6DICT` in the specified folder.
         #' This file can be reloaded via `$load()` method to restore the last
-        #' state of current `CMIP6Dict` object.
+        #' state of current `Cmip6Dict` object.
         #'
         #' @param dir A single string giving the directory to save the RDS file.
         #'        Default is set to the global option `epwshiftr.dir`. The
@@ -218,7 +227,7 @@ CMIP6Dict <- R6::R6Class("CMIP6Dict",
         },
 
         #' @description
-        #' Load the saved CMIP6Dict object from file
+        #' Load the saved Cmip6Dict object from file
         #'
         #' `$load()` loads the RDS file named `CMIP6DICT` that is created using
         #' `$save()` method.
@@ -248,12 +257,12 @@ CMIP6Dict <- R6::R6Class("CMIP6Dict",
         },
 
         #' @description
-        #' Print a summary of the current `CMIP6Dict` object
+        #' Print a summary of the current `Cmip6Dict` object
         #'
-        #' `$print()` gives the summary of current `CMIP6Dict` object including
+        #' `$print()` gives the summary of current `Cmip6Dict` object including
         #' the version of CVs and Data Request, and the last built time.
         #'
-        #' @return The `CMIP6Dict` object itself, invisibly.
+        #' @return The `Cmip6Dict` object itself, invisibly.
         print = function() {
             d <- cli::cli_div(
                 theme = list(rule = list("line-type" = "double"))
@@ -381,7 +390,7 @@ cmip6dict_download_cv_file <- function(tag, dir = tempdir(), token = NULL) {
 
     file <- ""
     cli::cli_progress_step(
-        "Downloading data of {.strong CMIP6 CVs} ['{.file {file}}']...",
+        "Downloading data of {.strong CMIP6 CVs} [{.file {file}}]...",
         "Downloaded data of {.strong CMIP6 CVs} successfully.",
         "Failed to download data of {.strong CMIP6 CVs}.",
         spinner = TRUE

diff --git a/R/epwshiftr-package.R b/R/epwshiftr-package.R
@@ -25,9 +25,14 @@
 "_PACKAGE"
 
 # package internal environment
-EPWSHIFTR_ENV <- new.env(parent = emptyenv())
-EPWSHIFTR_ENV$index_db <- NULL
-EPWSHIFTR_ENV$dict <- NULL
+this <- new.env(parent = emptyenv())
+this$index_db <- NULL
+this$dict <- NULL
+this$cache <- list()
+
+# nocov start
+attach_cache <- function(cache) this$cache <- cache
+# nocov end
 
 ## usethis namespace: start
 #' @importFrom checkmate assert_string

diff --git a/R/esgf.R b/R/esgf.R
@@ -121,10 +121,10 @@ RES_FILE <- c(
 #'        described below. Default: `"r1i1p1f1"`.
 #'        If `NULL`, all possible variants are returned.
 #'
-#' * `r`: realization_index (<k>) = realization number (integer >0)
-#' * `i`: initialization_index (<l>) = index for variant of initialization method (integer >0)
-#' * `p`: physics_index (<m>) = index for model physics variant (integer >0)
-#' * `f`: forcing_index (<n>) = index for variant of forcing (integer >0)
+#' * `r`: realization_index (`<k>`) = realization number (integer >0)
+#' * `i`: initialization_index (`<l>`) = index for variant of initialization method (integer >0)
+#' * `p`: physics_index (`<m>`) = index for model physics variant (integer >0)
+#' * `f`: forcing_index (`<n>`) = index for variant of forcing (integer >0)
 #'
 #' @param replica Whether the record is the "master" copy, or a replica. Use
 #'        `FALSE` to return only originals and `TRUE` to return only replicas.
@@ -264,55 +264,71 @@ esgf_query <- function(activity = "ScenarioMIP",
         variant = "variant_label"
     )
 
-    pair <- function(x, first = FALSE) {
+    pair <- function(x, encode = TRUE) {
+        checkmate::assert_vector(x, TRUE, null.ok = TRUE)
+
         # get name
         var <- deparse(substitute(x))
+
         # skip if empty
         if (is.null(x) || length(x) == 0) {
             return()
         }
         # get key name
         key <- dict[names(dict) == var]
         if (!length(key)) key <- var
+
         if (is.logical(x)) x <- tolower(x)
-        s <- paste0(key, "=", paste0(x, collapse = "%2C")) # %2C = ","
-        if (first) s else paste0("&", s)
+
+        if (encode) x <- query_param_encode(as.character(x))
+
+        paste0(key, "=", paste0(x, collapse = query_param_encode(",")))
     }
 
-    `%and%` <- function(lhs, rhs) if (is.null(rhs)) lhs else paste0(lhs, rhs)
+    `%and%` <- function(lhs, rhs) {
+        if (is.null(rhs)) {
+            lhs
+        } else if (lhs == url_base) {
+            paste(lhs, rhs, sep = "", collapse = "")
+        } else {
+            paste(lhs, rhs, sep = "&", collapse = "&")
+        }
+    }
 
     project <- "CMIP6"
-    format <- "application%2Fsolr%2Bjson"
+    format <- "application/solr+json"
+    offset <- 0L
 
     resolution <- c(
         gsub(" ", "", resolution, fixed = TRUE),
         gsub(" ", "+", resolution, fixed = TRUE)
     )
 
+    # use `fileds` to directly subset data from responses
+    if (type == "Dataset") {
+        fields <- RES_DATASET
+    } else if (type == "File") {
+        fields <- RES_FILE
+    }
+
     q <- url_base %and%
-        pair(project, TRUE) %and%
+        pair(offset) %and%
+        pair(limit) %and%
+        pair(type) %and%
+        pair(replica) %and%
+        pair(latest) %and%
+        pair(project) %and%
         pair(activity) %and%
         pair(experiment) %and%
         pair(source) %and%
         pair(variable) %and%
-        pair(resolution) %and%
+        pair(resolution, FALSE) %and%
         pair(variant) %and%
         pair(data_node) %and%
         pair(frequency) %and%
-        pair(replica) %and%
-        pair(latest) %and%
-        pair(type) %and%
-        pair(limit) %and%
+        pair(fields) %and%
         pair(format)
 
-    # use `fileds` to directly subset data from responses
-    if (type == "Dataset") {
-        fields <- RES_DATASET
-    } else if (type == "File") {
-        fields <- RES_FILE
-    }
-    q <- q %and% pair(fields)
-
     q <- tryCatch(jsonlite::read_json(q), warning = function(w) w, error = function(e) e)
 
     # nocov start
@@ -573,7 +589,7 @@ init_cmip6_index <- function(activity = "ScenarioMIP",
         data.table::fwrite(dt, file.path(.data_dir(TRUE), "cmip6_index.csv"))
         verbose("Data file index saved to '", normalizePath(file.path(.data_dir(TRUE), "cmip6_index.csv")), "'")
 
-        EPWSHIFTR_ENV$index_db <- data.table::copy(dt)
+        this$index_db <- data.table::copy(dt)
     }
 
     dt
@@ -596,10 +612,10 @@ init_cmip6_index <- function(activity = "ScenarioMIP",
 #' @importFrom data.table copy fread
 #' @export
 load_cmip6_index <- function(force = FALSE) {
-    if (is.null(EPWSHIFTR_ENV$index_db)) force <- TRUE
+    if (is.null(this$index_db)) force <- TRUE
 
     if (!force) {
-        idx <- data.table::copy(EPWSHIFTR_ENV$index_db)
+        idx <- data.table::copy(this$index_db)
     } else {
         f <- normalizePath(file.path(.data_dir(force = FALSE), "cmip6_index.csv"), mustWork = FALSE)
         if (!file.exists(f)) {
@@ -661,7 +677,7 @@ load_cmip6_index <- function(force = FALSE) {
     }
 
     # udpate package internal stored file index
-    EPWSHIFTR_ENV$index_db <- data.table::copy(idx)
+    this$index_db <- data.table::copy(idx)
 
     idx[]
 }
@@ -707,7 +723,7 @@ set_cmip6_index <- function(index, save = FALSE) {
     }
 
     # udpate package internal stored file index
-    EPWSHIFTR_ENV$index_db <- data.table::copy(index)
+    this$index_db <- data.table::copy(index)
 
     invisible(index)
 }

diff --git a/R/morph.R b/R/morph.R
@@ -157,15 +157,15 @@ remove_units <- function (data, var) {
 #'
 #' If using a shift, for each month, a shift \eqn{\Delta x_m} is applied to
 #' \eqn{x_0}. \eqn{\Delta x_m} is the absolute change in the monthly mean value
-#' of the variable for the month \eqn{m}, i.e. \eqn{\Delta x_m = <x_0>_m - <x>_m}
-#' . Here the monthly variance of the variable
-#' is unchanged.
+#' of the variable for the month \eqn{m},
+#' i.e. \eqn{\Delta x_m = <x_0>_m - <x>_m}. Here the monthly variance of the
+#' variable is unchanged.
 #'
 #' ## Stretch:
 #'
 #' If using a stretch, for each month, a stretch \eqn{\alpha _m} is applied to
 #' \eqn{x_0}, where \eqn{\alpha _m} is the fractional change in the monthly-mean
-#' value of a variable, i.e. \eqn{\alpha _m} = <x>_m / <x_0>_m. In this case,
+#' value of a variable, i.e. \eqn{\alpha _m = <x>_m / <x_0>_m}. In this case,
 #' the variance will be multiplied by to \eqn{alpha^2_m}
 #'
 #' ## Combined Shift and Stretch:

diff --git a/R/netcdf.R b/R/netcdf.R
@@ -406,7 +406,7 @@ summary_database <- function (
     }
 
     # update index
-    EPWSHIFTR_ENV$index_db <- copy(idx)
+    this$index_db <- copy(idx)
 
     if (update) {
         # save database into the app data directory