[fix] Auto-detect order of NetCDF dimensions (#6)
* [fix] Automatically detect dimensions of NetCDF files

* [fix] Make sure 'summary_database()' works when no matches are found

* [docs] Update NEWS
Hongyuan Jia authored Aug 10, 2020
1 parent a1beac0 commit fdc979d
Showing 4 changed files with 226 additions and 224 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,7 +1,11 @@
# epwshiftr 0.1.1

## Bug fixes

* `esgf_query()` will give an informative message when the LLNL ESGF node is
  not available (#3).
* `extract_data()` will automatically detect the dimension order of input
  NetCDF files (#6).
* `summary_database()` will now proceed when no matches are found (#6).

# epwshiftr 0.1.0

60 changes: 35 additions & 25 deletions R/netcdf.R
@@ -188,12 +188,16 @@ summary_database <- function (
# c) calculate the percentage of overlap between the datetime ranges of the
# input file and the index data
set(idx_m, NULL, "index_match", seq_len(nrow(idx_m)))
idx_m[, by = c("index_match"),
overlap := {
diff <- as.numeric(difftime(i.datetime_end, i.datetime_start, units = "days"))
sq <- seq(datetime_start, datetime_end, by = "1 day")
sum(sq >= i.datetime_start & sq <= i.datetime_end) / diff
}]
if (!nrow(idx_m)) {
set(idx_m, NULL, "overlap", double())
} else {
idx_m[, by = c("index_match"),
overlap := {
diff <- as.numeric(difftime(i.datetime_end, i.datetime_start, units = "days"))
sq <- seq(datetime_start, datetime_end, by = "1 day")
sum(sq >= i.datetime_start & sq <= i.datetime_end) / diff
}]
}

# d) only keep items that have overlapped percentages larger than 60%
idx_m <- idx_m[overlap >= 0.6]
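
The guard added above matters because the filter `idx_m[overlap >= 0.6]` needs
an `overlap` column even when nothing matched, and the grouped `:=` may leave
that column uncreated on a zero-row table. A minimal sketch of the same pattern
on a hypothetical toy table (`dt`, `grp`, `start`, `end` are illustrative names,
not part of the package):

```r
library(data.table)

# hypothetical empty match table: no index item overlapped any local file
dt <- data.table(grp = integer(), start = numeric(), end = numeric())

if (!nrow(dt)) {
    # nothing to group over, so create the column explicitly
    set(dt, NULL, "overlap", double())
} else {
    # toy overlap measure, a stand-in for the datetime computation above
    dt[, by = "grp", overlap := (end - start) / 365]
}

dt[overlap >= 0.6]   # safe even when the table is empty
```
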
@@ -342,6 +346,29 @@ get_nc_data <- function (x, lats, lons, years, unit = TRUE) {
dims[J("lon"), on = "name", `:=`(value = list(which_lon))]
dims[J("lat"), on = "name", `:=`(value = list(which_lat))]

# find the correct dimension order
ord <- list(
c("lon", "lat", "time"),
c("lat", "lon", "time"),
c("time", "lon", "lat"),
c("lat", "time", "lon"),
c("lon", "time", "lat"),
c("time", "lat", "lon")
)
ord <- ord[vapply(ord, function (i) {
d <- dims[J(i), on = "name"]$length
!is.null(tryCatch(RNetCDF::var.get.nc(nc, var, d, rep(1L, 3), collapse = FALSE),
error = function (e) {
if (grepl("exceeds dimension bound", conditionMessage(e), fixed = TRUE)) {
NULL
} else {
signalCondition(e)
}
}
))
}, logical(1))][[1L]]
dims <- dims[J(ord), on = "name"]

dt <- rbindlist(use.names = TRUE, lapply(seq_along(which_time), function (i) {
if (!length(which_time[[i]])) {
return(data.table(
@@ -357,29 +384,12 @@
# Thus, the order of the dimensions according to the CDL conventions
# (e.g., time, latitude, longitude) is reversed in the R array (e.g.,
# longitude, latitude, time).
dt <- tryCatch(as.data.table(RNetCDF::var.get.nc(nc, var,
dt <- as.data.table(RNetCDF::var.get.nc(nc, var,
c(min(dims$value[[1L]]), min(dims$value[[2L]]), min(dims$value[[3L]])),
c(length(dims$value[[1L]]), length(dims$value[[2L]]), length(dims$value[[3L]])),
collapse = FALSE)),
error = function (e) {
if (grepl("exceeds dimension bound", conditionMessage(e), fixed = TRUE)) {
NULL
} else {
signalCondition(e)
}
}
collapse = FALSE)
)

# in case permutation did not work
if (is.null(dt)) {
dims <- dims[c(3L, 2L, 1L)][, id := .I]
dt <- as.data.table(RNetCDF::var.get.nc(nc, var,
c(min(dims$value[[1L]]), min(dims$value[[2L]]), min(dims$value[[3L]])),
c(length(dims$value[[1L]]), length(dims$value[[2L]]), length(dims$value[[3L]])),
collapse = FALSE
))
}

setnames(dt, c(dims$name, "value"))
setnames(dt, "time", "datetime")

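The dimension-order probe added above in `get_nc_data()` tries each of the six
orderings of the `lon`, `lat` and `time` dimensions: reading a single cell with
`start` set to the three dimension lengths only succeeds when those lengths are
listed in the variable's actual storage order, so the first ordering that does
not raise an "exceeds dimension bound" error is taken as the true order. A
standalone sketch of that probe (`detect_dim_order` and its `len` argument are
hypothetical helpers for illustration, not part of epwshiftr):

```r
library(RNetCDF)

# nc:  an open RNetCDF handle; var: the name of a 3-D variable
# len: named integer vector of dimension lengths,
#      e.g. c(lon = 320L, lat = 160L, time = 365L)
detect_dim_order <- function (nc, var, len) {
    candidates <- list(
        c("lon", "lat", "time"), c("lat", "lon", "time"),
        c("time", "lon", "lat"), c("lat", "time", "lon"),
        c("lon", "time", "lat"), c("time", "lat", "lon")
    )
    ok <- vapply(candidates, function (ord) {
        res <- tryCatch(
            # reading the very last cell along each dimension fails unless
            # `len[ord]` matches the variable's storage order
            var.get.nc(nc, var, start = len[ord], count = rep(1L, 3L), collapse = FALSE),
            error = function (e) {
                # only swallow the out-of-bound error the probe relies on
                if (grepl("exceeds dimension bound", conditionMessage(e), fixed = TRUE)) NULL
                else stop(e)
            }
        )
        !is.null(res)
    }, logical(1))
    candidates[ok][[1L]]
}
```

A call like `detect_dim_order(nc, var, setNames(dims$length, dims$name))` would
then reproduce the `ord` computed above, assuming `dims$length` holds each
dimension's size as read from the file.
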
39 changes: 21 additions & 18 deletions README.Rmd
@@ -15,18 +15,18 @@ knitr::opts_chunk$set(
library(epwshiftr)
# copy files in advance
f <- c("tas_day_EC-Earth3_ssp585_r1i1p1f1_gr_20490101-20491231.nc",
"tas_day_EC-Earth3_ssp585_r1i1p1f1_gr_20500101-20501231.nc",
"tas_day_EC-Earth3_ssp585_r1i1p1f1_gr_20510101-20511231.nc",
"hurs_day_EC-Earth3_ssp585_r1i1p1f1_gr_20490101-20491231.nc",
"hurs_day_EC-Earth3_ssp585_r1i1p1f1_gr_20500101-20501231.nc",
"hurs_day_EC-Earth3_ssp585_r1i1p1f1_gr_20510101-20511231.nc",
"tas_day_EC-Earth3_ssp585_r1i1p1f1_gr_20790101-20791231.nc",
"tas_day_EC-Earth3_ssp585_r1i1p1f1_gr_20800101-20801231.nc",
"tas_day_EC-Earth3_ssp585_r1i1p1f1_gr_20810101-20811231.nc",
"hurs_day_EC-Earth3_ssp585_r1i1p1f1_gr_20790101-20791231.nc",
"hurs_day_EC-Earth3_ssp585_r1i1p1f1_gr_20800101-20801231.nc",
"hurs_day_EC-Earth3_ssp585_r1i1p1f1_gr_20810101-20811231.nc"
f <- c("tas_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20490101-20491231.nc",
"tas_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20500101-20501231.nc",
"tas_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20510101-20511231.nc",
"hurs_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20490101-20491231.nc",
"hurs_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20500101-20501231.nc",
"hurs_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20510101-20511231.nc",
"tas_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20790101-20791231.nc",
"tas_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20800101-20801231.nc",
"tas_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20810101-20811231.nc",
"hurs_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20790101-20791231.nc",
"hurs_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20800101-20801231.nc",
"hurs_day_AWI-CM-1-1-MR_ssp585_r1i1p1f1_gn_20810101-20811231.nc"
)
file.copy(file.path("/mnt/d/ScenarioMIP", f), tempdir())
```
@@ -103,13 +103,16 @@ idx <- init_cmip6_index(
experiment = c("ssp585"),
# specify GCM name
source = "EC-Earth3",
source = "AWI-CM-1-1-MR",
# only consider data nodes that are currently working
data_node = nodes[status == "UP", data_node],
# specify variant
variant = "r1i1p1f1",
# specify years of interest
years = c(2050, 2080)
years = c(2050, 2080),
# save the index to the data directory
save = TRUE
)
# the index has been automatically saved into directory specified using
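
With the two arguments added here, the query result is both filtered to the
requested years and written to disk for reuse. A minimal sketch of reusing the
cached index in a later session, assuming the index directory is controlled by
the `epwshiftr.dir` option and that `load_cmip6_index()` reads the saved file
back; both assumptions come from the package documentation rather than this
diff, and the remaining query arguments are abbreviated:

```r
library(epwshiftr)

# assumed: the package reads and writes its index under getOption("epwshiftr.dir")
options(epwshiftr.dir = tempdir())

idx <- init_cmip6_index(
    experiment = "ssp585",
    source     = "AWI-CM-1-1-MR",
    variant    = "r1i1p1f1",
    years      = c(2050, 2080),
    save       = TRUE
    # other query arguments (activity, variable, frequency, ...) omitted here
)

# assumed helper: reload the cached index later without querying ESGF again
idx <- load_cmip6_index()
```
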
@@ -196,8 +199,8 @@ knitr::kable(head(morphed$rh))

```{r epw}
# create future EPWs grouped by GCM, experiment ID, interval (year)
epws <- future_epw(morphed, by = c("source_id", "experiment_id", "interval"),
dir = tempdir(), separate = TRUE
epws <- future_epw(morphed, by = c("source", "experiment", "interval"),
dir = tempdir(), separate = TRUE, overwrite = TRUE
)
epws