Note: this is a bookdown project, supposed to be run from within the src/generate_watercourse_100mseg subfolder. You can use the generate_watercourse_100mseg.Rproj RStudio project file in this subfolder to run it.

Upon building, a persistent GRASS project database will be created in which the data will be created. Because serious slowdown was encountered when exporting the points object (not the lines) to the final GPKG file, exporting (hence, recreating the GPKG file) will only be done when knitting with params = list(grass_reexport = TRUE).

1 Making the data source

We will create a master dataset of watercourse segments of 100 m (watercourse_100mseg), in order to derive sampling frames of lotic types (3260 in particular).

It is a processed data source, derived from the raw data source watercourses (version watercourses_20200807). The segments will have a 100 m length, except at the end of the linestrings of watercourses.

Further we will also derive a second layer which represent the endpoints of the 100 m segments.

local_root <- find_root(has_file("generate_watercourse_100mseg.Rproj"))
datapath <- file.path(local_root, "data")
n2khab_datapath <- find_root_file("n2khab_data",
                                  criterion = has_dir("n2khab_data"))
if (!dir.exists(datapath)) dir.create(datapath)
finalpath <- find_root_file("n2khab_data/20_processed/watercourse_100mseg", 
                            criterion = has_dir("n2khab_data"))
if (!dir.exists(finalpath)) dir.create(finalpath)
grass_location <- file.path(datapath, "grass_watercourses")
if (!dir.exists(grass_location)) dir.create(file.path(grass_location, "PERMANENT"),
                                            recursive = TRUE)
grassdbase_exists <- file.exists(file.path(grass_location, "PERMANENT/PROJ_INFO"))
if (.Platform$OS.type == "unix") {
  Sys.setenv(GRASS_PYTHON = system("which python3", intern = TRUE))
}

1.1 Geoprocessing steps in GRASS

Read the watercourses data source.

watercourses <- read_sf(file.path(n2khab_datapath, "10_raw/watercourses"))

Initialize GRASS projectfolder if non-existant, or link to the already existing one:

if (interactive()) {
gisbase_grass <- 
  if (.Platform$OS.type == "windows") link2GI::paramGRASSw()$gisbase_GRASS[1] else {
    link2GI::paramGRASSx()$gisbase_GRASS[1]
  }
}
initGRASS(gisBase = if (interactive()) gisbase_grass else params$gisbase_grass,
          home = tempdir(),
          gisDbase = datapath, location = "grass_watercourses", 
          mapset = "PERMANENT", override = TRUE)
## gisdbase    /media/floris/DATA/PROJECTS/09685_NatuurlijkMilieu/160 Bewerkingen en resultaat/Repos_en_data/n2khab-preprocessing/src/generate_watercourse_100mseg/data 
## location    grass_watercourses 
## mapset      PERMANENT 
## rows        90984 
## columns     235484 
## north       244044.9 
## south       153060.7 
## west        22963.97 
## east        258448.4 
## nsres       1.000002 
## ewres       1.000002 
## projection:
##  PROJCS["Belge 1972 / Belgian Lambert 72",
##     GEOGCS["Belge 1972",
##         DATUM["Reseau_National_Belge_1972",
##             SPHEROID["International 1924",6378388,297,
##                 AUTHORITY["EPSG","7022"]],
##             TOWGS84[-99.059,53.322,-112.486,0.419,-0.83,1.885,-1],
##             AUTHORITY["EPSG","6313"]],
##         PRIMEM["Greenwich",0,
##             AUTHORITY["EPSG","8901"]],
##         UNIT["degree",0.0174532925199433,
##             AUTHORITY["EPSG","9122"]],
##         AUTHORITY["EPSG","4313"]],
##     PROJECTION["Lambert_Conformal_Conic_2SP"],
##     PARAMETER["latitude_of_origin",90],
##     PARAMETER["central_meridian",4.36748666666667],
##     PARAMETER["standard_parallel_1",51.1666672333333],
##     PARAMETER["standard_parallel_2",49.8333339],
##     PARAMETER["false_easting",150000.013],
##     PARAMETER["false_northing",5400088.438],
##     UNIT["metre",1,
##         AUTHORITY["EPSG","9001"]],
##     AXIS["Easting",EAST],
##     AXIS["Northing",NORTH],
##     AUTHORITY["EPSG","31370"]]

Cross-platform function to pass full GRASS command syntax as a string from R:

execshell <- function(commandstring, intern = FALSE) {
  if (.Platform$OS.type == "windows") {
    res <- shell(commandstring, intern = TRUE)
  } else {
    res <- system(commandstring, intern = TRUE)
  }
  if (!intern) cat(res, sep = "\n") else return(res)
}
# setting CRS and extent of the GRASS location, based on 'watercourses'
execGRASS("g.proj", flags = c("c", "quiet"), 
          epsg = 31370)
use_sf()

Add watercourses data source to GRASS database:

execGRASS("v.in.ogr",
          "o",
          input = file.path(n2khab_datapath, "10_raw/watercourses"),
          output = "watercourses")

1.1.1 Generate watercourse_100mseg in GRASS

To do this, we first use the v.edit command to flip the direction of the linestrings, because the segments are to be generated from mouth to source, leaving segments of <100 m only at the source side. After that, we will use the v.split command. It will take each linestring of the raw watercourses data source and split it into segments of 100 m length. This generally results in one segment (for each original linestring) that is shorter than 100 m!

execshell("g.copy vector=watercourses,watercourses_original")
execshell("v.edit map=watercourses tool=flip where='1=1'")
execshell("v.split -f --overwrite input=watercourses output=watercourse_100mseg_pre1 length=100")
execshell("v.category input=watercourse_100mseg_pre1 option=report")
## Layer/table: 1/watercourse_100mseg_pre1
## type       count        min        max
## point          0          0          0
## line      271676          1      26513
## boundary       0          0          0
## centroid       0          0          0
## area           0          0          0
## face           0          0          0
## kernel         0          0          0
## all       271676          1      26513

As seen from the above output, in accordance with the v.split manual, the categories and the attribute table of the original vector map are simply copied over. This means that the individual 100 m segments have no unique category, i.e. they don’t have their own rows (with ID) in another attribute table. To get that, as explained in the documentation, we define a second layer1 with a unique category per feature (100 m segment) and create a new attribute table that links to the new layer (an attribute table always has exactly 1 row for each category and uses 1 layer to take categories from). Next, we copy the category of layer 1 as a mere attribute into a second column in the new attribute table (which is linked to layer 2), in order to define the link between the grouped category cat_group (categories of watercourses) and the unique category cat.

The execshell() command directly sends the below commands to the shell as one script for execution by GRASS.

execshell('v.category input=watercourse_100mseg_pre1 option=add layer=2 output=watercourse_100mseg
       # v.category input=watercourse_100mseg option=report
       # v.db.connect -p map=watercourse_100mseg
       v.db.addtable watercourse_100mseg layer=2 key="rank" columns="vhag_code int"
       # v.db.connect -p map=watercourse_100mseg
       # v.info -c watercourse_100mseg layer=2
       # v.info -c watercourse_100mseg layer=1
       v.to.db map=watercourse_100mseg layer=2 option=query query_layer=1 query_column=VHAG columns=vhag_code --overwrite
       # v.db.select map=watercourse_100mseg layer=2 | head
       ')

1.1.2 Generate watercourse_100msegpoints in GRASS

To do this, we use the v.to.points command. We want each point at the downstream side of its corresponding segment. We create one point per 100 m segment. As we flipped the direction of the watercourse lines, we want the startpoint of each segment.

execGRASS("v.to.points",
          flags = c("overwrite"),
          input = "watercourse_100mseg",
          layer = "2",
          output = "watercourse_100msegpoints",
          use = "start")

1.2 Exporting resulting objects from GRASS

We already took care of standardized column names within GRASS; so no further changes are needed when exporting the data from GRASS. We do it this way in order to minimize the number of times the data need to be written forth and back to a geopackage, which takes extra time (> 250e3 features) and file organization.

execGRASS("v.out.ogr",
          flags = "overwrite",
          input = "watercourse_100mseg",
          layer = "2",
          output = file.path(finalpath, "watercourse_100mseg.gpkg"),
          output_layer = "watercourse_100mseg_lines")
# This export takes a very long time (> 60 min).
# There must be some bug in the export procedure 
# (after all, the data are simpler than previous one).
execGRASS("v.out.ogr",
          "a",
          input = "watercourse_100msegpoints",
          layer = "1",
          output = file.path(finalpath, "watercourse_100mseg.gpkg"),
          output_layer = "watercourse_100mseg_points")

2 Attribute variables of the processed data source: explanation

Both layers in the GPKG file have exactly the same attribute variables:

  • rank: a unique integer increment (1-2-3-4-5-…). It is to be used in a relative way: it ranks the 100 m segments / points along each original linestring in the raw data source.
  • vhag_code: the unique VHAG code of the original linestring in the raw data source. It is common to all segments / points in the processed data source that belong to the same original watercourse.

To obtain other attributes, one can make a join on the VHAG code column of the raw data source.

3 Checks on the data source

filepath <- file.path(finalpath, "watercourse_100mseg.gpkg")

Checksums:

file(filepath) %>% 
  openssl::md5() %>% 
  str_c(collapse = '') %>% 
  `names<-`("md5sum")
##                             md5sum 
## "be050493ca0b80bfe2bfe60762cfd10b"
file(filepath) %>% 
  openssl::sha256() %>% 
  str_c(collapse = '') %>% 
  `names<-`("sha256sum")
##                                                          sha256sum 
## "6feb220de44aa4d47cf68ba956309b6ff6c85e1018225c96de1dffe24e2fce15"
(seg <- read_sf(filepath, 
                layer = "watercourse_100mseg_lines"))
## Simple feature collection with 271676 features and 2 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: 22963.97 ymin: 153060.7 xmax: 258448.4 ymax: 243962.7
## projected CRS:  Belge 1972 / Belgian Lambert 72
## # A tibble: 271,676 x 3
##     rank vhag_code                                                          geom
##    <int>     <int>                                              <LINESTRING [m]>
##  1     1        54 (73158.13 207180.3, 73154.56 207175.9, 73138.51 207159.9, 73…
##  2     2        54 (73084.51 207112.8, 73079.36 207108.4, 73073.22 207101.2, 73…
##  3     3        54 (73098.75 207028.7, 73107.98 207020.9, 73118.83 207013.5, 73…
##  4     4        54 (73168.05 206976.4, 73168.68 206976.6, 73174 206975.3, 73187…
##  5     5        54 (73251.27 206922.3, 73257.6 206916.8, 73277.84 206901.5, 732…
##  6     6        54 (73334.24 206894.9, 73348.22 206906.8, 73367 206923.2, 73383…
##  7     7        54 (73399.43 206909.2, 73401.18 206905.7, 73403.79 206896.3, 73…
##  8     8        54 (73428.71 206822.9, 73429.98 206820.4, 73441.36 206793.6, 73…
##  9     9        54 (73464.1 206729.8, 73464.1 206729, 73466.33 206724.3, 73469.…
## 10    10        54 (73474.06 206630.7, 73474.92 206620.2, 73475.84 206594, 7347…
## # … with 271,666 more rows
(pts <- read_sf(filepath, 
                layer = "watercourse_100mseg_points"))
## Simple feature collection with 271676 features and 2 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 22979.76 ymin: 153064.4 xmax: 258447.8 ymax: 243962.7
## projected CRS:  Belge 1972 / Belgian Lambert 72
## # A tibble: 271,676 x 3
##     rank vhag_code                geom
##    <int>     <int>         <POINT [m]>
##  1     1        54 (73158.13 207180.3)
##  2     2        54 (73084.51 207112.8)
##  3     3        54 (73098.75 207028.7)
##  4     4        54 (73168.05 206976.4)
##  5     5        54 (73251.27 206922.3)
##  6     6        54 (73334.24 206894.9)
##  7     7        54 (73399.43 206909.2)
##  8     8        54 (73428.71 206822.9)
##  9     9        54  (73464.1 206729.8)
## 10    10        54 (73474.06 206630.7)
## # … with 271,666 more rows
all.equal(seg$rank, pts$rank)
## [1] TRUE
all.equal(seg$vhag_code, pts$vhag_code)
## [1] TRUE
all.equal(unique(seg$vhag_code), unique(watercourses$VHAG))
## [1] TRUE

Cartographic display of a row selection:

seg %>% 
  filter(rank < 2e4) %>% 
  ggplot() +
  geom_sf(colour = "grey50") +
  geom_sf(data = pts %>% filter(rank < 2e4), colour = "red", size = 0.5) +
  lims(x = c(36e3, 40e3), y = c(184e3, 188e3)) +
  coord_sf(datum = 31370) +
  theme_bw()+
  theme(panel.grid = element_blank())

4 Used environment

  • version: R version 4.0.3 (2020-10-10)
  • os: Linux Mint 20
  • system: x86_64, linux-gnu
  • ui: X11
  • language: nl_BE:nl
  • collate: nl_BE.UTF-8
  • ctype: nl_BE.UTF-8
  • tz: Europe/Brussels
  • date: 2020-12-16
  • GEOS: 3.8.1
  • GDAL: 3.1.3
  • PROJ: 7.1.1
  • GDAL_with_GEOS: true
  • USE_PROJ_H: true
  • GRASS: 7.8.4
Table 4.1: Loaded R packages
package loadedversion date source
abind 1.4-5 2016-07-21 CRAN (R 4.0.2)
askpass 1.1 2019-01-13 CRAN (R 4.0.2)
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.2)
bookdown 0.21 2020-10-13 CRAN (R 4.0.3)
callr 3.5.1 2020-10-13 CRAN (R 4.0.3)
class 7.3-17 2020-04-26 CRAN (R 4.0.3)
classInt 0.4-3 2020-04-07 CRAN (R 4.0.3)
cli 2.2.0 2020-11-20 CRAN (R 4.0.3)
colorspace 2.0-0 2020-11-11 CRAN (R 4.0.3)
crayon 1.3.4 2017-09-16 CRAN (R 4.0.2)
DBI 1.1.0 2019-12-15 CRAN (R 4.0.3)
desc 1.2.0 2018-05-01 CRAN (R 4.0.2)
devtools 2.3.2 2020-09-18 CRAN (R 4.0.2)
digest 0.6.27 2020-10-24 CRAN (R 4.0.3)
dplyr 1.0.2 2020-08-18 CRAN (R 4.0.2)
e1071 1.7-4 2020-10-14 CRAN (R 4.0.3)
ellipsis 0.3.1 2020-05-15 CRAN (R 4.0.2)
evaluate 0.14 2019-05-28 CRAN (R 4.0.2)
fansi 0.4.1 2020-01-08 CRAN (R 4.0.2)
farver 2.0.3 2020-01-16 CRAN (R 4.0.2)
fs 1.5.0 2020-07-31 CRAN (R 4.0.2)
generics 0.1.0 2020-10-31 CRAN (R 4.0.3)
ggplot2 3.3.2 2020-06-19 CRAN (R 4.0.2)
glue 1.4.2 2020-08-27 CRAN (R 4.0.2)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.2)
htmltools 0.5.0 2020-06-16 CRAN (R 4.0.2)
KernSmooth 2.23-18 2020-10-29 CRAN (R 4.0.3)
knitr 1.30 2020-09-22 CRAN (R 4.0.2)
lifecycle 0.2.0 2020-03-06 CRAN (R 4.0.2)
link2GI 0.4-6 2020-12-16 Github ()
lubridate 1.7.9 2020-06-08 CRAN (R 4.0.2)
lwgeom 0.2-5 2020-06-12 CRAN (R 4.0.2)
magrittr 2.0.1 2020-11-17 CRAN (R 4.0.3)
memoise 1.1.0 2017-04-21 CRAN (R 4.0.2)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.2)
openssl 1.4.3 2020-09-18 CRAN (R 4.0.2)
pillar 1.4.7 2020-11-20 CRAN (R 4.0.3)
pkgbuild 1.1.0 2020-07-13 CRAN (R 4.0.2)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.2)
pkgload 1.1.0 2020-05-29 CRAN (R 4.0.2)
prettyunits 1.1.1 2020-01-24 CRAN (R 4.0.2)
processx 3.4.5 2020-11-30 CRAN (R 4.0.3)
ps 1.4.0 2020-10-07 CRAN (R 4.0.3)
purrr 0.3.4 2020-04-17 CRAN (R 4.0.2)
R6 2.5.0 2020-10-28 CRAN (R 4.0.3)
Rcpp 1.0.5 2020-07-06 CRAN (R 4.0.3)
remotes 2.2.0 2020-07-21 CRAN (R 4.0.2)
renv 0.12.3 2020-11-25 CRAN (R 4.0.3)
rgrass7 0.2-3 2020-12-04 github ()
rlang 0.4.9 2020-11-26 CRAN (R 4.0.3)
rmarkdown 2.5 2020-10-21 CRAN (R 4.0.3)
roxygen2 7.1.1 2020-06-27 CRAN (R 4.0.2)
rprojroot 2.0.2 2020-11-15 CRAN (R 4.0.3)
rstudioapi 0.13 2020-11-12 CRAN (R 4.0.3)
scales 1.1.1 2020-05-11 CRAN (R 4.0.2)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.0.2)
sf 0.9-6 2020-09-13 CRAN (R 4.0.3)
stars 0.4-3 2020-07-08 CRAN (R 4.0.2)
stringi 1.5.3 2020-09-09 CRAN (R 4.0.2)
stringr 1.4.0 2019-02-10 CRAN (R 4.0.2)
testthat 3.0.0 2020-10-31 CRAN (R 4.0.3)
tibble 3.0.4 2020-10-12 CRAN (R 4.0.3)
tidyselect 1.1.0 2020-05-11 CRAN (R 4.0.2)
units 0.6-7 2020-06-13 CRAN (R 4.0.3)
usethis 1.6.3 2020-09-17 CRAN (R 4.0.2)
utf8 1.1.4 2018-05-24 CRAN (R 4.0.2)
vctrs 0.3.5 2020-11-17 CRAN (R 4.0.3)
withr 2.3.0 2020-09-22 CRAN (R 4.0.2)
xfun 0.19 2020-10-30 CRAN (R 4.0.3)
XML 3.99-0.5 2020-07-23 CRAN (R 4.0.2)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.2)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.2)

  1. To do that, the vector map watercourse_100mseg_pre1 is copied, including layers & the link to the same attribute table.↩︎