diff --git a/r/DESCRIPTION b/r/DESCRIPTION index ef461f78..8a921654 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -19,10 +19,13 @@ Suggests: Roxygen: list(markdown = TRUE) RoxygenNote: 6.1.1.9000 Collate: + 'read_funcs.R' + 'nse_funcs.R' + 'compile.R' + 'datatable.R' + 'entity.R' + 'node.R' 'stencila.R' - 'util.R' 'typing.R' 'types.R' - 'node.R' - 'entity.R' - 'datatable.R' + 'util.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index eeab1396..707fd1a4 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -2,28 +2,37 @@ S3method(as.data.frame,Datatable) S3method(print,Entity) +export(ArraySchema) export(Article) export(AudioObject) export(BlockContent) +export(BooleanSchema) export(Brand) +export(Cite) +export(CiteGroup) export(Code) export(CodeBlock) export(CodeChunk) -export(CodeExpr) +export(CodeExpression) +export(CodeFragment) export(Collection) +export(ConstantSchema) export(ContactPoint) export(CreativeWork) +export(CreativeWorkTypes) export(Datatable) export(DatatableColumn) -export(DatatableColumnSchema) export(Delete) export(Emphasis) export(Entity) +export(EnumSchema) export(Environment) +export(Figure) export(Heading) export(ImageObject) export(Include) export(InlineContent) +export(IntegerSchema) export(Link) export(List) export(ListItem) @@ -31,16 +40,23 @@ export(Mark) export(MediaObject) export(Mount) export(Node) +export(NumberSchema) export(Organization) export(Paragraph) +export(Parameter) +export(Periodical) export(Person) export(Product) +export(PublicationIssue) +export(PublicationVolume) export(Quote) export(QuoteBlock) export(ResourceParameters) +export(SchemaTypes) export(SoftwareApplication) export(SoftwareSession) export(SoftwareSourceCode) +export(StringSchema) export(Strong) export(Subscript) export(Superscript) @@ -49,6 +65,7 @@ export(TableCell) export(TableRow) export(ThematicBreak) export(Thing) +export(TupleSchema) export(VideoObject) export(as.Datatable.data.frame) export(datatable_from_dataframe) @@ -56,3 +73,5 @@ export(datatable_to_dataframe) export(entity_from_list) export(node_from_json) export(node_to_json) +export(nse_funcs) +export(read_funcs) diff --git a/r/R/compile.R b/r/R/compile.R new file mode 100644 index 00000000..42ed389e --- /dev/null +++ b/r/R/compile.R @@ -0,0 +1,161 @@ +#' @include nse_funcs.R +#' @include read_funcs.R + +#' Names of function that assign +assign_func_names <- c("assign", "base::assign", "<<-", "<-", "=") + +#' Names of functions that "import" packages +import_func_names <- c("library", "require", "::", ":::") + +#' Names of function in the base R environment +base_func_names <- ls(baseenv()) + +# Temporary +Function <- function(...) list(type = "Function", ...) + +compile_chunk <- function(chunk) { + language <- chunk$language + text <- chunk$text + imports <- NULL + declares <- NULL + assigns <- NULL + alters <- NULL + uses <- NULL + reads <- NULL + + # Only handle R code + if (is.null(language) || !(language %in% c("r", "R"))) return(chunk) + + # Parse the code into an AST + ast <- as.list(parse(text = text)) + + # Record assignments that are local + # to functions, they need to be considered + # for `uses`, but not for `assigns` + local_assigns <- NULL + + ast_walker <- function(node, depth = 0) { + if (is.symbol(node)) { + name <- as.character(node) + if (!(name %in% assigns)) uses <<- unique(c(uses, name)) + } else if (is.call(node)) { + # Resolve the function name + func <- node[[1]] + if (is.symbol(func)) { + # 'Normal' function call + # Add function to `uses` if it is is not in base environment + func_name <- as.character(func) + if (!(func_name %in% base_func_names)) { + uses <<- unique(c(uses, func_name)) + } + } else if (is.call(func) && func[[1]] == "::") { + # Call of namespaced function e.g pkg::func + # Do not add these to `uses` + func_name <- paste0(func[[2]], "::", func[[3]]) + } else { + # No func_name for other more complex calls + # that do not need to be detected below e.g. instance$method() + func_name <- "" + } + + if (func_name == "$") { + # Only walk the left side, not the right since they are symbols to + # extract from an object so should not be included in `uses` + ast_walker(node[[2]], depth) + return() + } else if (func_name == "function") { + # Function definition + # Walk the body with incremented depth + ast_walker(node[[length(node)]], depth + 1) + return() + } else if (func_name %in% assign_func_names) { + left <- node[[2]] + right <- node[[3]] + if (is.call(right) && right[[1]] == "function") { + # Assignment of a function + # Treat as a declaration + func_decl <- Function( + name = as.character(left) + ) + if (!is.null(right[[2]])) { + parameters <- NULL + params <- as.list(right[[2]]) + print(names(params)) + for (name in names(params)) { + param <- params[[name]] + parameters <- c(parameters, list( + Parameter( + name = name + ) + )) + } + func_decl$parameters <- parameters + } + declares <<- c(declares, list(func_decl)) + + # Only walk the function so that left is not made a `uses` + ast_walker(right, depth) + return() + } else if (func == "assign") { + # Assignment using `assign` function + # TODO: Check the `pos` arg relative to current depth + assigns <<- unique(c(assigns, left)) + } else if (is.symbol(left) && depth == 0) { + # Assignment to a name + assigns <<- unique(c(assigns, as.character(left))) + } else if (is.call(left)) { + # Assignment to an existing object e.g. a$b[1] <- NULL + # Recurse until we find the variable that is target of alteration + walk <- function(node) { + target <- node[[2]] + if (is.symbol(target)) { + if (is.null(alters) || !(target %in% alters)) { + alters <<- c(alters, as.character(target)) + } + } else { + walk(target) + } + } + walk(target) + } + } else if (func_name %in% import_func_names) { + # Package import + # Get the names of the package + if (length(node) > 1) { + imports <<- unique(c(imports, as.character(node[[2]]))) + } + } else if (func_name %in% read_funcs_names) { + # File read + # Collect relevant argument(s) from function call + args <- as.list(node[2:length(node)]) + read_func_index <- floor((match(func_name, read_funcs_names) - 1) / 2) + 1 + read_func <- read_funcs[[read_func_index]] + if (any(read_func$names %in% names(args))) { + files <- unlist(args[read_func$names]) + } else { + files <- unlist(args[read_func$positions]) + } + # Only use character arguments i.e. not symbols (variable names) + files <- files[is.character(files)] + if (length(files) > 0) reads <<- unique(c(reads, files)) + } + } + + # If there are child nodes, walk over them too + if (length(node) > 1) { + lapply(node[2:length(node)], ast_walker, depth) + } + } + lapply(ast, ast_walker) + + list( + language = language, + text = text, + imports = imports, + declares = declares, + assigns = assigns, + alters = alters, + uses = uses, + reads = reads + ) +} diff --git a/r/R/nse_funcs.R b/r/R/nse_funcs.R new file mode 100644 index 00000000..46e7856f --- /dev/null +++ b/r/R/nse_funcs.R @@ -0,0 +1,30 @@ +#' Module for defining a list of non-standard evaluation functions, +#' functions that use `substitute()` (or related) on one or more arguments. +#' See http://adv-r.had.co.nz/Computing-on-the-language.html. +#' This list is used when R code is compiled to ignore some +#' variable names when determining the `uses` property +#' of the chunk. + +#' Create a entry for a function that uses NSE +#' +#' @param func Name of the function +#' @param package Name of the package that the function is in +#' @param names Names of parameters that should be ignored +#' @param positions Positions of parameters that should be ignored +nse_func <- function(func, package, names=NULL, positions=NULL) { + list( + func = func, + package = package, + names = names, + positions = positions + ) +} + +#' List of functions that read from files +#' @export +nse_funcs <- list( + # base package + nse_func("base", "subset", c("subset", "select"), c(2, 4)), + # dplyr package + nse_func("dplyr", "filter") +) diff --git a/r/R/read_funcs.R b/r/R/read_funcs.R new file mode 100644 index 00000000..ab76ab81 --- /dev/null +++ b/r/R/read_funcs.R @@ -0,0 +1,54 @@ +#' Module for defining a list of "read" functions, functions +#' that read files from the filesystem. This list is used +#' when R code is compiled to help determine the `reads` propery +#' of the chunk + +#' Create a entry for a function that reads a file +#' +#' Most file reading functions have the file path as their first +#' parameter named `file`. If this is not the case, or if there +#' is more than one parameter that relates to a file that is read +#' by the function, use the `names` and `positions` parameters +#' +#' @param package Name of the package that the function is in +#' @param func Name of the function +#' @param names Names of parameters that are file paths that are read +#' @param positions Positions of parameters that are file paths that are read +read_func <- function(package, func, names="file", positions=1) { + list( + package = package, + func = func, + names = names, + positions = positions + ) +} + +#' List of functions that read from files +#' @export +read_funcs <- list( + # utils package + read_func("utils", "read.table"), + read_func("utils", "read.csv"), + read_func("utils", "read.csv2"), + read_func("utils", "read.delim"), + read_func("utils", "read.delim2"), + read_func("utils", "read.fwf"), + # foreign package + read_func("foreign", "read.arff"), + read_func("foreign", "read.dbf"), + read_func("foreign", "read.dta"), + read_func("foreign", "read.epiinfo"), + read_func("foreign", "read.mtp"), + read_func("foreign", "read.octave"), + read_func("foreign", "read.spss"), + read_func("foreign", "read.ssd"), + read_func("foreign", "read.systat"), + read_func("foreign", "read.xport") +) + +#' List of possible function call names to match +read_funcs_names <- Reduce( + function(prev, curr) c(prev, curr$func, paste0(curr$package, "::", curr$func)), + read_funcs, + character() +) diff --git a/r/R/types.R b/r/R/types.R index 4a739fae..77f5e6a3 100644 --- a/r/R/types.R +++ b/r/R/types.R @@ -2,6 +2,8 @@ # Do not modify it by hand. Instead, modify the source `.schema.yaml` files # in the `schema` directory and run `npm run build:r` to regenerate it. +#' @include typing.R + #' The most basic item, defining the minimum properties required. #' #' @name Entity diff --git a/r/R/util.R b/r/R/util.R index da3976a0..434fb88f 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -1,25 +1,26 @@ # Utility functions used internally in this package # and not intended to be exported. -#' Map a function across entries in a node +#' Map a function across entries in an object #' -#' This is analagous to `Object.entries(node).map(...)` +#' This is analagous to `Object.entries(object).map(...)` #' in Javascript. It handles bother scalar and vector -#' node types. +#' object types. #' -#' @param node The node to map over -#' @param func The function to apply to each of the node's entries +#' @param object The object to map over +#' @param func The function to apply to each of the object's entries #' @param ... Additional arguments to pass through to the function -map <- function(node, func, ...) { - if (is.list(node)) lapply(node, func, ...) - else func(node, ...) +map <- function(object, func, ...) { + if (is.list(object)) lapply(object, func, ...) + else func(object, ...) } -#' Transform a node by recursively applying a function to it. +#' Create a transformattion of a object by recursively +#' applying a function to it. Could be called `deepMap`. #' -#' @param node The node to map over -#' @param func The function to apply to each node +#' @param object The object to map over +#' @param func The function to apply to each object #' @param ... Additional arguments to pass through to the function -transform <- function(node, func, ...) { - map(node, function(child) map(child, func, ...)) +transform <- function(object, func, ...) { + map(object, function(child) map(child, func, ...)) }