diff --git a/DESCRIPTION b/DESCRIPTION index 5683edc4..8d882b72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: pkgnet Type: Package Title: Get Network Representation of an R Package -Version: 0.1.0.9000 +Version: 0.2.0.9000 Authors@R: c( person("Brian", "Burns", email = "brian.burns@uptake.com", role = c("aut", "cre")), person("James", "Lamb", email = "james.lamb@uptake.com", role = c("aut")), @@ -25,7 +25,7 @@ Imports: methods, mvbutils, R6, - rmarkdown, + rmarkdown(>= 1.9), tools, visNetwork Suggests: diff --git a/NAMESPACE b/NAMESPACE index 34120adc..92c237a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ importFrom(mvbutils,foodweb) importFrom(rmarkdown,render) importFrom(tools,file_path_as_absolute) importFrom(tools,package_dependencies) +importFrom(utils,browseURL) importFrom(utils,installed.packages) importFrom(utils,lsf.str) importFrom(visNetwork,visEdges) diff --git a/R/CreatePackageReport.R b/R/CreatePackageReport.R index db24ecdb..14baba6f 100644 --- a/R/CreatePackageReport.R +++ b/R/CreatePackageReport.R @@ -12,12 +12,13 @@ #' report will be produced in working directory. 
#' @importFrom assertthat assert_that is.string #' @importFrom methods is +#' @importFrom utils browseURL #' @return A list of instantiated pkg_reporters fitted to \code{pkg_name} #' @export CreatePackageReport <- function(pkg_name , pkg_reporters = DefaultReporters() , pkg_path = NULL - , report_path = file.path(getwd(), paste0(pkg_name, "_report.html")) + , report_path = tempfile(pattern = pkg_name, fileext = ".html") ) { # Input checks assertthat::assert_that( @@ -51,6 +52,8 @@ CreatePackageReport <- function(pkg_name , pkg_name = pkg_name ) + utils::browseURL(report_path) + return(invisible(builtReporters)) } diff --git a/inst/package_report/header.html b/inst/package_report/header.html new file mode 100644 index 00000000..132c5da4 --- /dev/null +++ b/inst/package_report/header.html @@ -0,0 +1,4 @@ + + \ No newline at end of file diff --git a/inst/package_report/package_dependency_reporter.Rmd b/inst/package_report/package_dependency_reporter.Rmd index 8a10c505..f36ec9a0 100644 --- a/inst/package_report/package_dependency_reporter.Rmd +++ b/inst/package_report/package_dependency_reporter.Rmd @@ -1,7 +1,3 @@ ---- -title: "Package Dependency Report" -output: html_document ---- ## Dependency Network diff --git a/inst/package_report/package_report.Rmd b/inst/package_report/package_report.Rmd index 360563da..c093156e 100644 --- a/inst/package_report/package_report.Rmd +++ b/inst/package_report/package_report.Rmd @@ -1,30 +1,22 @@ --- -name: "pkgnet" +title: "`r params$pkg_name`" output: html_document: self_contained: TRUE theme: flatly include: + in_header: header.html after_body: footer.html params: reporters: params$reporters pkg_name: params$pkg_name --- - - - ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE) pkgnet:::silence_logger() ``` -```{r title, results="asis"} -cat(sprintf("# %s", pkg_name)) -``` - ```{r} reportTabs <- lapply(params$reporters, function(reporter) { report_env <- list2env(list(reporter = reporter)) @@ -33,6 +25,7 @@ reportTabs 
<- lapply(params$reporters, function(reporter) { , envir = report_env ) }) + ``` ```{r results="asis"} diff --git a/inst/package_report/package_summary_reporter.Rmd b/inst/package_report/package_summary_reporter.Rmd index c5cdbc17..10669ca8 100644 --- a/inst/package_report/package_summary_reporter.Rmd +++ b/inst/package_report/package_summary_reporter.Rmd @@ -1,7 +1,3 @@ ---- -title: "Package Summary Report" -output: html_document ---- ## Package Summary diff --git a/man/CreatePackageReport.Rd b/man/CreatePackageReport.Rd index 68eb2ed9..82288d5f 100644 --- a/man/CreatePackageReport.Rd +++ b/man/CreatePackageReport.Rd @@ -5,8 +5,8 @@ \title{Surface the internal and external dependencies of an R package.} \usage{ CreatePackageReport(pkg_name, pkg_reporters = DefaultReporters(), - pkg_path = NULL, report_path = file.path(getwd(), paste0(pkg_name, - "_report.html"))) + pkg_path = NULL, report_path = tempfile(pattern = pkg_name, fileext = + ".html")) } \arguments{ \item{pkg_name}{(string) name of a package} diff --git a/vignettes/pkgnet-intro.Rmd b/vignettes/pkgnet-intro.Rmd index 4d53e40e..e5e308e3 100644 --- a/vignettes/pkgnet-intro.Rmd +++ b/vignettes/pkgnet-intro.Rmd @@ -10,337 +10,5314 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r setupVignette, include = FALSE} -knitr::opts_chunk$set( - collapse = FALSE, - comment = "#>", - fig.align = 'center' -) -``` + + - - + + + + + + + + + + + + + +
+ + + +pkgnet
is an R library designed for the analysis of R libraries! The goal of the package is to build a graph representation of a package and its dependencies to inform a variety of activities, including:
Below is a brief tour of pkgnet
and its features.
pkgnet
represents both package dependencies and function dependencies as directed graphs. Before we look at the output of pkgnet
, here are a few core concepts to keep in mind.
Functions or Dependencies are represented as nodes, and their dependent relationships are represented as edges (a.k.a. arcs or arrows). The direction of the edge points towards the node that is dependent on the other node.
+In the example dependency graph above:
+The descendants of a node are all subsequent nodes that depend on that node, either directly or via the transitive property.
+In the example dependency graph above:
+pkgnet
can analyze any R package locally installed. Run installed.packages()
to see the full list of packages installed on your system. For this example, let’s say we are analyzing a custom built package, baseballstats
.
To analyze baseballstats
, run the following two lines of code:
That’s it! You have generated a lot of valuable information with that one call for an installed package.
+However, if the full source repository for the package is available on your system, you can supplement this report with other information such as code coverage from covr. To do so, specify the path to the repository in CreatePackageReport
.
library(pkgnet)
+report2 <- CreatePackageReport(
+ pkg_name = "baseballstats"
+ , pkg_path = <path to the repo>
+)
An HTML report has been created with the pertinent information and a list object is available with the same information and more.
+An HTML report has been created, and its location is specified in the messages in the terminal.
+This report has three sections:
+Each section has helpful tables and visuals.
+As a sample, here’s how the Function Network Visualization looks for baseballstats
:
batting_avg
and slugging_avg
functions depend upon the at_bats
function.
+We also see that nothing depends on the on_base_pct
function. This might be valuable information to an R package developer.
It appears the function with the most dependencies, at_bats
, is well covered. However, no other functions are covered by unit tests.
+
Check out the full HTML report for more results
+The CreatePackageReport()
function returns a list with three items:
Each item contains information visible in the report and more. We can use this information for a more detailed analysis of the results and/or more easily incorporate pkgnet
results into other R processes.
Here are a few notable items available within the list object.
+Both the DependencyReporter
and the FunctionReporter
contain metrics about their package dependencies or functions (a.k.a. network nodes) in a nodes
table.
#> [1] 5 13
+
+#> [1] "node" "coveredLines" "totalLines"
+#> [4] "coverageRatio" "meanCoveragePerLine" "filename"
+#> [7] "outDegree" "outBetweeness" "outCloseness"
+#> [10] "numDescendants" "hubScore" "pageRank"
+#> [13] "inDegree"
+Note, a few of these metrics provided by default are from the field of Network Theory. You can leverage the Network Object described below to derive many more.
+Both the DependencyReporter
and the FunctionReporter
contain measures based on their network structure in a network_measures
list.
#> $centralization.OutDegree
+#> [1] 0.3
+#>
+#> $centralization.betweenness
+#> [1] 0.03125
+#>
+#> $centralization.closeness
+#> [1] 0.2743056
+Both the DependencyReporter
and the FunctionReporter
are available as igraph objects named pkg_graph
#> IGRAPH 668bfff DN-- 5 4 --
+#> + attr: name (v/c)
+#> + edges from 668bfff (vertex names):
+#> [1] at_bats ->batting_avg at_bats ->slugging_avg
+#> [3] batting_avg ->OPS slugging_avg->OPS
+With the reports and objects produced by pkgnet
by default, there is plenty to inform us on the inner workings of an R package. However, we may want to know MORE! Since the igraph objects are available, we can leverage those graphs for further analysis.
In this section, let’s examine a larger R package, such as lubridate.
+If you would like to follow along with the examples in this section, run these commands in your terminal to download and install lubridate
1.
# Create a temporary workspace
+mkdir -p ~/pkgnet_example && cd ~/pkgnet_example
+
+# Grab the lubridate source code
+git clone https://github.com/tidyverse/lubridate
+cd lubridate
+
+# If you want the examples to match exactly
+git reset --hard 9797d69abe1574dd89310c834e52d358137669b8
+
+# Install it
+Rscript -e "devtools::install()"
Let’s examine the relationship between a function’s total number of descendants and the unit test coverage of that function’s code.
-```{r fakeDetail1, eval=FALSE} -# Run pkgnet -library(pkgnet) -report2 <- CreatePackageReport( - pkg_name = "lubridate" - , pkg_path = "~/pkgnet_example/lubridate" -) - -# Extract Nodes Table -funcNodes <- report2$FunctionReporter$nodes - -# List Coverage For Most Referenced Functions -mostRef <- funcNodes[order(numDescendants, decreasing = TRUE)][1:10] -mostRef[,list(`Function` = node - , `Descendant Count` = numDescendants - , `Coverage Ratio` = coverageRatio - , `Total Lines` = totalLines)] - -``` - -|Function | Descendant Count| Coverage Ratio| Total Lines| -|:-----------------------|----------------:|--------------:|-----------:| -|divide_period_by_period | 39| 1| 2| -|days | 22| 1| 1| -|check_duration | 15| 0| 1| -|as.POSIXt | 13| 0| 1| -|eweeks | 13| 0| 2| -|check_interval | 12| 0| 11| -|date<- | 12| NA| NA| -|add_months | 10| 1| 4| -|ceil_multi_unit | 10| 1| 1| -|am | 6| 1| 1| - -Inspecting results such as these can help an R package developer decide which function to cover with unit tests next. - -In this case, `check_duration`, one of the most referenced functions (either directly or indirectly), is not covered by unit tests. However, it appears to be a simple one line function that may not be necessary to cover in unit testing. `check_interval`, on the other hand, might benefit from some unit test coverage as it is a larger, uncovered function with a similar number of dependencies. - -## Discovering Similar Functions -Looking at that same large package, let's say we wanted to explore options for consolidating functions. One approach might be to explore consolidating functions that share the same dependencies. In that case, we could use the `igraph` object to highlight functions with the same dependencies via [Jaccard similarity](https://en.wikipedia.org/wiki/Jaccard_index). 
- -```{r fakeDetail2, eval=FALSE} -# Get igraph object -funcGraph <- report2$FunctionReporter$pkg_graph -funcNames <- igraph::vertex_attr(funcGraph, name = "name") - -# Jaccard Similarity -sim <- igraph::similarity(graph = funcGraph - , mode = "in" - , method = "jaccard") -diag(sim) <- 0 -sim[sim < 1] <- 0 - -simGraph <- igraph::graph_from_adjacency_matrix(adjmatrix = sim, mode = "undirected") - -# Find groups with same dependencies (similarity == 1) -sameDeps <- igraph::max_cliques(graph = simGraph - , min = 2 - ) - -# Write results -for (i in seq_along(sameDeps)) { - cat(paste0("Group ", i, ": ")) - cat(paste(funcNames[as.numeric(sameDeps[[i]])], collapse = ", ")) - cat("\n") -} -``` - -```{r resultFromFake, echo=FALSE, results='markup'} -cat("Group 1: stamp_time, stamp_date -Group 2: ms, hm -Group 3: new_interval, %--%, int_diff -Group 4: floor_date, quarter, semester -Group 5: picoseconds, microseconds, nanoseconds, milliseconds -Group 6: weeks, days, years, seconds_to_period, seconds, new_period, minutes, hours -Group 7: yq, dmy, ymd_hms, ymd_hm, ymd_h, ymd, ydm_hms, ydm_hm, ydm_h, ydm, pretty_dates, parse_date_time2, parse_date_time, myd, mdy_hms, mdy_hm, mdy_h, mdy, local_time, fast_strptime, dym, dmy_hms, dmy_hm, dmy_h -" -) -``` - -Now, we have identified seven different groups of functions within [lubridate](http://lubridate.tidyverse.org/) that share the _exact same_ dependencies. We could explore each group of functions for potential consolidation. - - - -```{r removeDemoPackage, include=FALSE} -devtools::uninstall(system.file('baseballstats',package="pkgnet")) -``` +# Run pkgnet
+library(pkgnet)
+report2 <- CreatePackageReport(
+ pkg_name = "lubridate"
+ , pkg_path = "~/pkgnet_example/lubridate"
+)
+
+# Extract Nodes Table
+funcNodes <- report2$FunctionReporter$nodes
+
+# List Coverage For Most Referenced Functions
+mostRef <- funcNodes[order(numDescendants, decreasing = TRUE)][1:10]
+mostRef[,list(`Function` = node
+ , `Descendant Count` = numDescendants
+ , `Coverage Ratio` = coverageRatio
+ , `Total Lines` = totalLines)]
Function | +Descendant Count | +Coverage Ratio | +Total Lines | +
---|---|---|---|
divide_period_by_period | +39 | +1 | +2 | +
days | +22 | +1 | +1 | +
check_duration | +15 | +0 | +1 | +
as.POSIXt | +13 | +0 | +1 | +
eweeks | +13 | +0 | +2 | +
check_interval | +12 | +0 | +11 | +
date<- | +12 | +NA | +NA | +
add_months | +10 | +1 | +4 | +
ceil_multi_unit | +10 | +1 | +1 | +
am | +6 | +1 | +1 | +
Inspecting results such as these can help an R package developer decide which function to cover with unit tests next.
+In this case, check_duration
, one of the most referenced functions (either directly or indirectly), is not covered by unit tests. However, it appears to be a simple one-line function that may not be necessary to cover in unit testing. check_interval
, on the other hand, might benefit from some unit test coverage as it is a larger, uncovered function with a similar number of dependencies.
Looking at that same large package, let’s say we wanted to explore options for consolidating functions. One approach might be to explore consolidating functions that share the same dependencies. In that case, we could use the igraph
object to highlight functions with the same dependencies via Jaccard similarity.
# Get igraph object
+funcGraph <- report2$FunctionReporter$pkg_graph
+funcNames <- igraph::vertex_attr(funcGraph, name = "name")
+
+# Jaccard Similarity
+sim <- igraph::similarity(graph = funcGraph
+ , mode = "in"
+ , method = "jaccard")
+diag(sim) <- 0
+sim[sim < 1] <- 0
+
+simGraph <- igraph::graph_from_adjacency_matrix(adjmatrix = sim, mode = "undirected")
+
+# Find groups with same dependencies (similarity == 1)
+sameDeps <- igraph::max_cliques(graph = simGraph
+ , min = 2
+ )
+
+# Write results
+for (i in seq_along(sameDeps)) {
+ cat(paste0("Group ", i, ": "))
+ cat(paste(funcNames[as.numeric(sameDeps[[i]])], collapse = ", "))
+ cat("\n")
+}
#> Group 1: stamp_time, stamp_date
+#> Group 2: ms, hm
+#> Group 3: new_interval, %--%, int_diff
+#> Group 4: floor_date, quarter, semester
+#> Group 5: picoseconds, microseconds, nanoseconds, milliseconds
+#> Group 6: weeks, days, years, seconds_to_period, seconds, new_period, minutes, hours
+#> Group 7: yq, dmy, ymd_hms, ymd_hm, ymd_h, ymd, ydm_hms, ydm_hm, ydm_h, ydm, pretty_dates, parse_date_time2, parse_date_time, myd, mdy_hms, mdy_hm, mdy_h, mdy, local_time, fast_strptime, dym, dmy_hms, dmy_hm, dmy_h
+Now, we have identified seven different groups of functions within lubridate that share the exact same dependencies. We could explore each group of functions for potential consolidation.
+Examples from version 1.7.3 of Lubridate↩