diff --git a/DESCRIPTION b/DESCRIPTION index 5683edc4..8d882b72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: pkgnet Type: Package Title: Get Network Representation of an R Package -Version: 0.1.0.9000 +Version: 0.2.0.9000 Authors@R: c( person("Brian", "Burns", email = "brian.burns@uptake.com", role = c("aut", "cre")), person("James", "Lamb", email = "james.lamb@uptake.com", role = c("aut")), @@ -25,7 +25,7 @@ Imports: methods, mvbutils, R6, - rmarkdown, + rmarkdown(>= 1.9), tools, visNetwork Suggests: diff --git a/NAMESPACE b/NAMESPACE index 34120adc..92c237a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ importFrom(mvbutils,foodweb) importFrom(rmarkdown,render) importFrom(tools,file_path_as_absolute) importFrom(tools,package_dependencies) +importFrom(utils,browseURL) importFrom(utils,installed.packages) importFrom(utils,lsf.str) importFrom(visNetwork,visEdges) diff --git a/R/CreatePackageReport.R b/R/CreatePackageReport.R index db24ecdb..14baba6f 100644 --- a/R/CreatePackageReport.R +++ b/R/CreatePackageReport.R @@ -12,12 +12,13 @@ #' report will be produced in working directory. #' @importFrom assertthat assert_that is.string #' @importFrom methods is +#' @importFrom utils browseURL #' @return A list of instantiated pkg_reporters fitted to \code{pkg_name} #' @export CreatePackageReport <- function(pkg_name , pkg_reporters = DefaultReporters() , pkg_path = NULL - , report_path = file.path(getwd(), paste0(pkg_name, "_report.html")) + , report_path = tempfile(pattern = pkg_name, fileext = ".html") ) { # Input checks assertthat::assert_that( @@ -51,6 +52,8 @@ CreatePackageReport <- function(pkg_name , pkg_name = pkg_name ) + utils::browseURL(report_path) + return(invisible(builtReporters)) } diff --git a/inst/package_report/header.html b/inst/package_report/header.html new file mode 100644 index 00000000..132c5da4 --- /dev/null +++ b/inst/package_report/header.html @@ -0,0 +1,4 @@ + + \ No newline at end of file diff --git a/inst/package_report/package_dependency_reporter.Rmd b/inst/package_report/package_dependency_reporter.Rmd index 8a10c505..f36ec9a0 100644 --- a/inst/package_report/package_dependency_reporter.Rmd +++ b/inst/package_report/package_dependency_reporter.Rmd @@ -1,7 +1,3 @@ ---- -title: "Package Dependency Report" -output: html_document ---- ## Dependency Network diff --git a/inst/package_report/package_report.Rmd b/inst/package_report/package_report.Rmd index 360563da..c093156e 100644 --- a/inst/package_report/package_report.Rmd +++ b/inst/package_report/package_report.Rmd @@ -1,30 +1,22 @@ --- -name: "pkgnet" +title: "`r params$pkg_name`" output: html_document: self_contained: TRUE theme: flatly include: + in_header: header.html after_body: footer.html params: reporters: params$reporters pkg_name: params$pkg_name --- - - - ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = FALSE) pkgnet:::silence_logger() ``` -```{r title, results="asis"} -cat(sprintf("# %s", pkg_name)) -``` - ```{r} reportTabs <- lapply(params$reporters, function(reporter) { report_env <- list2env(list(reporter = reporter)) @@ -33,6 +25,7 @@ reportTabs <- lapply(params$reporters, function(reporter) { , envir = report_env ) }) + ``` ```{r results="asis"} diff --git a/inst/package_report/package_summary_reporter.Rmd b/inst/package_report/package_summary_reporter.Rmd index c5cdbc17..10669ca8 100644 --- a/inst/package_report/package_summary_reporter.Rmd +++ b/inst/package_report/package_summary_reporter.Rmd @@ -1,7 +1,3 @@ ---- -title: "Package Summary Report" -output: html_document ---- ## Package Summary diff --git a/man/CreatePackageReport.Rd b/man/CreatePackageReport.Rd index 68eb2ed9..82288d5f 100644 --- a/man/CreatePackageReport.Rd +++ b/man/CreatePackageReport.Rd @@ -5,8 +5,8 @@ \title{Surface the internal and external dependencies of an R package.} \usage{ CreatePackageReport(pkg_name, pkg_reporters = DefaultReporters(), - pkg_path = NULL, report_path = file.path(getwd(), paste0(pkg_name, - "_report.html"))) + pkg_path = NULL, report_path = tempfile(pattern = pkg_name, fileext = + ".html")) } \arguments{ \item{pkg_name}{(string) name of a package} diff --git a/vignettes/pkgnet-intro.Rmd b/vignettes/pkgnet-intro.Rmd index 4d53e40e..e5e308e3 100644 --- a/vignettes/pkgnet-intro.Rmd +++ b/vignettes/pkgnet-intro.Rmd @@ -10,337 +10,5314 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r setupVignette, include = FALSE} -knitr::opts_chunk$set( - collapse = FALSE, - comment = "#>", - fig.align = 'center' -) -``` + + - - + + + + + + + + + + + + + + + + + +
+ +
+ + + + +

pkgnet is an R library designed for the analysis of R libraries! The goal of the package is to build a graph representation of a package and its dependencies to inform a variety of activities, including:

+ +

Below is a brief tour of pkgnet and its features.

+
+
+

Dependencies As a Graph

+

pkgnet represents both package dependencies and function dependencies as directed graphs. Before we look at the output of pkgnet, here are few core concepts to keep in mind.

+
+
+ +

+Example Dependency Graph +

+
+
+

Dependency

+

Functions or Dependencies are represented as nodes, and their dependent relationships are represented as edges (a.k.a. arcs or arrows). The direction of the edge points towards the node that is dependent on the other node.

+

In the example dependency graph above:

+ +
+
+

Descendants

+

The descendants of a node are all subsequent nodes that depend on that node, either directly or via the transitive property.

+

In the example dependency graph above:

+ +
+
+
+
+

Running pkgnet

+

pkgnet can analyze any R package locally installed. Run installed.packages() to see the full list of packages installed on your system. For this example, let’s say we are analyzing a custom built package, baseballstats.

+

To analyze baseballstats, run the following two lines of code:

+
library(pkgnet)
+report1 <- CreatePackageReport(pkg_name = "baseballstats")
+

That’s it! You have generated a lot of valuable information with that one call for an installed package.

+

However, if the full source repository for the package is available on your system, you can supplement this report with other information such as code coverage from covr. To do so, specify the path to the repository in CreatePackageReport.

+
library(pkgnet)
+report2 <- CreatePackageReport(
+  pkg_name = "baseballstats"
+  , pkg_path = <path to the repo>
+)
+
+
+
+

Examining The Results

+

An HTML report has been created with the pertinent information and a list object is available with the same information and more.

+
+

The Report

+

An HTML report has been created, and its location is specified in the messages in the terminal.

+

This report has three sections:

+
    +
  1. Package Summary – general information about the package and package level statistics
    +
  2. +
  3. Dependency Network – information regarding the packages upon which the current package under analysis relies upon
    +
  4. +
  5. Function Network – information regarding the functions within the current package under analysis and their inter dependencies
  6. +
+

Each section has helpful tables and visuals.

+

As a sample, here’s how the Function Network Visualization looks for baseballstats:

+ +
+

+
+

Default

+

+ All Functions and their dependencies are visible. For example, we can see that both batting_avg and slugging_avg functions depend upon the at_bats function.

+

We also see that nothing depends on the on_base_pct function. This might be valuable information to an R package developer.

+
+

+
+

With Coverage Information

+

+ Same as the default visualization except we can see coverage information as well (Red = 0%, Green = 100%).

+

It appears the function with the most dependencies, at_bats, is well covered. However, no other functions are covered by unit tests.

+
+

+
+

+

Check out the full HTML report for more results

+
+
+

The List Object

+

The CreatePackageReport() function returns a list with three items:

+
    +
  1. SummaryReporter
    +
  2. +
  3. DependencyReporter
    +
  4. +
  5. FunctionReporter
  6. +
+

Each items contains information visible in the report and more. We can use this information for a more detailed analysis of the results and/or more easily incorporate pkgnet results into other R processes.

+

Here are a few notable items available within the list object.

+
+

Node Information

+

Both the DependencyReporter and the FunctionReporter contain metrics about their package dependencies or functions (a.k.a network nodes) in a nodes table.

+ +
#> [1]  5 13
+ +
#>  [1] "node"                "coveredLines"        "totalLines"         
+#>  [4] "coverageRatio"       "meanCoveragePerLine" "filename"           
+#>  [7] "outDegree"           "outBetweeness"       "outCloseness"       
+#> [10] "numDescendants"      "hubScore"            "pageRank"           
+#> [13] "inDegree"
+

Note, a few of these metrics provided by default are from the field of Network Theory. You can leverage the Network Object described below to derive many more.

+
+
+

Network Measures

+

Both the DependencyReporter and the FunctionReporter contain measures based on their network structure in a network_measures list.

+ +
#> $centralization.OutDegree
+#> [1] 0.3
+#> 
+#> $centralization.betweenness
+#> [1] 0.03125
+#> 
+#> $centralization.closeness
+#> [1] 0.2743056
+
+
+

Network Object

+

Both the DependencyReporter and the FunctionReporter are available as igraph objects named pkg_graph

+ +
#> IGRAPH 668bfff DN-- 5 4 -- 
+#> + attr: name (v/c)
+#> + edges from 668bfff (vertex names):
+#> [1] at_bats     ->batting_avg  at_bats     ->slugging_avg
+#> [3] batting_avg ->OPS          slugging_avg->OPS
+
+
+
+
+
+

A Deeper Look

+

With the reports and objects produced by pkgnet by default, there is plenty to inform us on the inner workings of an R package. However, we may want to know MORE! Since the igraph objects are available, we can leverage those graphs for further analysis.

+

In this section, let’s examine a larger R package, such as lubridate.

+

If you would like to follow along with the examples in this section, run these commands in your terminal to download and install lubridate1.

+
# Create a temporary workspace
+mkdir -p ~/pkgnet_example && cd ~/pkgnet_example
+
+# Grab the lubridate source code
+git clone https://github.com/tidyverse/lubridate
+cd lubridate
+
+# If you want the examples to match exactly
+git reset --hard 9797d69abe1574dd89310c834e52d358137669b8
+
+# Install it
+Rscript -e "devtools::install()"
+
+

Coverage of Most Referenced Functions

+

Let’s examine the relationship between a function’s total number of descendants and the unit test coverage of that function’s code.

-```{r fakeDetail1, eval=FALSE} -# Run pkgnet -library(pkgnet) -report2 <- CreatePackageReport( - pkg_name = "lubridate" - , pkg_path = "~/pkgnet_example/lubridate" -) - -# Extract Nodes Table -funcNodes <- report2$FunctionReporter$nodes - -# List Coverage For Most Referenced Functions -mostRef <- funcNodes[order(numDescendants, decreasing = TRUE)][1:10] -mostRef[,list(`Function` = node - , `Descendant Count` = numDescendants - , `Coverage Ratio` = coverageRatio - , `Total Lines` = totalLines)] - -``` - -|Function | Descendant Count| Coverage Ratio| Total Lines| -|:-----------------------|----------------:|--------------:|-----------:| -|divide_period_by_period | 39| 1| 2| -|days | 22| 1| 1| -|check_duration | 15| 0| 1| -|as.POSIXt | 13| 0| 1| -|eweeks | 13| 0| 2| -|check_interval | 12| 0| 11| -|date<- | 12| NA| NA| -|add_months | 10| 1| 4| -|ceil_multi_unit | 10| 1| 1| -|am | 6| 1| 1| - -Inspecting results such as these can help an R package developer decide which function to cover with unit tests next. - -In this case, `check_duration`, one of the most referenced functions (either directly or indirectly), is not covered by unit tests. However, it appears to be a simple one line function that may not be necessary to cover in unit testing. `check_interval`, on the other hand, might benefit from some unit test coverage as it is a larger, uncovered function with a similar number of dependencies. - -## Discovering Similar Functions -Looking at that same large package, let's say we wanted to explore options for consolidating functions. One approach might be to explore consolidating functions that share the same dependencies. In that case, we could use the `igraph` object to highlight functions with the same dependencies via [Jaccard similarity](https://en.wikipedia.org/wiki/Jaccard_index). - -```{r fakeDetail2, eval=FALSE} -# Get igraph object -funcGraph <- report2$FunctionReporter$pkg_graph -funcNames <- igraph::vertex_attr(funcGraph, name = "name") - -# Jaccard Similarity -sim <- igraph::similarity(graph = funcGraph - , mode = "in" - , method = "jaccard") -diag(sim) <- 0 -sim[sim < 1] <- 0 - -simGraph <- igraph::graph_from_adjacency_matrix(adjmatrix = sim, mode = "undirected") - -# Find groups with same dependencies (similarity == 1) -sameDeps <- igraph::max_cliques(graph = simGraph - , min = 2 - ) - -# Write results -for (i in seq_along(sameDeps)) { - cat(paste0("Group ", i, ": ")) - cat(paste(funcNames[as.numeric(sameDeps[[i]])], collapse = ", ")) - cat("\n") -} -``` - -```{r resultFromFake, echo=FALSE, results='markup'} -cat("Group 1: stamp_time, stamp_date -Group 2: ms, hm -Group 3: new_interval, %--%, int_diff -Group 4: floor_date, quarter, semester -Group 5: picoseconds, microseconds, nanoseconds, milliseconds -Group 6: weeks, days, years, seconds_to_period, seconds, new_period, minutes, hours -Group 7: yq, dmy, ymd_hms, ymd_hm, ymd_h, ymd, ydm_hms, ydm_hm, ydm_h, ydm, pretty_dates, parse_date_time2, parse_date_time, myd, mdy_hms, mdy_hm, mdy_h, mdy, local_time, fast_strptime, dym, dmy_hms, dmy_hm, dmy_h -" -) -``` - -Now, we have identified seven different groups of functions within [lubridate](http://lubridate.tidyverse.org/) that share the _exact same_ dependencies. We could explore each group of functions for potential consolidation. - - - -```{r removeDemoPackage, include=FALSE} -devtools::uninstall(system.file('baseballstats',package="pkgnet")) -``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionDescendant CountCoverage RatioTotal Lines
divide_period_by_period3912
days2211
check_duration1501
as.POSIXt1301
eweeks1302
check_interval12011
date<-12NANA
add_months1014
ceil_multi_unit1011
am611
+

Inspecting results such as these can help an R package developer decide which function to cover with unit tests next.

+

In this case, check_duration, one of the most referenced functions (either directly or indirectly), is not covered by unit tests. However, it appears to be a simple one line function that may not be necessary to cover in unit testing. check_interval, on the other hand, might benefit from some unit test coverage as it is a larger, uncovered function with a similar number of dependencies.

+
+
+

Discovering Similar Functions

+

Looking at that same large package, let’s say we wanted to explore options for consolidating functions. One approach might be to explore consolidating functions that share the same dependencies. In that case, we could use the igraph object to highlight functions with the same dependencies via Jaccard similarity.

+ +
#> Group 1: stamp_time, stamp_date
+#> Group 2: ms, hm
+#> Group 3: new_interval, %--%, int_diff
+#> Group 4: floor_date, quarter, semester
+#> Group 5: picoseconds, microseconds, nanoseconds, milliseconds
+#> Group 6: weeks, days, years, seconds_to_period, seconds, new_period, minutes, hours
+#> Group 7: yq, dmy, ymd_hms, ymd_hm, ymd_h, ymd, ydm_hms, ydm_hm, ydm_h, ydm, pretty_dates, parse_date_time2, parse_date_time, myd, mdy_hms, mdy_hm, mdy_h, mdy, local_time, fast_strptime, dym, dmy_hms, dmy_hm, dmy_h
+

Now, we have identified seven different groups of functions within lubridate that share the exact same dependencies. We could explore each group of functions for potential consolidation.

+
+
+
+
+
    +
  1. Examples from version 1.7.3 of Lubridate

  2. +
+
+ + + + + + + + diff --git a/vignettes_source/pkgnet-intro.Rmd b/vignettes_source/pkgnet-intro.Rmd new file mode 100644 index 00000000..4d53e40e --- /dev/null +++ b/vignettes_source/pkgnet-intro.Rmd @@ -0,0 +1,346 @@ +--- +title: "Exploring The Structure and Dependencies of An R Package" +output: + rmarkdown::html_vignette: + toc: true + fig_caption: yes +vignette: > + %\VignetteIndexEntry{Exploring The Structure and Dependencies of An R Package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setupVignette, include = FALSE} +knitr::opts_chunk$set( + collapse = FALSE, + comment = "#>", + fig.align = 'center' +) +``` + + + + + + + + +`pkgnet` is an R library designed for the analysis of R libraries! The goal of the package is to build a graph representation of a package and its dependencies to inform a variety of activities, including: + +- prioritizing functions to unit test based on their centrality +- examining the recursive dependencies you are taking on by using a given package +- exploring the structure of a new package provided by a coworker or downloaded from the internet + +Below is a brief tour of `pkgnet` and its features. + +*** +# Dependencies As a Graph +`pkgnet` represents both package dependencies and function dependencies as directed graphs. Before we look at the output of `pkgnet`, here are few core concepts to keep in mind. + +```{r whatIsDepGraph, echo=FALSE, message=FALSE, results='markup', fig.height=3, fig.width=3, fig.cap = "Example Dependency Graph"} +nodes <- data.frame( + id = 1:4 + , label = LETTERS[1:4] + , x = c(1,3,2,4) + , y = c(1,1,2,3) + , level = c(1,1,2,3) + ) + +edges <- data.frame( + from = c(1,2,3,2) + , to = c(3,3,4,4) + ) + +g <- visNetwork::visNetwork(nodes = nodes + , edges = edges + , width = "100%" + ) +g <- visNetwork::visNodes(graph = g + , shape = "circle" + , font = list(size = 25 + , bold = TRUE + , align = 'center' + ) + ) +g <- visNetwork::visEdges(graph = g + , arrows = "to" + ) +g <- visNetwork::visHierarchicalLayout(graph = g + , direction = "DU" + ) +g <- visNetwork::visInteraction(graph = g + , dragNodes = TRUE + , dragView = TRUE + , zoomView = FALSE) +g +``` + +## Dependency +Functions or Dependencies are represented as nodes, and their dependent relationships are represented as edges (a.k.a. arcs or arrows). The direction of the edge points towards the node that is dependent on the other node. + +In the example dependency graph above: + +* **C** is dependent upon both **A** and **C**. +* **D** is dependent upon both **C** and **B**. +* **D** is indirectly dependent upon **A** through **C** via the transitive property. + +## Descendants +The descendants of a node are all subsequent nodes that depend on that node, either directly or via the transitive property. + +In the example dependency graph above: + +* **C** and **D** are descendants of **A**. +* **B** is **NOT** a descendant of **A**. + +*** +# Running pkgnet + +`pkgnet` can analyze any R package locally installed. Run `installed.packages()` to see the full list of packages installed on your system. For this example, let's say we are analyzing a custom built package, `baseballstats`. + +```{r makeDemoPackage, include=FALSE} +devtools::install_local(system.file('baseballstats',package="pkgnet"),force=TRUE) +``` + +To analyze `baseballstats`, run the following two lines of code: + +```{r pkgnetRunFirst, message=FALSE, warning=FALSE, results='hide'} +library(pkgnet) +report1 <- CreatePackageReport(pkg_name = "baseballstats") +``` + +That's it! You have generated a lot of valuable information with that one call for an installed package. + +However, if the full source repository for the package is available on your system, you can supplement this report with other information such as code coverage from [covr](https://CRAN.R-project.org/package=covr). To do so, specify the path to the repository in `CreatePackageReport`. + + +```{r pkgnetRunFAKE, eval=FALSE} +library(pkgnet) +report2 <- CreatePackageReport( + pkg_name = "baseballstats" + , pkg_path = +) +``` + +```{r pkgnetRunSecond, message=FALSE, warning=FALSE, results='hide', echo=FALSE} +library(pkgnet) +report2 <- CreatePackageReport( + pkg_name = "baseballstats" + , pkg_path = system.file('baseballstats',package="pkgnet") +) +``` + +*** +# Examining The Results + +An HTML report has been created with the pertinent information and a list object is available with the same information and more. + +## The Report + +An HTML report has been created, and its location is specified in the messages in the terminal. + +This report has three sections: + +1. **Package Summary** -- general information about the package and package level statistics +2. **Dependency Network** -- information regarding the packages upon which the current package under analysis relies upon +3. **Function Network** -- information regarding the functions within the current package under analysis and their inter dependencies + +Each section has helpful tables and visuals. + +As a sample, here's how the **Function Network Visualization** looks for `baseballstats`: + + +
+ +
+#### Default +```{r demoVis1, echo=FALSE, fig.width=3, fig.height=3} +report1$FunctionReporter$graph_viz +``` +All Functions and their dependencies are visible. For example, we can see that both `batting_avg` and `slugging_avg` functions depend upon the `at_bats` function. + +We also see that nothing depends on the `on_base_pct` function. This might be valuable information to an R package developer. +
+ +
+#### With Coverage Information +```{r demoVis2, echo=FALSE, , fig.width=3, fig.height=3} +report2$FunctionReporter$graph_viz +``` +Same as the default visualization except we can see coverage information as well (Red = 0%, Green = 100%). + +It appears the function with the most dependencies, `at_bats`, is well covered. However, no other functions are covered by unit tests. +
+ +
+ + +**Check out the full HTML report for more results** + +## The List Object + +The `CreatePackageReport()` function returns a list with three items: + +1. SummaryReporter +2. DependencyReporter +3. FunctionReporter + +Each items contains information visible in the report *and more*. We can use this information for a more detailed analysis of the results and/or more easily incorporate `pkgnet` results into other R processes. + +Here are a few notable items available within the list object. + +### Node Information +Both the `DependencyReporter` and the `FunctionReporter` contain metrics about their package dependencies or functions (a.k.a network nodes) in a `nodes` table. + +```{r nodes} +dim(report2$FunctionReporter$nodes) +names(report2$FunctionReporter$nodes) +``` + +Note, a few of these metrics provided by default are from the field of [Network Theory](https://en.wikipedia.org/wiki/Network_theory). You can leverage the **Network Object** described below to derive many more. + +### Network Measures +Both the `DependencyReporter` and the `FunctionReporter` contain measures based on their network structure in a `network_measures` list. + +```{r networkMeasures} +report2$FunctionReporter$network_measures +``` + +### Network Object +Both the `DependencyReporter` and the `FunctionReporter` are available as [igraph](http://igraph.org/r/) objects named `pkg_graph` + +```{r networkObj} +report2$FunctionReporter$pkg_graph +``` + +*** +# A Deeper Look +With the reports and objects produced by `pkgnet` by default, there is plenty to inform us on the inner workings of an R package. However, we may want to know MORE! Since the [igraph](http://igraph.org/r/) objects are available, we can leverage those graphs for further analysis. + +In this section, let's examine a larger R package, such as [lubridate](http://lubridate.tidyverse.org/). + +If you would like to follow along with the examples in this section, run these commands in your terminal to download and install `lubridate`^[Examples from version 1.7.3 of Lubridate]. +```{r bashText, engine='bash', eval=FALSE} +# Create a temporary workspace +mkdir -p ~/pkgnet_example && cd ~/pkgnet_example + +# Grab the lubridate source code +git clone https://github.com/tidyverse/lubridate +cd lubridate + +# If you want the examples to match exactly +git reset --hard 9797d69abe1574dd89310c834e52d358137669b8 + +# Install it +Rscript -e "devtools::install()" +``` + +## Coverage of Most Referenced Functions +Let's examine the relationship between a function's total number of descendants and the unit test coverage of that function's code. + + +```{r fakeDetail1, eval=FALSE} +# Run pkgnet +library(pkgnet) +report2 <- CreatePackageReport( + pkg_name = "lubridate" + , pkg_path = "~/pkgnet_example/lubridate" +) + +# Extract Nodes Table +funcNodes <- report2$FunctionReporter$nodes + +# List Coverage For Most Referenced Functions +mostRef <- funcNodes[order(numDescendants, decreasing = TRUE)][1:10] +mostRef[,list(`Function` = node + , `Descendant Count` = numDescendants + , `Coverage Ratio` = coverageRatio + , `Total Lines` = totalLines)] + +``` + +|Function | Descendant Count| Coverage Ratio| Total Lines| +|:-----------------------|----------------:|--------------:|-----------:| +|divide_period_by_period | 39| 1| 2| +|days | 22| 1| 1| +|check_duration | 15| 0| 1| +|as.POSIXt | 13| 0| 1| +|eweeks | 13| 0| 2| +|check_interval | 12| 0| 11| +|date<- | 12| NA| NA| +|add_months | 10| 1| 4| +|ceil_multi_unit | 10| 1| 1| +|am | 6| 1| 1| + +Inspecting results such as these can help an R package developer decide which function to cover with unit tests next. + +In this case, `check_duration`, one of the most referenced functions (either directly or indirectly), is not covered by unit tests. However, it appears to be a simple one line function that may not be necessary to cover in unit testing. `check_interval`, on the other hand, might benefit from some unit test coverage as it is a larger, uncovered function with a similar number of dependencies. + +## Discovering Similar Functions +Looking at that same large package, let's say we wanted to explore options for consolidating functions. One approach might be to explore consolidating functions that share the same dependencies. In that case, we could use the `igraph` object to highlight functions with the same dependencies via [Jaccard similarity](https://en.wikipedia.org/wiki/Jaccard_index). + +```{r fakeDetail2, eval=FALSE} +# Get igraph object +funcGraph <- report2$FunctionReporter$pkg_graph +funcNames <- igraph::vertex_attr(funcGraph, name = "name") + +# Jaccard Similarity +sim <- igraph::similarity(graph = funcGraph + , mode = "in" + , method = "jaccard") +diag(sim) <- 0 +sim[sim < 1] <- 0 + +simGraph <- igraph::graph_from_adjacency_matrix(adjmatrix = sim, mode = "undirected") + +# Find groups with same dependencies (similarity == 1) +sameDeps <- igraph::max_cliques(graph = simGraph + , min = 2 + ) + +# Write results +for (i in seq_along(sameDeps)) { + cat(paste0("Group ", i, ": ")) + cat(paste(funcNames[as.numeric(sameDeps[[i]])], collapse = ", ")) + cat("\n") +} +``` + +```{r resultFromFake, echo=FALSE, results='markup'} +cat("Group 1: stamp_time, stamp_date +Group 2: ms, hm +Group 3: new_interval, %--%, int_diff +Group 4: floor_date, quarter, semester +Group 5: picoseconds, microseconds, nanoseconds, milliseconds +Group 6: weeks, days, years, seconds_to_period, seconds, new_period, minutes, hours +Group 7: yq, dmy, ymd_hms, ymd_hm, ymd_h, ymd, ydm_hms, ydm_hm, ydm_h, ydm, pretty_dates, parse_date_time2, parse_date_time, myd, mdy_hms, mdy_hm, mdy_h, mdy, local_time, fast_strptime, dym, dmy_hms, dmy_hm, dmy_h +" +) +``` + +Now, we have identified seven different groups of functions within [lubridate](http://lubridate.tidyverse.org/) that share the _exact same_ dependencies. We could explore each group of functions for potential consolidation. + + + +```{r removeDemoPackage, include=FALSE} +devtools::uninstall(system.file('baseballstats',package="pkgnet")) +``` +