Skip to content

Commit

Permalink
Fixes #383 histogram can plot bars as frequency (#384)
Browse files Browse the repository at this point in the history
* Fixes #383 histogram can plot bars as frequency

* Update documentation about default values of histogram
  • Loading branch information
pchelle authored Oct 6, 2022
1 parent 7b4a939 commit afad758
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 40 deletions.
8 changes: 7 additions & 1 deletion R/histogram-datamapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ HistogramDataMapping <- R6::R6Class(
"HistogramDataMapping",
inherit = XYGDataMapping,
public = list(
#' @field frequency logical defining if histogram displays a frequency in y axis
frequency = NULL,
#' @field stack logical defining if histogram bars should be stacked
stack = NULL,
#' @field bins number of bins or binning values/methods passed on `ggplot2::geom_histogram`
Expand All @@ -17,22 +19,26 @@ HistogramDataMapping <- R6::R6Class(
distribution = NULL,

#' @description Create a new `HistogramDataMapping` object
#' @param frequency logical defining if histogram displays a frequency in y axis
#' @param stack logical defining if histogram bars should be stacked
#' @param bins argument passed on `ggplot2::geom_histogram`
#' @param binwidth width of bins passed on `ggplot2::geom_histogram`. Overwrites `bins`
#' @param distribution Name of distribution to fit to the data.
#' Only 2 distributions are currently available: `"normal"` and `"logNormal"`
#' @param ... parameters inherited from `XYGDataMapping`
#' @return A new `HistogramDataMapping` object
initialize = function(stack = FALSE,
initialize = function(frequency = FALSE,
stack = FALSE,
bins = NULL,
binwidth = NULL,
distribution = NULL,
...) {
super$initialize(...)
validateIsLogical(frequency)
validateIsLogical(stack)
validateIsIncluded(distribution, c("none", "normal", "logNormal"), nullAllowed = TRUE)

self$frequency <- frequency
self$stack <- stack
self$bins <- bins %||% tlfEnv$defaultAggregation$bins
self$binwidth <- binwidth
Expand Down
44 changes: 42 additions & 2 deletions R/plot-histogram.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
#' Producing Histograms
#'
#' @inheritParams addScatter
#' @param frequency logical defining if histogram displays a frequency in y axis
#' @param bins Number or edges of bins.
#' If `bins` is provided as a single numeric values, `bin` corresponds to number of bins.
#' The bin edges are then equally spaced within the range of data.
#' If `bins` is provided as an array of numeric values, `bin` corresponds to their edges.
#' Default value, `bins=NULL`, uses the value defined by `dataMapping`
#' @param binwidth Numerical value of defining the width of each bin.
#' If defined, `binwidth` can overwrite `bins` if `bins` was not provided or simply provided as a single value.
#' Default value, `binwidth=NULL`, uses the value defined by `dataMapping`
#' @param stack Logical defining for multiple histograms if their bars are stacked
#' Default value, `stack=NULL`, uses the value defined by `dataMapping`
#' @param distribution Name of distribution to fit to the data.
#' Only 2 distributions are currently available: `"normal"` and `"logNormal"`
#' Use `distribution="none"` to prevent fit of distribution
#' Default value, `distribution=NULL`, uses the value defined by `dataMapping`
#' @param dataMapping
#' A `HistogramDataMapping` object mapping `x` and aesthetic groups to their variable names of `data`.
#' @param plotConfiguration
Expand All @@ -27,6 +33,9 @@
#' # Produce histogram of normally distributed data
#' plotHistogram(x = rnorm(100))
#'
#' # Produce histogram of normally distributed data normalized in y axis
#' plotHistogram(x = rnorm(100), frequency = TRUE)
#'
#' # Produce histogram of normally distributed data with many bins
#' plotHistogram(x = rlnorm(100), bins = 21)
#'
Expand All @@ -37,6 +46,7 @@ plotHistogram <- function(data = NULL,
metaData = NULL,
x = NULL,
dataMapping = NULL,
frequency = NULL,
bins = NULL,
binwidth = NULL,
stack = NULL,
Expand All @@ -61,16 +71,26 @@ plotHistogram <- function(data = NULL,
validateIsNumeric(binwidth, nullAllowed = TRUE)
validateIsLogical(stack, nullAllowed = TRUE)
validateIsIncluded(distribution, c("normal", "logNormal", "none"), nullAllowed = TRUE)
validateIsLogical(frequency, nullAllowed = TRUE)

dataMapping$frequency <- frequency %||% dataMapping$frequency
dataMapping$stack <- stack %||% dataMapping$stack
dataMapping$distribution <- distribution %||% dataMapping$distribution
dataMapping$bins <- bins %||% dataMapping$bins
dataMapping$binwidth <- binwidth %||% dataMapping$binwidth

# Check for default labeling to update plotConfiguration after using .setPlotConfiguration
ylabel <- NULL
if (isEmpty(plotConfiguration)) {
ylabel <- ifelse(dataMapping$frequency, "Relative frequency", "Count")
}

plotConfiguration <- .setPlotConfiguration(
plotConfiguration, HistogramPlotConfiguration,
data, metaData, dataMapping
)
# Update default ylabel based on frequency
plotConfiguration$labels$ylabel <- ylabel %||% plotConfiguration$labels$ylabel
plotObject <- .setPlotObject(plotObject, plotConfiguration)

mapData <- dataMapping$checkMapData(data)
Expand All @@ -89,6 +109,23 @@ plotHistogram <- function(data = NULL,
if (length(dataMapping$bins) > 1) {
edges <- dataMapping$bins
}
# Manage ggplot aes_string property depending on stack and frequency options
# geom_histogram can use computed variables defined between two dots
# see https://ggplot2.tidyverse.org/reference/geom_histogram.html for more info
yAes <- "..count.."

if (dataMapping$frequency) {
# If histogram bars are not stacked, calculate frequency within each data groups
# Since there is no direct computed variable
# ncount variable is scaled by binwidth*dnorm(0) to get an area of ~1
yAes <- paste0("..ncount..*max(..width..)*", stats::dnorm(0))
if (dataMapping$stack) {
# If histogram bars are stacked,
# Calculate overall frequency as count per bin / total
# This results in same histogram shapes no matter the data groups
yAes <- "..count../sum(..count..)"
}
}

aestheticValues <- .getAestheticValuesFromConfiguration(
n = 1,
Expand All @@ -101,6 +138,7 @@ plotHistogram <- function(data = NULL,
data = mapData,
mapping = ggplot2::aes_string(
x = mapLabels$x,
y = yAes,
fill = mapLabels$fill
),
position = position,
Expand Down Expand Up @@ -202,9 +240,10 @@ plotHistogram <- function(data = NULL,
binwidth <- dataMapping$binwidth %||% binwidth

if (dataMapping$stack) {
yScaling <- binwidth * ifelse(dataMapping$frequency, 1, length(x))
dataFit <- data.frame(
x = xFit,
y = length(x) * binwidth * switch(dataMapping$distribution,
y = yScaling * switch(dataMapping$distribution,
"normal" = stats::dnorm(xFit, mean = mean(x, na.rm = TRUE), sd = stats::sd(x, na.rm = TRUE)),
"logNormal" = stats::dlnorm(xFit, meanlog = mean(log(x), na.rm = TRUE), sdlog = stats::sd(log(x), na.rm = TRUE))
),
Expand All @@ -216,11 +255,12 @@ plotHistogram <- function(data = NULL,
dataFit <- NULL
for (groupLevel in levels(data$legendLabels)) {
selectedGroup <- data$legendLabels %in% groupLevel
yScaling <- binwidth * ifelse(dataMapping$frequency, 1, length(x[selectedGroup]))
dataFit <- rbind.data.frame(
dataFit,
data.frame(
x = xFit,
y = length(x[selectedGroup]) * binwidth * switch(dataMapping$distribution,
y = yScaling * switch(dataMapping$distribution,
"normal" = stats::dnorm(
xFit,
mean = mean(x[selectedGroup], na.rm = TRUE),
Expand Down
11 changes: 1 addition & 10 deletions R/plotconfiguration-sub-classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,7 @@ ResVsTimePlotConfiguration <- R6::R6Class(
#' @family PlotConfiguration classes
HistogramPlotConfiguration <- R6::R6Class(
"HistogramPlotConfiguration",
inherit = PlotConfiguration,
public = list(
#' @description Create a new `HistogramPlotConfiguration` object
#' @param ylabel Histograms default display is "Count"
#' @param ... parameters inherited from `PlotConfiguration`
#' @return A new `HistogramPlotConfiguration` object
initialize = function(ylabel = "Count", ...) {
super$initialize(ylabel = ylabel, ...)
}
)
inherit = PlotConfiguration
)

#' @title QQPlotConfiguration
Expand Down
5 changes: 5 additions & 0 deletions man/HistogramDataMapping.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 7 additions & 23 deletions man/HistogramPlotConfiguration.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 15 additions & 4 deletions man/plotHistogram.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit afad758

Please sign in to comment.