Skip to content

Commit

Permalink
refactor: functionalized data_format conversion, added data.table option
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanbass committed Sep 18, 2024
1 parent 874b248 commit 9b89525
Show file tree
Hide file tree
Showing 44 changed files with 481 additions and 329 deletions.
38 changes: 18 additions & 20 deletions R/call_aston.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
#'
#' @name sp_converter
#' @param path Path to file
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @param read_metadata Logical. Whether to read metadata and attach it to the
#' chromatogram.
Expand All @@ -17,13 +18,13 @@
#' @import reticulate
#' @export sp_converter

sp_converter <- function(path, format_out = c("matrix", "data.frame"),
data_format = c("wide","long"),
sp_converter <- function(path, format_out = c("matrix", "data.frame", "data.table"),
data_format = c("wide", "long"),
read_metadata = TRUE,
metadata_format = c("chromconverter", "raw")){
check_aston_configuration()
format_out <- match.arg(format_out, c("matrix","data.frame"))
data_format <- match.arg(data_format, c("wide","long"))
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format,
chromconverter = "masshunter_dad", raw = "raw")
Expand All @@ -34,9 +35,7 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame"),
if (data_format == "long"){
x <- reshape_chrom(x, data_format = "long")
}
if (format_out == "matrix"){
x <- as.matrix(x)
}
x <- convert_chrom_format(x, format_out = format_out)
if (read_metadata){
meta <- read_masshunter_metadata(path)
x <- attach_metadata(x, meta, format_in = metadata_format,
Expand All @@ -55,7 +54,8 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame"),
#'
#' @name uv_converter
#' @param path Path to file
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @param correction Logical. Whether to apply empirical correction. Defaults is
#' TRUE.
Expand All @@ -66,12 +66,12 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame"),
#' @return A chromatogram in \code{data.frame} format (retention time x wavelength).
#' @import reticulate
#' @export uv_converter
uv_converter <- function(path, format_out = c("matrix","data.frame"),
uv_converter <- function(path, format_out = c("matrix","data.frame","data.table"),
data_format = c("wide","long"),
correction = TRUE, read_metadata = TRUE,
metadata_format = c("chromconverter", "raw")){
check_aston_configuration()
format_out <- match.arg(format_out, c("matrix","data.frame"))
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide","long"))
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format,
Expand All @@ -84,13 +84,11 @@ uv_converter <- function(path, format_out = c("matrix","data.frame"),
if (data_format == "long"){
x <- reshape_chrom(x, data_format = "long")
}
if (format_out == "matrix"){
x <- as.matrix(x)
}
x <- convert_chrom_format(x, format_out = format_out)
if (correction){
# multiply by empirical correction value
correction_value <- 0.9536743164062551070259132757200859487056732177734375
x <- apply(x,2,function(xx)xx*correction_value)
x <- apply(x, 2, function(xx)xx*correction_value)
}
if (read_metadata){
meta <- read_chemstation_metadata(path)
Expand All @@ -107,15 +105,17 @@ uv_converter <- function(path, format_out = c("matrix","data.frame"),
#' @name trace_converter
#' @title generic converter for other types of files
#' @param path Path to file
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @return A chromatogram in \code{data.frame} format (retention time x wavelength).
#' @import reticulate
#' @noRd
trace_converter <- function(path, format_out = c("matrix", "data.frame"),
data_format = c("wide", "long")){
check_aston_configuration()
format_out <- match.arg(format_out, c("matrix", "data.frame"))
format_out <- check_format_out(format_out)
format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table"))
data_format <- match.arg(data_format, c("wide", "long"))
trace_file <- reticulate::import("aston.tracefile")
pd <- reticulate::import("pandas")
Expand All @@ -125,9 +125,7 @@ trace_converter <- function(path, format_out = c("matrix", "data.frame"),
if (data_format == "long"){
x <- reshape_chrom(x, data_format = "long")
}
if (format_out == "matrix"){
x <- as.matrix(x)
}
x <- convert_chrom_format(x, format_out = format_out)
x
}

Expand Down
22 changes: 9 additions & 13 deletions R/call_entab.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#' Converts files using Entab parsers
#' @param path Path to file
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param format_in Format of input.
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param read_metadata Whether to read metadata from file.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
#' or \code{raw}.
Expand All @@ -12,16 +13,15 @@
#' @export

call_entab <- function(path, data_format = c("wide", "long"),
format_in = "",
format_out = c("matrix", "data.frame"),
read_metadata = TRUE,
format_out = c("matrix", "data.frame", "data.table"),
format_in = "", read_metadata = TRUE,
metadata_format = c("chromconverter", "raw")){
if (!requireNamespace("entab", quietly = TRUE)){
stop("The entab R package must be installed to use entab parsers:
install.packages('entab', repos='https://ethanbass.github.io/drat/')",
call. = FALSE)
}
format_out <- match.arg(format_out, c("matrix", "data.frame"))
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))

metadata_format <- match.arg(tolower(metadata_format), c("chromconverter", "raw"))
Expand All @@ -35,23 +35,19 @@ call_entab <- function(path, data_format = c("wide", "long"),
if (length(signal.idx) == 1){
colnames(x)[signal.idx] <- "wavelength"
}
colnames(x) <- c("rt", "lambda", "intensity")
if (data_format == "wide"){
x <- reshape_chrom_wide(x, time_var = "time", lambda_var = "wavelength",
x <- reshape_chrom_wide(x, time_var = "rt", lambda_var = "lambda",
value_var = "intensity")
if (format_out == "matrix"){
x <- as.matrix(x)
}
}
} else if (grepl("fid$", file_format)){
if (data_format == "wide"){
x <- data.frame(row.names = x$time, intensity = x$intensity)
}
if (format_out == "matrix"){
x <- as.matrix(x)
}
} else if (grepl("ms$", file_format)){
colnames(x)[1] <- "rt"
colnames(x)[c(1,3)] <- c("rt", "intensity")
}
x <- convert_chrom_format(x, format_out = format_out)
if (read_metadata){
meta <- r$metadata()
meta$run_date <- as.POSIXct(eval(meta$run_date))
Expand Down
16 changes: 10 additions & 6 deletions R/call_openchrom.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#' @param path_out directory to export converted files.
#' @param format_in Either `msd` for mass spectrometry data, `csd` for flame
#' ionization data, or `wsd` for DAD/UV data.
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out R format. Either \code{matrix}, \code{data.frame} or
#' \code{data.table}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @param export_format Either \code{mzml}, \code{csv}, \code{cdf}, \code{animl}.
#' Defaults to \code{mzml}.
#' @param return_paths Logical. If TRUE, the function will return a character
Expand All @@ -51,11 +53,12 @@
#' @export

call_openchrom <- function(files, path_out = NULL, format_in,
format_out = c("matrix","data.frame"),
export_format = c("mzml", "csv", "cdf", "animl"),
return_paths = FALSE,
format_out = c("matrix", "data.frame", "data.table"),
data_format = c("wide", "long"),
export_format = c("mzml", "csv", "cdf", "animl"),
return_paths = FALSE,
verbose = getOption("verbose")){
format_out <- match.arg(format_out, c("matrix","data.frame"))
format_out <- check_format_out(format_out)
if (length(files) == 0){
stop("Files not found.")
}
Expand Down Expand Up @@ -87,7 +90,8 @@ call_openchrom <- function(files, path_out = NULL, format_in,
} else{
file_reader <- switch(export_format,
"csv" = read.csv,
"cdf" = read_cdf,
"cdf" = purrr::partial(read_cdf, format_out = format_out,
data_format = data_format),
"animl" = warning("An animl parser is not currently available in chromConverter"),
"mzml" = read_mzml)
lapply(new_files, function(x){
Expand Down
45 changes: 28 additions & 17 deletions R/call_rainbow.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@
#' @param path Path to file
#' @param format_in Format of the supplied files. Either \code{agilent_d},
#' \code{waters_raw}, or \code{chemstation}.
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out R format. Either \code{matrix}, \code{data.frame}, or
#' \code{data.table}.
#' @param data_format Whether to return data in wide or long format.
#' @param what What types of data to return (e.g. \code{MS}, \code{UV}, \code{CAD},
#' \code{ELSD}). This argument only applies if \code{by == "detector"}.
#' @param by How to order the list that is returned. Either \code{detector}
#' (default) or \code{name}.
#' @param read_metadata Logical. Whether to attach metadata. Defaults to TRUE.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
#' or \code{raw}.
#' @param collapse Logical. Whether to collapse lists that only contain a single
#' element.
#' @param precision Number of decimals to round mz values. Defaults to 1.
Expand All @@ -30,15 +33,19 @@ call_rainbow <- function(path,
format_in = c("agilent_d", "waters_raw", "masshunter",
"chemstation", "chemstation_uv",
"chemstation_fid"),
format_out = c("matrix", "data.frame"),
format_out = c("matrix", "data.frame", "data.table"),
data_format = c("wide", "long"),
by = c("detector","name"), what = NULL,
read_metadata = TRUE, collapse = TRUE,
precision = 1){
read_metadata = TRUE,
metadata_format = c("chromconverter", "raw"),
collapse = TRUE, precision = 1){
check_rb_configuration()
by <- match.arg(by, c("detector","name"))
format_out <- match.arg(format_out, c("matrix","data.frame"))
by <- match.arg(by, c("detector", "name"))
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
metadata_format <- match.arg(tolower(metadata_format),
c("chromconverter", "raw"))
metadata_format <- switch(metadata_format, "chromconverter" = "rainbow", "")

if (grepl("chemstation", format_in)){
format_in <- "chemstation"
Expand Down Expand Up @@ -66,7 +73,8 @@ call_rainbow <- function(path,
xx <- lapply(x$by_detector[dtr.idx], function(dtr){
dtr_dat <- lapply(dtr, function(xx){
extract_rb_data(xx, format_out = format_out, data_format = data_format,
read_metadata = read_metadata)
read_metadata = read_metadata, meta = x$metadata,
metadata_format = metadata_format, source_file = path)
})
names(dtr_dat) <- extract_rb_names(dtr)
if (collapse) dtr_dat <- collapse_list(dtr_dat)
Expand All @@ -75,12 +83,14 @@ call_rainbow <- function(path,
} else if (by == "name"){
xx <- lapply(x$datafiles, function(xx){
extract_rb_data(xx, format_out = format_out, data_format = data_format,
read_metadata = read_metadata)
read_metadata = read_metadata, meta = x$metadata,
metadata_format = metadata_format, source_file = path)
})
names(xx) <- names(x$by_name)
} else{
xx <- extract_rb_data(x, format_out = format_out, data_format = data_format,
read_metadata = read_metadata)
read_metadata = read_metadata, meta = x$metadata,
metadata_format = metadata_format, source_file = path)
}
xx
}
Expand All @@ -91,7 +101,10 @@ call_rainbow <- function(path,
#' @noRd
extract_rb_data <- function(xx, format_out = "matrix",
data_format = c("wide", "long"),
read_metadata = TRUE){
read_metadata = TRUE,
metadata_format = "rainbow",
meta = NULL,
source_file){
data_format <- match.arg(data_format, c("wide", "long"))
data <- xx$data
try(rownames(data) <- xx$xlabels)
Expand All @@ -102,14 +115,12 @@ extract_rb_data <- function(xx, format_out = "matrix",
"lambda")
data <- reshape_chrom(data, data_format = "long", names_to = names_to)
}
if (format_out == "data.frame"){
data <- as.data.frame(data)
}
data <- convert_chrom_format(data, format_out = format_out)
if (read_metadata){
try(attr(data, "detector") <- xx$detector)
try(attr(data, "metadata") <- xx$metadata)
attr(data, "parser") <- "rainbow"
attr(data, "data_format") <- data_format
meta <- c(meta, xx$metadata, detector = xx$detector)
data <- attach_metadata(data, meta = meta, format_in = metadata_format,
format_out = format_out, data_format = data_format,
parser = "rainbow", source_file = source_file)
}
data
}
Expand Down
9 changes: 6 additions & 3 deletions R/read_cdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
#' Parser for Analytical Data Interchange (ANDI) netCDF files.
#'
#' @param path Path to ANDI netCDF file.
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{\link[data.table]{data.table}}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' For 2D files, "long" format returns the retention time as the first column of
#' the data.frame or matrix while "wide" format returns the retention time as the
Expand Down Expand Up @@ -56,7 +57,8 @@ read_cdf <- function(path, format_out = c("matrix", "data.frame", "data.table"),

#' Read ANDI chrom file
#' @param path Path to file.
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' For 2D files, "long" format returns the retention time as the first column of
#' the data.frame or matrix while "wide" format returns the retention time as the
Expand Down Expand Up @@ -120,7 +122,8 @@ read_andi_chrom <- function(path, format_out = c("matrix", "data.frame", "data.t

#' Read ANDI MS file
#' @param path Path to file.
#' @param format_out R format. Either \code{matrix} or \code{data.frame}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Whether to return the total ion chromatogram in \code{wide}
#' or \code{long} format. The "long" format returns the retention time as the
#' first column of the data.frame or matrix while "wide" format returns the
Expand Down
Loading

0 comments on commit 9b89525

Please sign in to comment.