diff --git a/R/call_aston.R b/R/call_aston.R index 891c756..b7833a7 100644 --- a/R/call_aston.R +++ b/R/call_aston.R @@ -7,7 +7,8 @@ #' #' @name sp_converter #' @param path Path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to read metadata and attach it to the #' chromatogram. @@ -17,13 +18,13 @@ #' @import reticulate #' @export sp_converter -sp_converter <- function(path, format_out = c("matrix", "data.frame"), - data_format = c("wide","long"), +sp_converter <- function(path, format_out = c("matrix", "data.frame", "data.table"), + data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ check_aston_configuration() - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) + format_out <- check_format_out(format_out) + data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, chromconverter = "masshunter_dad", raw = "raw") @@ -34,9 +35,7 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame"), if (data_format == "long"){ x <- reshape_chrom(x, data_format = "long") } - if (format_out == "matrix"){ - x <- as.matrix(x) - } + x <- convert_chrom_format(x, format_out = format_out) if (read_metadata){ meta <- read_masshunter_metadata(path) x <- attach_metadata(x, meta, format_in = metadata_format, @@ -55,7 +54,8 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame"), #' #' @name uv_converter #' @param path Path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. 
Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param correction Logical. Whether to apply empirical correction. Defaults is #' TRUE. @@ -66,12 +66,12 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame"), #' @return A chromatogram in \code{data.frame} format (retention time x wavelength). #' @import reticulate #' @export uv_converter -uv_converter <- function(path, format_out = c("matrix","data.frame"), +uv_converter <- function(path, format_out = c("matrix","data.frame","data.table"), data_format = c("wide","long"), correction = TRUE, read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ check_aston_configuration() - format_out <- match.arg(format_out, c("matrix","data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide","long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, @@ -84,13 +84,11 @@ uv_converter <- function(path, format_out = c("matrix","data.frame"), if (data_format == "long"){ x <- reshape_chrom(x, data_format = "long") } - if (format_out == "matrix"){ - x <- as.matrix(x) - } + x <- convert_chrom_format(x, format_out = format_out) if (correction){ # multiply by empirical correction value correction_value <- 0.9536743164062551070259132757200859487056732177734375 - x <- apply(x,2,function(xx)xx*correction_value) + x <- apply(x, 2, function(xx)xx*correction_value) } if (read_metadata){ meta <- read_chemstation_metadata(path) @@ -107,7 +105,8 @@ uv_converter <- function(path, format_out = c("matrix","data.frame"), #' @name trace_converter #' @title generic converter for other types of files #' @param path Path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. 
#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @return A chromatogram in \code{data.frame} format (retention time x wavelength). #' @import reticulate @@ -115,7 +114,8 @@ uv_converter <- function(path, format_out = c("matrix","data.frame"), trace_converter <- function(path, format_out = c("matrix", "data.frame"), data_format = c("wide", "long")){ check_aston_configuration() - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) + format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table")) data_format <- match.arg(data_format, c("wide", "long")) trace_file <- reticulate::import("aston.tracefile") pd <- reticulate::import("pandas") @@ -125,9 +125,7 @@ trace_converter <- function(path, format_out = c("matrix", "data.frame"), if (data_format == "long"){ x <- reshape_chrom(x, data_format = "long") } - if (format_out == "matrix"){ - x <- as.matrix(x) - } + x <- convert_chrom_format(x, format_out = format_out) x } diff --git a/R/call_entab.R b/R/call_entab.R index 5c3696d..042bb1e 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -2,8 +2,9 @@ #' Converts files using Entab parsers #' @param path Path to file #' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param format_in Format of input. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. 
@@ -12,16 +13,15 @@ #' @export call_entab <- function(path, data_format = c("wide", "long"), - format_in = "", - format_out = c("matrix", "data.frame"), - read_metadata = TRUE, + format_out = c("matrix", "data.frame", "data.table"), + format_in = "", read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ if (!requireNamespace("entab", quietly = TRUE)){ stop("The entab R package must be installed to use entab parsers: install.packages('entab', repos='https://ethanbass.github.io/drat/')", call. = FALSE) } - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(tolower(metadata_format), c("chromconverter", "raw")) @@ -35,23 +35,19 @@ call_entab <- function(path, data_format = c("wide", "long"), if (length(signal.idx) == 1){ colnames(x)[signal.idx] <- "wavelength" } + colnames(x) <- c("rt", "lambda", "intensity") if (data_format == "wide"){ - x <- reshape_chrom_wide(x, time_var = "time", lambda_var = "wavelength", + x <- reshape_chrom_wide(x, time_var = "rt", lambda_var = "lambda", value_var = "intensity") - if (format_out == "matrix"){ - x <- as.matrix(x) } - } } else if (grepl("fid$", file_format)){ if (data_format == "wide"){ x <- data.frame(row.names = x$time, intensity = x$intensity) } - if (format_out == "matrix"){ - x <- as.matrix(x) - } } else if (grepl("ms$", file_format)){ - colnames(x)[1] <- "rt" + colnames(x)[c(1,3)] <- c("rt", "intensity") } + x <- convert_chrom_format(x, format_out = format_out) if (read_metadata){ meta <- r$metadata() meta$run_date <- as.POSIXct(eval(meta$run_date)) diff --git a/R/call_openchrom.R b/R/call_openchrom.R index f905e73..8febb9c 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -28,7 +28,9 @@ #' @param path_out directory to export converted files. 
#' @param format_in Either `msd` for mass spectrometry data, `csd` for flame #' ionization data, or `wsd` for DAD/UV data. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out R format. Either \code{matrix}, \code{data.frame} or +#' \code{data.table}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param export_format Either \code{mzml}, \code{csv}, \code{cdf}, \code{animl}. #' Defaults to \code{mzml}. #' @param return_paths Logical. If TRUE, the function will return a character @@ -51,11 +53,12 @@ #' @export call_openchrom <- function(files, path_out = NULL, format_in, - format_out = c("matrix","data.frame"), - export_format = c("mzml", "csv", "cdf", "animl"), - return_paths = FALSE, + format_out = c("matrix", "data.frame", "data.table"), + data_format = c("wide", "long"), + export_format = c("mzml", "csv", "cdf", "animl"), + return_paths = FALSE, verbose = getOption("verbose")){ - format_out <- match.arg(format_out, c("matrix","data.frame")) + format_out <- check_format_out(format_out) if (length(files) == 0){ stop("Files not found.") } @@ -87,7 +90,8 @@ call_openchrom <- function(files, path_out = NULL, format_in, } else{ file_reader <- switch(export_format, "csv" = read.csv, - "cdf" = read_cdf, + "cdf" = purrr::partial(read_cdf, format_out = format_out, + data_format = data_format), "animl" = warning("An animl parser is not currently available in chromConverter"), "mzml" = read_mzml) lapply(new_files, function(x){ diff --git a/R/call_rainbow.R b/R/call_rainbow.R index 2a06d19..af765d1 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -10,13 +10,16 @@ #' @param path Path to file #' @param format_in Format of the supplied files. Either \code{agilent_d}, #' \code{waters_raw}, or \code{chemstation}. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out R format. Either \code{matrix}, \code{data.frame}, or +#' \code{data.table}. 
#' @param data_format Whether to return data in wide or long format. #' @param what What types of data to return (e.g. \code{MS}, \code{UV}, \code{CAD}, #' \code{ELSD}). This argument only applies if \code{by == "detector"}. #' @param by How to order the list that is returned. Either \code{detector} #' (default) or \code{name}. #' @param read_metadata Logical. Whether to attach metadata. Defaults to TRUE. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. #' @param collapse Logical. Whether to collapse lists that only contain a single #' element. #' @param precision Number of decimals to round mz values. Defaults to 1. @@ -30,15 +33,19 @@ call_rainbow <- function(path, format_in = c("agilent_d", "waters_raw", "masshunter", "chemstation", "chemstation_uv", "chemstation_fid"), - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), by = c("detector","name"), what = NULL, - read_metadata = TRUE, collapse = TRUE, - precision = 1){ + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE, precision = 1){ check_rb_configuration() - by <- match.arg(by, c("detector","name")) - format_out <- match.arg(format_out, c("matrix","data.frame")) + by <- match.arg(by, c("detector", "name")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(tolower(metadata_format), + c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, "chromconverter" = "rainbow", "") if (grepl("chemstation", format_in)){ format_in <- "chemstation" @@ -66,7 +73,8 @@ call_rainbow <- function(path, xx <- lapply(x$by_detector[dtr.idx], function(dtr){ dtr_dat <- lapply(dtr, function(xx){ extract_rb_data(xx, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, meta = x$metadata, + metadata_format = 
metadata_format, source_file = path) }) names(dtr_dat) <- extract_rb_names(dtr) if (collapse) dtr_dat <- collapse_list(dtr_dat) @@ -75,12 +83,14 @@ call_rainbow <- function(path, } else if (by == "name"){ xx <- lapply(x$datafiles, function(xx){ extract_rb_data(xx, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, meta = x$metadata, + metadata_format = metadata_format, source_file = path) }) names(xx) <- names(x$by_name) } else{ xx <- extract_rb_data(x, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, meta = x$metadata, + metadata_format = metadata_format, source_file = path) } xx } @@ -91,7 +101,10 @@ call_rainbow <- function(path, #' @noRd extract_rb_data <- function(xx, format_out = "matrix", data_format = c("wide", "long"), - read_metadata = TRUE){ + read_metadata = TRUE, + metadata_format = "rainbow", + meta = NULL, + source_file){ data_format <- match.arg(data_format, c("wide", "long")) data <- xx$data try(rownames(data) <- xx$xlabels) @@ -102,14 +115,12 @@ extract_rb_data <- function(xx, format_out = "matrix", "lambda") data <- reshape_chrom(data, data_format = "long", names_to = names_to) } - if (format_out == "data.frame"){ - data <- as.data.frame(data) - } + data <- convert_chrom_format(data, format_out = format_out) if (read_metadata){ - try(attr(data, "detector") <- xx$detector) - try(attr(data, "metadata") <- xx$metadata) - attr(data, "parser") <- "rainbow" - attr(data, "data_format") <- data_format + meta <- c(meta, xx$metadata, detector = xx$detector) + data <- attach_metadata(data, meta = meta, format_in = metadata_format, + format_out = format_out, data_format = data_format, + parser = "rainbow", source_file = source_file) } data } diff --git a/R/read_cdf.R b/R/read_cdf.R index 3b3f31e..c56215c 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -3,7 +3,8 @@ #' Parser for Analytical Data Interchange (ANDI) netCDF 
files. #' #' @param path Path to ANDI netCDF file. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{\link[data.table]{data.table}}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' For 2D files, "long" format returns the retention time as the first column of #' the data.frame or matrix while "wide" format returns the retention time as the @@ -56,7 +57,8 @@ read_cdf <- function(path, format_out = c("matrix", "data.frame", "data.table"), #' Read ANDI chrom file #' @param path Path to file. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' For 2D files, "long" format returns the retention time as the first column of #' the data.frame or matrix while "wide" format returns the retention time as the @@ -120,7 +122,8 @@ read_andi_chrom <- function(path, format_out = c("matrix", "data.frame", "data.t #' Read ANDI MS file #' @param path Path to file. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return the total ion chromatogram in \code{wide} #' or \code{long} format. The "long" format returns the retention time as the #' first column of the data.frame or matrix while "wide" format returns the diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index bd162e9..46867ab 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -9,7 +9,8 @@ #' #' @importFrom bitops bitAnd bitShiftL #' @param path Path to \code{.ch} file -#' @param format_out Matrix or data.frame. +#' @param format_out Class of output. 
Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to attach metadata. #' @param metadata_format Format to output metadata. Either \code{chromconverter} @@ -32,12 +33,12 @@ #' @export #' @md -read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), +read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), scale = TRUE){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, chromconverter = "chemstation", @@ -112,15 +113,10 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), if (scale){ data <- data * scaling_factor + intercept } + data <- format_2d_chromatogram(rt = times, int = data, + data_format = data_format, + format_out = format_out) - if (data_format == "wide"){ - data <- data.frame(Intensity = data, row.names = times) - } else if (data_format == "long"){ - data <- data.frame(RT = times, Intensity = data) - } - if (format_out == "matrix"){ - data <- as.matrix(data) - } if (read_metadata){ meta_slots <- switch(version, "8" = 10, "81" = 10, @@ -193,9 +189,7 @@ get_nchar <- function(f){ #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). #' @noRd - decode_double_delta <- function(file, offset){ - seek(file, 0, 'end') fsize <- seek(file, NA, "current") @@ -264,7 +258,6 @@ decode_double_array_8byte <- function(file, offset){ #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). 
#' @noRd - decode_delta <- function(file, offset){ seek(file, 0, 'end') fsize <- seek(file, NA, "current") @@ -441,7 +434,8 @@ get_agilent_offsets <- function(version){ #' @importFrom utils unzip #' @param path Path to \code{.dx} file. #' @param path_out Path to directory to export unzipped files. -#' @param format_out Matrix or data.frame. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to attach metadata. #' @author Ethan Bass @@ -450,10 +444,10 @@ get_agilent_offsets <- function(version){ #' @author Ethan Bass #' @export read_agilent_dx <- function(path, path_out = NULL, - format_out = c("matrix","data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide","long"), read_metadata = TRUE){ - format_out <- match.arg(format_out, c("matrix","data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide","long")) files <- unzip(path, list = TRUE) files.idx <- grep(".ch$", files$Name, ignore.case = TRUE) diff --git a/R/read_chemstation_csv.R b/R/read_chemstation_csv.R index ea71f52..1d05899 100644 --- a/R/read_chemstation_csv.R +++ b/R/read_chemstation_csv.R @@ -7,19 +7,17 @@ #' @name read_chemstation_csv #' @importFrom utils tail read.csv #' @param path Path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). 
#' @author Ethan Bass #' @export -read_chemstation_csv <- function(path, format_out = c("matrix","data.frame")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) +read_chemstation_csv <- function(path, format_out = c("matrix", "data.frame", "data.table")){ + format_out <- check_format_out(format_out) x <- read.csv(path, row.names = 1, header = TRUE, fileEncoding = "utf-16LE", check.names = FALSE) - if (format_out == "matrix"){ - x <- as.matrix(x) - } - x + convert_chrom_format(x, format_out = format_out) } diff --git a/R/read_chemstation_report.R b/R/read_chemstation_report.R index 5058b37..21a9b06 100644 --- a/R/read_chemstation_report.R +++ b/R/read_chemstation_report.R @@ -77,7 +77,8 @@ read_chemstation_report <- function(path, data_format = c("chromatographr", "ori metadata <- gsub("^\\s+","", metadata) metadata <- gsub("\\s+\\:\\s+", " : ", metadata) metadata <- merge_lines(metadata) - metadata <- unlist(strsplit(metadata, "(? 0) + colnames(x)[int_idx] <- "intensity" + x + }) if (data_format == "wide"){ data <- reshape_chroms(data, data_format = "wide") } @@ -67,9 +73,7 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), if (data_format == "long"){ data <- reshape_chrom(data) } - if (format_out == "data.frame"){ - data <- as.data.frame(data) - } + data <- convert_chrom_format(data, format_out = format_out) } data } diff --git a/R/read_shimadzu_gcd.R b/R/read_shimadzu_gcd.R index e9bd5ab..62379f1 100644 --- a/R/read_shimadzu_gcd.R +++ b/R/read_shimadzu_gcd.R @@ -21,7 +21,8 @@ #' floating-point numbers. The retention times can be (approximately?) derived #' from the number of values and the sampling interval encoded in the header. #' @param path Path to GCD file. -#' @param format_out Matrix or data.frame. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Either \code{wide} (default) or \code{long}. #' @param read_metadata Logical. 
Whether to attach metadata. #' @param metadata_format Format to output metadata. Either \code{chromconverter} @@ -35,13 +36,13 @@ #' yet able to interpret much metadata from the files. #' @export -read_shimadzu_gcd <- function(path, format_out = c("matrix", "data.frame"), +read_shimadzu_gcd <- function(path, format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter","raw")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table")) data_format <- match.arg(data_format, c("wide", "long")) - metadata_format <- match.arg(metadata_format, c("chromconverter","raw")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, "chromconverter" = "shimadzu_lcd", "raw") olefile_installed <- reticulate::py_module_available("olefile") @@ -60,14 +61,9 @@ read_shimadzu_gcd <- function(path, format_out = c("matrix", "data.frame"), DI <- read_sz_2DDI(path, idx = idx) x <- decode_shimadzu_gcd(path, stream = stream) - - if (data_format == "wide"){ - x <- data.frame(int = x$int, row.names = x$rt) - } - - if (format_out == "matrix"){ - x <- as.matrix(x) - } + x <- format_2d_chromatogram(rt = x$rt, int = x$int, + data_format = data_format, + format_out = format_out) if (read_metadata){ x <- attach_metadata(x, c(meta,DI), format_in = metadata_format, source_file = path, data_format = data_format, diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index c078c87..a40195a 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -51,12 +51,12 @@ #' the acquisition times diverge slightly from the ASCII file. 
#' @export -read_shimadzu_lcd <- function(path, what, format_out = c("matrix", "data.frame"), +read_shimadzu_lcd <- function(path, what, format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), scale = TRUE){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, @@ -155,10 +155,7 @@ read_sz_lcd_3d <- function(path, format_out = "matrix", if (data_format == "long"){ dat <- reshape_chrom(dat, data_format = "wide") } - - if (format_out == "data.frame"){ - dat <- as.data.frame(dat) - } + dat <- convert_chrom_format(dat, format_out = format_out) if (read_metadata){ meta <- read_sz_file_properties(path) meta <- c(meta, DI) @@ -216,12 +213,12 @@ read_sz_lcd_3d <- function(path, format_out = "matrix", #' \code{wide} or \code{long} format according to the value of \code{data_format}. 
#' @export -read_sz_lcd_2d <- function(path, format_out = "matrix", +read_sz_lcd_2d <- function(path, format_out = "data.frame", data_format = "wide", read_metadata = TRUE, metadata_format = "shimadzu_lcd", scale = TRUE){ - if (data_format == "long"){ + if (data_format == "long" && format_out == "matrix"){ format_out <- "data.frame" } existing_streams <- check_streams(path, what = "chromatogram") @@ -246,13 +243,11 @@ read_sz_lcd_2d <- function(path, format_out = "matrix", dat <- dat*DI$detector.vf } if (data_format == "long"){ - dat <- data.frame(rt = times, int = dat$int, detector = DI$DETN, - channel = DI$DSCN, wavelength = DI$ADN, + dat <- data.frame(rt = times, intensity = dat$int, detector = DI$DETN, + channel = DI$DSCN, lambda = DI$ADN, unit = DI$detector.unit) } - if (format_out == "matrix"){ - dat <- as.matrix(dat) - } + dat <- convert_chrom_format(dat, format_out = format_out) if (read_metadata){ dat <- attach_metadata(dat, c(meta, DI), format_in = metadata_format, source_file = path, data_format = data_format, @@ -262,7 +257,7 @@ read_sz_lcd_2d <- function(path, format_out = "matrix", }) names(dat) <- sapply(dat, function(x){ - det <- gsub("Detector ", "", attr(x,"detector")) + det <- gsub("Detector ", "", attr(x, "detector")) wv <- attr(x, "wavelength") ifelse(wv == "", det, paste(det, wv, sep = ", ")) }) @@ -301,7 +296,7 @@ read_sz_lcd_2d <- function(path, format_out = "matrix", #' yet able to interpret much metadata from the files. 
#' @export -read_sz_tic <- function(path, format_out = c("matrix", "data.frame"), +read_sz_tic <- function(path, format_out = "data.frame", data_format = c("wide", "long"), read_metadata = TRUE){ path_tic <- check_streams(path, what = "tic") @@ -311,11 +306,9 @@ read_sz_tic <- function(path, format_out = c("matrix", "data.frame"), dat <- decode_sz_tic(f) if (data_format == "wide"){ row.names(dat) <- dat[, "rt"] - dat <- dat[,"int", drop=FALSE] - } - if (format_out == "data.frame"){ - dat <- as.data.frame(dat) + dat <- dat[, "intensity", drop = FALSE] } + dat <- convert_chrom_format(dat, format_out = format_out) dat } @@ -338,16 +331,17 @@ decode_sz_tic <- function(f){ count <- count + 1 } mat[,1] <- mat[,1]/1000 - colnames(mat) <- c("rt", "index", "int") + colnames(mat) <- c("rt", "index", "intensity") mat } +#' Read Shimadzu chromatogram #' @noRd read_sz_chrom <- function(path, stream){ path_raw <- export_stream(path, stream = stream) f <- file(path_raw, "rb") on.exit(close(f)) - data.frame(int = decode_sz_block(f)) + data.frame(intensity = decode_sz_block(f)) } #' Read 'Shimadzu' "Method" stream @@ -390,7 +384,7 @@ read_sz_method <- function(path, stream = c("GUMM_Information", "ShimadzuPDA.1", #' (more reliably?) from the 2D Data Item. 
#' @author Ethan Bass #' @noRd -get_sz_times <- function(sz_method, what = c("pda","chromatogram"), nval){ +get_sz_times <- function(sz_method, what = c("pda", "chromatogram"), nval){ what <- match.arg(what, c("pda", "chromatogram")) fields <- switch(what, "pda" = c("StTm", "EdTm"), "chromatogram" = c("ACQ$StartTm#1", "ACQ$EndTm#1")) @@ -459,7 +453,7 @@ read_sz_wavelengths <- function(path){ "Wavelength Table")) f <- file(path_wavtab, "rb") on.exit(close(f)) - n_lambda <- readBin(f, what="integer", size = 4) + n_lambda <- readBin(f, what = "integer", size = 4) count <- 1 lambdas <- sapply(seq_len(n_lambda), function(i){ readBin(f, what = "integer", size = 4)/100 @@ -659,8 +653,8 @@ read_sz_3DDI <- function(path){ meta <- as.list(xml2::xml_text(nodes[-rm])) names(meta) <- xml2::xml_name(nodes[-rm]) - meta[c("WVB","WVE","WLS")] <- - lapply(meta[c("WVB","WVE","WLS")], function(x){ + meta[c("WVB", "WVE", "WLS")] <- + lapply(meta[c("WVB", "WVE", "WLS")], function(x){ sz_float(x)/100 }) meta <- c(meta, read_sz_2DDI(xml2::xml_find_all(doc, @@ -685,16 +679,16 @@ read_sz_2DDI <- function(path, read_file = TRUE, idx = 1){ meta <- xml2::xml_text(nodes[-ddi_idx]) names(meta) <- xml2::xml_name(nodes[-ddi_idx]) - meta[c("CF","GF","AT","DLT")] <- - lapply(meta[c("CF","GF","AT","DLT")], function(x) sz_float(x)) + meta[c("CF", "GF", "AT", "DLT")] <- + lapply(meta[c("CF", "GF", "AT", "DLT")], function(x) sz_float(x)) meta <- c(meta, extract_axis_metadata(nodes)) meta$time.vf <- ifelse(is.na(meta$time.vf), 60000, meta$time.vf) meta$detector.vf <- 1/meta$detector.vf - meta[c("AT","DLT","Rate")] <- - lapply(meta[c("AT","DLT","Rate")], function(x) as.numeric(x)/meta$time.vf) + meta[c("AT", "DLT", "Rate")] <- + lapply(meta[c("AT", "DLT", "Rate")], function(x) as.numeric(x)/meta$time.vf) meta } @@ -711,7 +705,7 @@ extract_axis_metadata <- function(x){ } else NA }) - names(ax) <- c("detector","time") + names(ax) <- c("detector", "time") unlist(lapply(ax, function(x){ if (inherits(x, 
"xml_node")){ list(vf = xml2::xml_find_all(x, "VF") |> xml2::xml_text() |> sz_float(), diff --git a/R/read_shimadzu_qgd.R b/R/read_shimadzu_qgd.R index 8956a94..e967cb3 100644 --- a/R/read_shimadzu_qgd.R +++ b/R/read_shimadzu_qgd.R @@ -30,8 +30,9 @@ #' @param read_metadata Logical. Whether to attach metadata. #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. -#' @return A 2D chromatogram from the chromatogram stream in \code{matrix} or -#' \code{data.frame} format, according to the value of \code{format_out}. +#' @return A 2D chromatogram from the chromatogram stream in \code{matrix}, +#' \code{data.frame}, or \code{data.table} format, according to the value of +#' \code{format_out}. #' The chromatograms will be returned in \code{wide} or \code{long} format #' according to the value of \code{data_format}. #' @note This parser is experimental and may still need some work. It is not @@ -39,14 +40,14 @@ #' @author Ethan Bass #' @export -read_shimadzu_qgd <- function(path, what = c("tic", "ms"), - format_out = c("matrix", "data.frame"), +read_shimadzu_qgd <- function(path, what = c("MS1", "TIC"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) - what <- match.arg(what, c("tic","ms"), several.ok = TRUE) + what <- match.arg(toupper(what), c("MS1", "TIC"), several.ok = TRUE) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, "chromconverter" = "shimadzu_lcd", "raw") @@ -55,19 +56,22 @@ read_shimadzu_qgd <- function(path, what = c("tic", "ms"), configure_python_environment(parser = "olefile") } - if ("tic" %in% what){ - tic <- read_qgc_tic(path, format_out = format_out, data_format = 
data_format) + if ("TIC" %in% what){ + TIC <- read_qgc_tic(path, format_out = format_out, + data_format = data_format) } - if ("ms" %in% what){ - ms <- read_qgd_ms_stream(path, format_out = format_out) + if ("MS1" %in% what){ + MS1 <- read_qgd_ms_stream(path, format_out = format_out) } dat <- mget(what) if (read_metadata){ meta <- try(read_qgd_fp(path)) - dat <- attach_metadata(dat, meta, format_in = metadata_format, + dat <- lapply(dat, function(x){ + attach_metadata(x, meta, format_in = metadata_format, source_file = path, data_format = data_format, format_out = format_out) + }) } dat } @@ -75,7 +79,7 @@ read_shimadzu_qgd <- function(path, what = c("tic", "ms"), #' Read QGC total ion chromatogram #' @author Ethan Bass #' @noRd -read_qgc_tic <- function(path, format_out = c("matrix", "data.frame"), +read_qgc_tic <- function(path, format_out = "data.frame", data_format = c("wide", "long"), read_metadata = TRUE){ @@ -95,15 +99,8 @@ read_qgc_tic <- function(path, format_out = c("matrix", "data.frame"), rts <- read_qgd_retention_times(path) - if (data_format == "wide"){ - dat <- matrix(int, nrow = nval, ncol = 1, dimnames = list(rts, "int")) - } else if (data_format == "long"){ - dat <- cbind(rts, int) - colnames(dat) <- c("rt", "int") - } - if (format_out == "data.frame"){ - dat <- as.data.frame(dat) - } + dat <- format_2d_chromatogram(rt = rts, int = int, format_out = format_out, + data_format = data_format) dat } @@ -124,7 +121,7 @@ read_qgd_ms_block <- function(f){ readBin(f, "integer", size = 4, endian = "little", n = 2) #skip mat <- matrix(NA, nrow = nval, ncol = 4, - dimnames = list(NULL, c("scan", "rt", "mz", "int"))) + dimnames = list(NULL, c("scan", "rt", "mz", "intensity"))) # we have to add a byte of 00s for odd numbers of bytes because R can't deal # with integers that have odd numbers of bytes add_byte <- n_bytes %% 2 == 1 @@ -146,8 +143,6 @@ read_qgd_ms_block <- function(f){ mat } -# what are time units? 
- #' Read 'Shimadzu QGD' retention times #' Retention times are stored in the "GCMS Raw Data/Retention Time" stream as #' a series of 4-byte, little-endian integers. @@ -162,7 +157,7 @@ read_qgd_retention_times <- function(path){ n_val <- last_byte/4 seek(f, 0, origin = "start") - rts <- readBin(f, what = "integer", size = 4, n = n_val, endian = "little")/60 + rts <- readBin(f, what = "integer", size = 4, n = n_val, endian = "little") rts } @@ -171,8 +166,8 @@ read_qgd_retention_times <- function(path){ #' @param path Path to 'Shimadzu' QGD file. #' @author Ethan Bass #' @noRd -read_qgd_ms_stream <- function(path, format_out = c("matrix", "data.frame")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) +read_qgd_ms_stream <- function(path, format_out = "data.frame"){ + format_out <- check_format_out(format_out) rts <- read_qgd_retention_times(path) @@ -183,11 +178,9 @@ read_qgd_ms_stream <- function(path, format_out = c("matrix", "data.frame")){ xx <- lapply(seq_along(rts), function(i){ read_qgd_ms_block(f) }) - mat <- do.call(rbind, xx) - if (format_out == "data.frame"){ - mat <- as.data.frame(mat) - } - mat + dat <- do.call(rbind, xx) + dat <- convert_chrom_format(dat, format_out = format_out) + dat } diff --git a/R/read_varian_sms.R b/R/read_varian_sms.R index f4cfc8c..b24739e 100644 --- a/R/read_varian_sms.R +++ b/R/read_varian_sms.R @@ -51,13 +51,13 @@ #' this file format. 
#' @export -read_varian_sms <- function(path, what = c("chrom", "MS1"), - format_out = c("matrix", "data.frame"), +read_varian_sms <- function(path, what = c("MS1", "TIC", "BPC"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, collapse = TRUE){ - what <- match.arg(what, c("chroms", "MS1"), several.ok = TRUE) - format_out <- match.arg(format_out, c("matrix", "data.frame")) + what <- match.arg(what, c("MS1", "TIC", "BPC", "chroms"), several.ok = TRUE) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) f <- file(path, "rb") @@ -65,19 +65,31 @@ read_varian_sms <- function(path, what = c("chrom", "MS1"), meta <- read_varian_msdata_header(f) - chroms <- read_varian_chromatograms(f, n_time = meta$n_scan) + chroms <- read_varian_chromatograms(f, n_time = meta$n_scan, + format_out = format_out, + data_format = "long") skip_null_bytes(f) acq_delay <- max(which(chroms[, "tic"] == 0)) n_scans <- nrow(chroms) - acq_delay if ("MS1" %in% what){ - MS1 <- read_varian_ms_stream(f, n_scans = n_scans) + MS1 <- read_varian_ms_stream(f, n_scans = n_scans, format_out = format_out) MS1[,1] <- chroms[(MS1[,1] + acq_delay), "rt"] - colnames(MS1) <- c('rt', 'mz', 'int') + colnames(MS1) <- c("rt", "mz", "intensity") } + if (any(what == "TIC")){ + TIC <- format_2d_chromatogram(rt = chroms[,"rt"], int = chroms[,"tic"], + data_format = "long", + format_out = format_out) + } + if (any(what == "BPC")){ + BPC <- format_2d_chromatogram(rt = chroms[,"rt"], int = chroms[,"bpc"], + data_format = "long", + format_out = format_out) + } dat <- mget(what) if (collapse) dat <- collapse_list(dat) @@ -90,9 +102,11 @@ read_varian_sms <- function(path, what = c("chrom", "MS1"), meta <- read_mod_metadata(f, offsets, meta) - dat <- attach_metadata(dat, meta, format_in = "varian_sms", - format_out = format_out, data_format = data_format, + dat <- lapply(dat, function(x){ + attach_metadata(x, meta, 
format_in = "varian_sms", + format_out = format_out, data_format = "long", source_file = path) + }) } dat } @@ -137,22 +151,30 @@ read_mod_metadata <- function(f, offsets, meta){ #' Read 'Varian Workstation' Chromatograms #' @param f Connection to a 'Varian' SMS file opened to the beginning of the #' chromatogram. +#' @param format_out Matrix or data.frame. +#' @param data_format Either \code{wide} (default) or \code{long}. #' @author Ethan Bass #' @noRd -read_varian_chromatograms <- function(f, n_time){ - mat <- matrix(NA, nrow = n_time, ncol = 5) - colnames(mat) <- c("scan", "rt", "tic", "bpc", "ion_time") +read_varian_chromatograms <- function(f, n_time, format_out = "data.frame", + data_format = "wide"){ + dat <- matrix(NA, nrow = n_time, ncol = 5) + colnames(dat) <- c("scan", "rt", "tic", "bpc", "ion_time") for (i in seq_len(n_time)){ - mat[i, "scan"] <- readBin(f, what="integer", size = 4, endian = "little") - mat[i, "rt"] <- readBin(f, what = "double", size = 8, endian = "little") - mat[i, "ion_time"] <- readBin(f, what = "integer", size = 2, signed = FALSE, + dat[i, "scan"] <- readBin(f, what = "integer", size = 4, endian = "little") + dat[i, "rt"] <- readBin(f, what = "double", size = 8, endian = "little") + dat[i, "ion_time"] <- readBin(f, what = "integer", size = 2, signed = FALSE, endian = "little") - mat[i, "tic"] <- readBin(f, what = "integer", size = 4, endian = "little") + dat[i, "tic"] <- readBin(f, what = "integer", size = 4, endian = "little") readBin(f, what = "raw", n = 6) # skip six unidentified bytes - mat[i, "bpc"] <- readBin(f, what="integer", size = 4, endian = "little") + dat[i, "bpc"] <- readBin(f, what = "integer", size = 4, endian = "little") readBin(f, what = "raw", n = 11) # skip 11 unidentified bytes } - mat + if (data_format == "wide"){ + rownames(dat) <- dat[,"rt"] + dat <- dat[,-2] + } + dat <- convert_chrom_format(dat, format_out = format_out) + dat } #' Read 'Varian' MS stream @@ -160,14 +182,19 @@ read_varian_chromatograms <- 
function(f, n_time){ #' mass spectra stream. #' @author Ethan Bass #' @noRd -read_varian_ms_stream <- function(f, n_scans){ +read_varian_ms_stream <- function(f, n_scans, format_out = "data.frame", + data_format = "wide"){ xx <- lapply(seq_len(n_scans), function(i){ xx <- read_varian_ms_block(f) cbind(scan = i, xx) }) - do.call(rbind, xx) + dat <- do.call(rbind, xx) + dat <- convert_chrom_format(dat, format_out = format_out) + dat } + + #' Read 'Varian' MS block #' @author Ethan Bass #' @noRd @@ -294,11 +321,13 @@ read_varian_msdata_header <- function(f){ u1 <- readBin(f, what = "integer", size = 2, endian = "little", signed = FALSE) - t2 <- readBin(f, what = "raw", n=4, endian = "little") - t2 <- as.POSIXct(strtoi(paste(c(t2[2],t2[1], t2[3:4]), collapse = ""), 16)) + t2 <- readBin(f, what = "raw", n = 4, endian = "little") + t2 <- as.POSIXct(strtoi(paste(c(t2[2], t2[1], t2[3:4]), collapse = ""), 16), + tz = "UTC") - t1 <- readBin(f, what = "raw", n=4, endian = "little") - t1 <- as.POSIXct(strtoi(paste(c(t1[2],t1[1], t1[3:4]), collapse = ""), 16)) + t1 <- readBin(f, what = "raw", n = 4, endian = "little") + t1 <- as.POSIXct(strtoi(paste(c(t1[2], t1[1], t1[3:4]), collapse = ""), 16), + tz = "UTC") u2 <- readBin(f, what = "integer", size = 2, endian = "little", signed = FALSE) @@ -331,7 +360,7 @@ read_varian_msdata_header <- function(f){ readBin(f, what = "raw", n = 12) #skip # reader segment headers - seg_no <- readBin(f, what="integer", size = 2) + seg_no <- readBin(f, what = "integer", size = 2) segment_metadata <- list() i <- 1 while(seg_no == i){ @@ -369,7 +398,7 @@ read_varian_msdata_header <- function(f){ endian = "little", signed = FALSE) i <- i + 1 } - readBin(f, what="raw", n = 6) + readBin(f, what = "raw", n = 6) mget(c("ion_time", "emission_current", "dac", "u1", "t1", "t2", "u2", "n_scan", "max_ric_scan", "max_ric_val", "u3", "u4", "u5", "segment_metadata")) } diff --git a/R/read_waters_arw.R b/R/read_waters_arw.R index 5ab7467..5012fd9 100644 --- 
a/R/read_waters_arw.R +++ b/R/read_waters_arw.R @@ -8,7 +8,8 @@ #' @name read_waters_arw #' @importFrom utils tail read.csv #' @param path Path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} @@ -18,11 +19,11 @@ #' @author Ethan Bass #' @export -read_waters_arw <- function(path, format_out = c("matrix", "data.frame"), +read_waters_arw <- function(path, format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) metadata_format <- switch(metadata_format, @@ -42,12 +43,10 @@ read_waters_arw <- function(path, format_out = c("matrix", "data.frame"), } else if (ncol(x) == 1){ colnames(x) <- "Intensity" if (data_format == "long"){ - x <- data.frame(RT = rownames(x), Intensity = x[,1]) + x <- data.frame(rt = rownames(x), intensity = x[,1]) } } - if (format_out == "matrix"){ - x <- as.matrix(x) - } + x <- convert_chrom_format(x, format_out = format_out) if (read_metadata){ meta <- try(read_waters_metadata(path)) if (!inherits(meta, "try-error")){ diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R index 1f481d3..08b9822 100644 --- a/R/read_waters_raw.R +++ b/R/read_waters_raw.R @@ -3,7 +3,8 @@ #' Parser for reading 'Waters MassLynx (.raw) files into R. #' #' @param path Path to \code{.raw} file. -#' @param format_out Matrix or data.frame. +#' @param format_out Class of output. 
Either \code{matrix}, \code{data.frame}, +#' or \code{data.table}. #' @param data_format Either \code{wide} (default) or \code{long}. #' @param read_metadata Logical. Whether to attach metadata. #' @param metadata_format Format to output metadata. Either \code{chromconverter} @@ -11,21 +12,21 @@ #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). #' @note For now this parser only reads 1D chromatograms (not mass spectra or -#' DAD data). +#' DAD data) and does not support parsing of metadata from 'Waters' RAW files. #' @author Ethan Bass #' @export -read_waters_raw <- function(path, format_out = c("matrix", "data.frame"), +read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) + format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE) - meta_path <- grep("\\.INF", uv_paths, value=TRUE) + meta_path <- grep("\\.INF", uv_paths, value = TRUE) uv_paths <- grep("\\.INF", uv_paths, invert = TRUE, value = TRUE) dat <- lapply(uv_paths, read_waters_chro, format_out = format_out, @@ -34,9 +35,9 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame"), meta <- readLines(meta_path, skipNul = TRUE, warn = FALSE, encoding = "Latin-1") - meta <- iconv(meta, sub="") + meta <- iconv(meta, sub = "") meta <- strsplit(meta,"\\([0-9]\\)")[[1]][-1] - meta <- gsub("^ |\\$CC\\$","", sapply(strsplit(meta, ","), function(x) x[1])) + meta <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), function(x) x[1])) names(dat) <- meta dat @@ -60,12 +61,11 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame"), #magic 80000100 
08000200 -read_waters_chro <- function(path, format_out = c("matrix", "data.frame"), +read_waters_chro <- function(path, format_out = "data.frame", data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) # metadata_format <- switch(metadata_format, @@ -80,16 +80,10 @@ read_waters_chro <- function(path, format_out = c("matrix", "data.frame"), seek(f, 128, "start") start <- seek(f, 128, "start") - x<-readBin(f, "numeric", size = 4, n = (end-start)/4) + x <- readBin(f, "numeric", size = 4, n = (end - start)/4) times <- x[seq(1, length(x), by = 2)] int <- x[seq(2, length(x), by = 2)] - if (data_format == "long"){ - dat <- data.frame(times = times, int = int) - } else { - dat <- data.frame(row.names = times, int = int) - } - if (format_out == "matrix"){ - dat <- as.matrix(dat) - } + dat <- format_2d_chromatogram(rt = times, int = int, + data_format = data_format, format_out = format_out) dat } diff --git a/R/reshape_chroms.R b/R/reshape_chroms.R index 2e3221e..518ca78 100644 --- a/R/reshape_chroms.R +++ b/R/reshape_chroms.R @@ -21,14 +21,7 @@ reshape_chroms <- function(x, idx, sample_var = "sample", lambdas = NULL, idx <- seq_along(x) } dat <- lapply(idx, function(i){ - if (is.null(lambdas)){ - if (data_format == "wide"){ - lambda.idx <- grep("lambda", colnames(x[[i]])) - lambdas <- unique(as.data.frame(x[[i]])[, lambda.idx]) - } else if (data_format == "long"){ - lambdas <- colnames(x[[i]]) - } - } + xx <- reshape_chrom(x[[i]], lambdas = lambdas, data_format = data_format, ...) if (data_format == "long"){ @@ -62,7 +55,7 @@ reshape_chrom <- function(x, data_format, ...){ #' @return A chromatographic matrix in long format. 
#' @author Ethan Bass #' @noRd -reshape_chrom_long <- function(x, lambdas, format_out = NULL, names_to = "lambda"){ +reshape_chrom_long <- function(x, lambdas = NULL, format_out = NULL, names_to = "lambda"){ if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "long"){ warning("The data already appear to be in long format!", immediate. = TRUE) } @@ -74,10 +67,10 @@ reshape_chrom_long <- function(x, lambdas, format_out = NULL, names_to = "lambda xx <- as.data.frame(x) if (ncol(x) == 1){ - data <- data.frame(RT = as.numeric(rownames(xx)), Intensity = xx[,1], + data <- data.frame(rt = as.numeric(rownames(xx)), int = xx[,1], row.names = NULL) } else { - if (!missing(lambdas)){ + if (!is.null(lambdas)){ xx <- xx[,lambdas, drop = FALSE] } data <- data.frame(tidyr::pivot_longer(data.frame(rt = rownames(xx), xx, @@ -96,19 +89,16 @@ reshape_chrom_long <- function(x, lambdas, format_out = NULL, names_to = "lambda #' Reshapes a single chromatogram from long to wide format #' @noRd -reshape_chrom_wide <- function(x, lambdas, lambda_var = "lambda", time_var="rt", - value_var = "int", drop){ +reshape_chrom_wide <- function(x, lambdas = NULL, lambda_var = "lambda", + time_var = "rt", value_var = "intensity", drop = NULL){ if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "wide"){ warning("The data already appear to be in wide format!", immediate. 
= TRUE) } - if (missing(drop)){ + if (is.null(drop)){ drop <- colnames(x)[which(sapply(x, is.character))] } - if (missing(value_var)){ - value_var <- colnames(x)[grep("int|abs", colnames(x),ignore.case = TRUE)] - } - if (!missing(lambdas)){ - x <- x[which(x[,lambda_var] %in% lambdas),] + if (!is.null(lambdas)){ + x <- x[which(x[[lambda_var]] %in% lambdas),] } x <- as.data.frame(x) data <- data.frame(tidyr::pivot_wider(x, id_cols = !!time_var, diff --git a/R/utils.R b/R/utils.R index 38416f2..8ade928 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,4 +1,46 @@ -utils::globalVariables(names = c('.')) +utils::globalVariables(names = c('.', "rt")) + +#' Check Format Out Argument +#' @noRd +check_format_out <- function(format_out){ + match.arg(format_out, c("matrix", "data.frame", "data.table")) +} + +#' Convert chromatogram format +#' @author Ethan Bass +#' @noRd +convert_chrom_format <- function(x, format_out){ + if (inherits(x, format_out)){ + return(x) + } else if (format_out == "matrix"){ + return(as.matrix(x)) + } else if (format_out == "data.frame"){ + return(as.data.frame(x)) + } else if (format_out == "data.table"){ + return(data.table::as.data.table(x)) + } +} + +#' Format 2D chromatogram +#' @noRd +format_2d_chromatogram <- function(rt, int, data_format, format_out){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table")) + if (data_format == "wide" && any(duplicated(rt))){ + stop("Some row names are duplicated. 
Please use `long` format instead.") + } + if (data_format == "long"){ + dat <- data.frame(rt = rt, intensity = int) + } else{ + dat <- data.frame(intensity = int, row.names = rt) + } + if (format_out == "matrix"){ + dat <- as.matrix(dat) + } else if (format_out == "data.table"){ + data.table::setDT(dat) + } + dat +} #' Get filetype #' @noRd @@ -28,9 +70,9 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ "x01/x38/x00/x00" = "chemstation_8", #81 "x03/x31/x38/x31" = "chemstation_181", #181 "x01/xa1/x46/x00" = "ThermoRAW", - "xd0/xcf/x11/xe0" = "ShimadzuLCD", - "x1c/x00/x09/x03" = "VarianSMS", - "x80/x00/x01/x00" = "WatersRAW", + "xd0/xcf/x11/xe0" = "shimadzu_ole", + "x1c/x00/x09/x03" = "varian_sms", + "x80/x00/x01/x00" = "waters_raw", "x43/x44/x46/x01" = "cdf" ) if (is.null(filetype)){ @@ -43,6 +85,12 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ magic2 <- paste(magic2, collapse = "") filetype <- switch(magic2, "OL" = "openlab_131", "LC" = "chemstation_131") + } else if (filetype == "shimadzu_ole"){ + filetype <- paste("shimadzu", tolower(fs::path_ext(path)),sep = "_") + # fp <- read_sz_file_properties(path) + # filetype <- switch(fp$FileProperty.dwFileType, + # "67108895" = "shimadzu_lcd", + # "67108975" = "shimadzu_gcd") } format_in <- switch(filetype, "AgilentChemstationMS" = "chemstation", @@ -52,9 +100,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ # "chemstation_131" = "chemstation_uv", # "openlab_131" = "chemstation_uv", "ThermoRAW" = "thermoraw", - "ShimadzuLCD" = "shimadzu_lcd", "VarianSMS" = "varian_sms", - "WatersRAW" = "waters_raw", filetype ) switch(out, "filetype" = filetype, "format_in" = format_in) @@ -63,7 +109,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ #' Check parser #' @noRd check_parser <- function(format_in, parser = NULL, find = FALSE){ - allowed_formats <- list(openchrom = c("msd","csd","wsd"), + allowed_formats <- list(openchrom = c("msd", "csd", 
"wsd"), chromconverter = c("agilent_dx", "cdf", "chemstation_csv", "chemstation_ch", "chemstation_fid", "chemstation_uv", "chromeleon_uv", @@ -74,7 +120,8 @@ check_parser <- function(format_in, parser = NULL, find = FALSE){ "mzml", "mzxml", "mdf", "shimadzu_ascii", "shimadzu_dad", "shimadzu_fid", "shimadzu_gcd", - "shimadzu_lcd", "varian_sms", + "shimadzu_qgd", "shimadzu_lcd", + "varian_sms", "waters_arw", "waters_raw", "waters_chro"), aston = c("chemstation_uv", "chemstation_131", @@ -117,17 +164,25 @@ check_parser <- function(format_in, parser = NULL, find = FALSE){ } } } - possible_parsers[1] + parser <- possible_parsers[1] } else{ if (!(format_in %in% allowed_formats[[tolower(parser)]])){ stop("Mismatched arguments!", "\n\n", "The ", paste0(sQuote(format_in), " format can be converted using the following parsers: ", - paste(sQuote(names(allowed_formats)[grep(format_in, allowed_formats)]), collapse = ", "), ". \n \n", + paste(sQuote(names(allowed_formats)[grep(format_in, allowed_formats)]), + collapse = ", "), ". \n \n", "The ", sQuote(parser), " parser can take the following formats as inputs: \n", - paste(sQuote(allowed_formats[[parser]]), collapse=", "), ". \n \n", + paste(sQuote(allowed_formats[[parser]]), + collapse=", "), ". \n \n", "Please double check your arguments and try again.")) } } + if (parser == "entab" & !requireNamespace("entab", quietly = TRUE)) { + stop("The entab R package must be installed to use entab parsers: + install.packages('entab', repos='https://ethanbass.github.io/drat/')", + call. 
= FALSE) + } + return(parser) } #' Remove unicode characters @@ -175,11 +230,14 @@ format_to_extension <- function(format_in){ "shimadzu_fid" = "\\.txt$", "shimadzu_dad" = "\\.txt$", "shimadzu_lcd" = "\\.lcd$", + "shimadzu_gcd" = "\\.gcd$", + "shimadzu_qgd" = "\\.qgd$", "chromeleon_uv" = "\\.txt$", "thermoraw" = "\\.raw$", "cdf" = "\\.cdf$", "mzml" = "\\.mzml$", "mzxml" = "\\.mzxml$", + "varian_sms" = "\\.sms$", "waters_arw" = "\\.arw$", "waters_raw" = "\\.raw$", "msd" = "\\.", diff --git a/man/call_entab.Rd b/man/call_entab.Rd index 56c2d56..a4e0d36 100644 --- a/man/call_entab.Rd +++ b/man/call_entab.Rd @@ -8,8 +8,8 @@ Converts files using Entab parsers} call_entab( path, data_format = c("wide", "long"), + format_out = c("matrix", "data.frame", "data.table"), format_in = "", - format_out = c("matrix", "data.frame"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") ) @@ -19,9 +19,10 @@ call_entab( \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} -\item{format_in}{Format of input.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_in}{Format of input.} \item{read_metadata}{Whether to read metadata from file.} diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index 25a41cb..68566d1 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -8,7 +8,8 @@ call_openchrom( files, path_out = NULL, format_in, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), + data_format = c("wide", "long"), export_format = c("mzml", "csv", "cdf", "animl"), return_paths = FALSE, verbose = getOption("verbose") @@ -22,7 +23,10 @@ call_openchrom( \item{format_in}{Either \code{msd} for mass spectrometry data, \code{csd} for flame ionization data, or \code{wsd} for DAD/UV data.} -\item{format_out}{R format. 
Either \code{matrix} or \code{data.frame}.} +\item{format_out}{R format. Either \code{matrix}, \code{data.frame} or +\code{data.table}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} \item{export_format}{Either \code{mzml}, \code{csv}, \code{cdf}, \code{animl}. Defaults to \code{mzml}.} diff --git a/man/call_rainbow.Rd b/man/call_rainbow.Rd index 4581098..1a6b939 100644 --- a/man/call_rainbow.Rd +++ b/man/call_rainbow.Rd @@ -8,11 +8,12 @@ call_rainbow( path, format_in = c("agilent_d", "waters_raw", "masshunter", "chemstation", "chemstation_uv", "chemstation_fid"), - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), by = c("detector", "name"), what = NULL, read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), collapse = TRUE, precision = 1 ) @@ -23,7 +24,8 @@ call_rainbow( \item{format_in}{Format of the supplied files. Either \code{agilent_d}, \code{waters_raw}, or \code{chemstation}.} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{R format. Either \code{matrix}, \code{data.frame}, or +\code{data.table}.} \item{data_format}{Whether to return data in wide or long format.} @@ -35,6 +37,9 @@ call_rainbow( \item{read_metadata}{Logical. Whether to attach metadata. Defaults to TRUE.} +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} + \item{collapse}{Logical. 
Whether to collapse lists that only contain a single element.} diff --git a/man/read_agilent_dx.Rd b/man/read_agilent_dx.Rd index 04f7604..ea28cf5 100644 --- a/man/read_agilent_dx.Rd +++ b/man/read_agilent_dx.Rd @@ -7,7 +7,7 @@ read_agilent_dx( path, path_out = NULL, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE ) @@ -17,7 +17,8 @@ read_agilent_dx( \item{path_out}{Path to directory to export unzipped files.} -\item{format_out}{Matrix or data.frame.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd index f7d2609..cace1e4 100644 --- a/man/read_cdf.Rd +++ b/man/read_cdf.Rd @@ -18,7 +18,8 @@ read_cdf( \arguments{ \item{path}{Path to ANDI netCDF file.} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{\link[data.table]{data.table}}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format. For 2D files, "long" format returns the retention time as the first column of diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 3b001f5..34eb67c 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -6,7 +6,7 @@ \usage{ read_chemstation_ch( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), @@ -16,7 +16,8 @@ read_chemstation_ch( \arguments{ \item{path}{Path to \code{.ch} file} -\item{format_out}{Matrix or data.frame.} +\item{format_out}{Class of output. 
Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/read_chemstation_csv.Rd b/man/read_chemstation_csv.Rd index ae0c935..c05d0ab 100644 --- a/man/read_chemstation_csv.Rd +++ b/man/read_chemstation_csv.Rd @@ -4,12 +4,16 @@ \alias{read_chemstation_csv} \title{Read Chemstation CSV} \usage{ -read_chemstation_csv(path, format_out = c("matrix", "data.frame")) +read_chemstation_csv( + path, + format_out = c("matrix", "data.frame", "data.table") +) } \arguments{ \item{path}{Path to file} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_chemstation_uv.Rd b/man/read_chemstation_uv.Rd index 7fe7bab..ef54e58 100644 --- a/man/read_chemstation_uv.Rd +++ b/man/read_chemstation_uv.Rd @@ -6,7 +6,7 @@ \usage{ read_chemstation_uv( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), @@ -16,7 +16,8 @@ read_chemstation_uv( \arguments{ \item{path}{Path to \code{.uv} file.} -\item{format_out}{Matrix or data.frame.} +\item{format_out}{Class of output. 
Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Either \code{wide} (default) or \code{long}.} diff --git a/man/read_chromeleon.Rd b/man/read_chromeleon.Rd index 1688f83..2b01803 100644 --- a/man/read_chromeleon.Rd +++ b/man/read_chromeleon.Rd @@ -6,7 +6,7 @@ \usage{ read_chromeleon( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") @@ -15,7 +15,8 @@ read_chromeleon( \arguments{ \item{path}{Path to file} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index 8a131b3..32e36c9 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -14,7 +14,7 @@ read_chroms( find_files, pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), path_out = NULL, export_format = c("", "csv", "chemstation_csv", "cdf", "mzml", "animl"), @@ -52,7 +52,8 @@ case file extension will be deduced from \code{format_in}.} \code{chromconverter}, \code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or \code{rainbow}.} -\item{format_out}{Class of output (i.e. data.frame or matrix).} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{\link[data.table]{data.table}}.} \item{data_format}{Whether to output data in wide or long format. Either \code{wide} or \code{long}.} @@ -112,10 +113,10 @@ Provides a unified interface to all chromConverter parsers. 
Currently recognizes 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' -(\code{.txt}), 'Shimadzu GCD', and 'Shimadzu LCD' files (preliminary support). -Also, wraps 'OpenChrom' parsers, which include many additional formats. To use 'Entab', -'ThermoRawFileParser', or 'OpenChrom' parsers, they must be manually installed. -Please see the instructions in the +(\code{.txt}), 'Shimadzu GCD', 'Shimadzu LCD' (DAD and chromatogram streams) +and 'Shimadzu QGD' files. Also, wraps 'OpenChrom' parsers, which include many +additional formats. To use 'Entab', 'ThermoRawFileParser', or 'OpenChrom' +parsers, they must be manually installed. Please see the instructions in the \href{https://ethanbass.github.io/chromConverter/}{README} for further details. If paths to individual files are provided, \code{read_chroms} will try to diff --git a/man/read_mdf.Rd b/man/read_mdf.Rd index 4646bba..6b8b819 100644 --- a/man/read_mdf.Rd +++ b/man/read_mdf.Rd @@ -6,7 +6,7 @@ \usage{ read_mdf( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE ) @@ -14,7 +14,8 @@ read_mdf( \arguments{ \item{path}{The path to a 'Lumex' \code{.mdf} file.} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. 
Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/read_mzml.Rd b/man/read_mzml.Rd index bd82f4f..72c94f8 100644 --- a/man/read_mzml.Rd +++ b/man/read_mzml.Rd @@ -6,7 +6,7 @@ \usage{ read_mzml( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("long", "wide"), parser = c("RaMS", "mzR"), what = c("MS1", "MS2", "BPC", "TIC", "DAD", "chroms", "metadata", "everything"), @@ -17,9 +17,9 @@ read_mzml( \arguments{ \item{path}{path to file} -\item{format_out}{R format. Only applies if \code{mzR} is selected. -Either \code{matrix} or \code{data.frame}. \code{RaMS} will return -a list of data.tables regardless of what is selected here.} +\item{format_out}{Class of output. Only applies if \code{mzR} is selected. +Either \code{matrix}, \code{data.frame}, or \code{data.table}. \code{RaMS} +will return a list of data.tables regardless of what is selected here.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/read_shimadzu_gcd.Rd b/man/read_shimadzu_gcd.Rd index 01a3f80..ee77795 100644 --- a/man/read_shimadzu_gcd.Rd +++ b/man/read_shimadzu_gcd.Rd @@ -6,7 +6,7 @@ \usage{ read_shimadzu_gcd( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") @@ -15,7 +15,8 @@ read_shimadzu_gcd( \arguments{ \item{path}{Path to GCD file.} -\item{format_out}{Matrix or data.frame.} +\item{format_out}{Class of output. 
Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Either \code{wide} (default) or \code{long}.} diff --git a/man/read_shimadzu_lcd.Rd b/man/read_shimadzu_lcd.Rd index 41f34ed..b255f9c 100644 --- a/man/read_shimadzu_lcd.Rd +++ b/man/read_shimadzu_lcd.Rd @@ -7,7 +7,7 @@ read_shimadzu_lcd( path, what, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), diff --git a/man/read_shimadzu_qgd.Rd b/man/read_shimadzu_qgd.Rd index f04c31e..4980447 100644 --- a/man/read_shimadzu_qgd.Rd +++ b/man/read_shimadzu_qgd.Rd @@ -6,8 +6,8 @@ \usage{ read_shimadzu_qgd( path, - what = c("tic", "ms"), - format_out = c("matrix", "data.frame"), + what = c("MS1", "TIC"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") @@ -30,8 +30,9 @@ streams.} or \code{raw}.} } \value{ -A 2D chromatogram from the chromatogram stream in \code{matrix} or -\code{data.frame} format, according to the value of \code{format_out}. +A 2D chromatogram from the chromatogram stream in \code{matrix}, +\code{data.frame}, or \code{data.table} format, according to the value of +\code{format_out}. The chromatograms will be returned in \code{wide} or \code{long} format according to the value of \code{data_format}. 
} diff --git a/man/read_sz_lcd_2d.Rd b/man/read_sz_lcd_2d.Rd index b19a122..c66f3cf 100644 --- a/man/read_sz_lcd_2d.Rd +++ b/man/read_sz_lcd_2d.Rd @@ -6,7 +6,7 @@ \usage{ read_sz_lcd_2d( path, - format_out = "matrix", + format_out = "data.frame", data_format = "wide", read_metadata = TRUE, metadata_format = "shimadzu_lcd", diff --git a/man/read_sz_tic.Rd b/man/read_sz_tic.Rd index 2f6ada1..36ada1b 100644 --- a/man/read_sz_tic.Rd +++ b/man/read_sz_tic.Rd @@ -16,7 +16,7 @@ is the intensity.} \usage{ read_sz_tic( path, - format_out = c("matrix", "data.frame"), + format_out = "data.frame", data_format = c("wide", "long"), read_metadata = TRUE ) diff --git a/man/read_varian_sms.Rd b/man/read_varian_sms.Rd index 78f5af4..1632380 100644 --- a/man/read_varian_sms.Rd +++ b/man/read_varian_sms.Rd @@ -6,8 +6,8 @@ \usage{ read_varian_sms( path, - what = c("chrom", "MS1"), - format_out = c("matrix", "data.frame"), + what = c("MS1", "TIC", "BPC"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, collapse = TRUE diff --git a/man/read_waters_arw.Rd b/man/read_waters_arw.Rd index dc20652..edc4073 100644 --- a/man/read_waters_arw.Rd +++ b/man/read_waters_arw.Rd @@ -6,7 +6,7 @@ \usage{ read_waters_arw( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") @@ -15,7 +15,8 @@ read_waters_arw( \arguments{ \item{path}{Path to file} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. 
Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/read_waters_raw.Rd b/man/read_waters_raw.Rd index 14efc61..088a7c0 100644 --- a/man/read_waters_raw.Rd +++ b/man/read_waters_raw.Rd @@ -6,7 +6,7 @@ \usage{ read_waters_raw( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") @@ -15,7 +15,8 @@ read_waters_raw( \arguments{ \item{path}{Path to \code{.raw} file.} -\item{format_out}{Matrix or data.frame.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Either \code{wide} (default) or \code{long}.} @@ -33,7 +34,7 @@ Parser for reading 'Waters MassLynx (.raw) files into R. } \note{ For now this parser only reads 1D chromatograms (not mass spectra or -DAD data). +DAD data) and does not support parsing of metadata from 'Waters' RAW files. } \author{ Ethan Bass diff --git a/man/sp_converter.Rd b/man/sp_converter.Rd index 138b852..68d1838 100644 --- a/man/sp_converter.Rd +++ b/man/sp_converter.Rd @@ -6,7 +6,7 @@ \usage{ sp_converter( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") @@ -15,7 +15,8 @@ sp_converter( \arguments{ \item{path}{Path to file} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. 
Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/uv_converter.Rd b/man/uv_converter.Rd index e937b27..c20e40c 100644 --- a/man/uv_converter.Rd +++ b/man/uv_converter.Rd @@ -6,7 +6,7 @@ \usage{ uv_converter( path, - format_out = c("matrix", "data.frame"), + format_out = c("matrix", "data.frame", "data.table"), data_format = c("wide", "long"), correction = TRUE, read_metadata = TRUE, @@ -16,7 +16,8 @@ uv_converter( \arguments{ \item{path}{Path to file} -\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{format_out}{Class of output. Either \code{matrix}, \code{data.frame}, +or \code{data.table}.} \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} diff --git a/man/write_mzml.Rd b/man/write_mzml.Rd new file mode 100644 index 0000000..6b694f3 --- /dev/null +++ b/man/write_mzml.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_mzml.R +\name{write_mzml} +\alias{write_mzml} +\title{Write mzML} +\usage{ +write_mzml( + data, + path_out, + what = NULL, + instrument_info = NULL, + compress = TRUE, + indexed = TRUE, + force = FALSE, + show_progress = TRUE, + verbose = getOption("verbose") +) +} +\arguments{ +\item{data}{List of data.frames or data.tables containing spectral data.} + +\item{path_out}{Path to write mzML file.} + +\item{what}{Which streams to write to mzML: \code{"ms1"}, \code{"ms2"}, +\code{"tic"}, \code{"bpc"}, and/or \code{"dad"}.} + +\item{instrument_info}{Instrument info to write to mzML file.} + +\item{compress}{Logical. Whether to use zlib compression. Defaults to +\code{TRUE}.} + +\item{indexed}{Logical. Whether to write indexed mzML. Defaults to +\code{TRUE}.} + +\item{force}{Logical. Whether to overwrite existing files at \code{path_out}. +Defaults to \code{FALSE}.} + +\item{show_progress}{Logical. Whether to show progress bar. 
Defaults to +\code{TRUE}.} + +\item{verbose}{Logical. Whether or not to print status messages.} +} +\value{ +Invisibly returns the path to the written mzML file. +} +\description{ +This function constructs mzML files using a streaming XML-writing approach, +generating and writing XML content directly to a file connection. This method +is memory-efficient and fast, making it well-suited for large mzML files. +However, it may be less flexible than DOM-based methods for handling complex +XML structures or large-scale modifications after writing has begun. +} +\details{ +This function constructs mzML files by writing XML strings directly to a file +connection. While this approach is memory-efficient and fast, it may be less +flexible for handling complex XML structures. + +The function supports writing various types of spectral data including MS1, +MS2, TIC (Total Ion Current), BPC (Base Peak Chromatogram), and DAD +(Diode Array Detector) data. + +If \code{indexed = TRUE}, the function will create an indexed mzML file, which +allows faster random access to spectra. +} +\author{ +Ethan Bass +}