diff --git a/DESCRIPTION b/DESCRIPTION index 0f19fa6..b843e1f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.4.0 +Version: 0.4.1 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index fa2bf9c..52e7092 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,21 @@ +## chromConverter 0.4.1 + +### New features + +* Added support for "Chemstation" UV (`.ch`) files (version 30). + +### Minor improvements + +* Updated `read_chromeleon` to better deal with comma decimal separators in metadata. +* Updated `read_chromeleon` to deal with more datetime formats. +* Updated `read_chromeleon` to deal with unicode microliters. +* Added tests for rainbow parser and `read_chemstation_ch`. + +### Bug fixes + +* Fixed bug preventing compilation of PDF manual. +* Fixed new bug causing failure to correctly read names of chemstation files from .D directory. + ## chromConverter 0.4.0 ### New features diff --git a/R/aston_parsers.R b/R/aston_parsers.R index 57f497c..35e44ad 100644 --- a/R/aston_parsers.R +++ b/R/aston_parsers.R @@ -121,11 +121,15 @@ trace_converter <- function(file, format_out = c("matrix", "data.frame"), #' #' Configures reticulate to use Aston file parsers. #' @name configure_aston -#' @return No return value. +#' @param return_boolean Logical. Whether to return a Boolean value indicating +#' if the chromConverter environment is correctly configured. +#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value +#' indicating whether the chromConverter environment is configured correctly. +#' Otherwise, there is no return value. #' @author Ethan Bass #' @import reticulate #' @export -configure_aston <- function(){ +configure_aston <- function(return_boolean=FALSE){ install <- FALSE # path <- miniconda_path() if (!dir.exists(miniconda_path())){ @@ -146,6 +150,9 @@ configure_aston <- function(){ } } assign_trace_file() + if (return_boolean){ + return(env) + } } #' @noRd diff --git a/R/attach_metadata.R b/R/attach_metadata.R index 272a93c..02fc004 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -65,14 +65,27 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser parser = "chromConverter", format_out = format_out) }, "chromeleon" = { + datetime.idx <- unlist(sapply(c("Date$","Time$"), function(str) grep(str, names(meta)))) + datetime <- unlist(meta[datetime.idx]) + if (length(datetime > 1)){ + datetime <- paste(datetime, collapse=" ") + } + datetime <- as.POSIXct(datetime, format = c("%m/%d/%Y %H:%M:%S", "%d.%m.%Y %H:%M:%S", + "%m/%d/%Y %H:%M:%S %p %z")) + datetime <- datetime[!is.na(datetime)] + time_interval_unit <- tryCatch({ + get_time_unit(grep("Average Step", names(meta), value = TRUE)[1], + format_in = "chromeleon")}, error = function(err) NA) + time_unit <- tryCatch({ + get_time_unit(grep("Time Min.", names(meta), value = TRUE)[1], + format_in="chromeleon")}, error = function(err) NA) structure(x, instrument = NA, detector = meta$Detector, software = meta$`Generating Data System`, method = meta$`Instrument Method`, batch = NA, operator = meta$`Operator`, - run_datetime = as.POSIXct(paste(meta$`Injection Date`, meta$`Injection Time`), - format = "%m/%d/%Y %H:%M:%S"), + run_datetime = datetime, # run_date = meta$`Injection Date`, # run_time = meta$`Injection Time`, sample_name = meta$Injection, @@ -82,13 +95,9 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser time_range = c(meta$`Time Min. (min)`, meta$`Time Max. (min)`), # start_time = meta$`Time Min. (min)`, # end_time = meta$`Time Max. (min)`, - time_interval = meta$`Average Step (s)`, - time_interval_unit <- get_time_unit( - grep("Average Step", names(meta), value = TRUE)[1], - format_in = "chromeleon"), - time_unit = get_time_unit( - grep("Time Min.", names(meta), value = TRUE)[1], - format_in="chromeleon"), + time_interval = meta[[grep("Average Step", names(meta))]], + time_interval_unit = time_interval_unit, + time_unit = time_unit, # uniform_sampling = meta$`Min. Step (s)` == meta$`Max. Step (s)`, detector_range = NA, detector_unit = meta$`Signal Unit`, diff --git a/R/call_openchrom.R b/R/call_openchrom.R index fc3573f..ec11bd3 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -140,7 +140,9 @@ write_openchrom_batchfile <- function(files, path_out, #' @param path Path to 'OpenChrom' executable (Optional). The supplied path will #' overwrite the current path. #' @importFrom utils read.table write.table -#' @return Returns path to OpenChrom command-line application +#' @return If \code{cli} is set to \code{"status"}, returns a Boolean value +#' indicating whether 'OpenChrom' is configured correctly. Otherwise, returns +#' the path to OpenChrom command-line application. #' @author Ethan Bass #' @export diff --git a/R/call_rainbow.R b/R/call_rainbow.R index b634598..e279503 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -113,11 +113,15 @@ extract_rb_names <- function(xx){ #' #' Configures reticulate to use rainbow file parsers. #' @name configure_rainbow -#' @return No return value. +#' @param return_boolean Logical. Whether to return a Boolean value indicating +#' if the chromConverter environment is correctly configured. +#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value +#' indicating whether the chromConverter environment is configured correctly. +#' Otherwise, there is no return value. #' @author Ethan Bass #' @import reticulate #' @export -configure_rainbow <- function(){ +configure_rainbow <- function(return_boolean = FALSE){ install <- FALSE if (!dir.exists(miniconda_path())){ install <- readline("It is recommended to install miniconda in your R library to use rainbow parsers. Install miniconda now? (y/n)") @@ -134,6 +138,9 @@ configure_rainbow <- function(){ } } assign_rb_read() + if (return_boolean){ + env + } } #' @noRd diff --git a/R/parsers.R b/R/parsers.R index 53aa4d1..670f705 100644 --- a/R/parsers.R +++ b/R/parsers.R @@ -16,10 +16,13 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"), format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide","long")) xx <- readLines(file) + xx <- remove_unicode_chars(xx) start <- tail(grep("Data:", xx), 1) - x <- read.csv(file, skip = start, sep="\t") + x <- read.csv(file, skip = start, sep="\t", row.names = NULL) x <- x[,-2, drop = FALSE] + x <- x[,colSums(is.na(x)) < nrow(x)] if (any(grepl(",",as.data.frame(x)[-1,2]))){ + decimal_separator <- "," x <- apply(x, 2, function(x) gsub("\\.", "", x)) x <- apply(x, 2, function(x) gsub(",", ".", x)) } @@ -34,6 +37,9 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"), } if (read_metadata){ meta <- try(read_chromeleon_metadata(xx)) + if (decimal_separator == ","){ + meta <- lapply(meta, function(x) gsub(",",".",x)) + } if (!inherits(meta, "try-error")){ x <- attach_metadata(x, meta, format_in = "chromeleon", format_out = format_out, data_format = "wide", parser = "chromConverter", diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index 337378a..57443c4 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -24,11 +24,12 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), # HEADER seek(f, 1, "start") version <- readBin(f, "character", n = 1) - version <- match.arg(version, choices = c("8", "81", "130", "179", "181")) + version <- match.arg(version, choices = c("8", "81", "30", "130", "179", "181")) offsets <- get_agilent_offsets(version) decoder <- switch(version, "8" = decode_delta, "81" = decode_double_delta, + "30" = decode_delta, "130" = decode_delta, "181" = decode_double_delta, "179" = decode_double_array) @@ -46,7 +47,7 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), seek(f, where = 282, origin = "start") seek(f, where = 282, origin = "start") - if (version %in% c("8","130")){ + if (version %in% c("8", "30", "130")){ xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 xmax <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 } else { @@ -74,6 +75,7 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), if (read_metadata){ meta_slots <- switch(version, "8" = 9, "81" = 9, + "30" = 11, "130" = 12, "181" = 9, "179" = 9) @@ -81,7 +83,11 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), meta <- lapply(offsets[seq_len(meta_slots)], function(offset){ seek(f, where = offset, origin = "start") n <- get_nchar(f) - cc_collapse(readBin(f, "character", n = n)) + if (version == "30"){ + readBin(f, what = "character") + } else{ + cc_collapse(readBin(f, "character", n = n)) + } }) if (read_metadata){ datetime_regex <- "(\\d{2}-[A-Za-z]{3}-\\d{2}, \\d{2}:\\d{2}:\\d{2})|(\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2}:\\d{2} (?:AM|PM)?)" @@ -177,7 +183,7 @@ decode_double_array <- function(file, offset) { seek(file, offset, "start") signal <- readBin(file, what = "double", size = 4, endian = "little", n = (fsize - offset)) - signal <- signal[seq(2,length(signal),2)] + signal <- signal[seq(2, length(signal), 2)] return(signal) } @@ -264,12 +270,12 @@ get_agilent_offsets <- function(version){ software_revision = 3802, #'utf16' units = 4172, # 'utf16' signal = 4213, # 'utf16' - zero = 4110, # INT32 + intercept = 4110, # INT32 scaling_factor = 4732) #ENDIAN + 'd' } else if (version == 30){ offsets <- list( file_type = 4, # utf16 - # sample_name = 858, # utf16 + sample_name = 24, # utf16 operator = 148, # utf16 date = 178, # utf16 # inlet = 2492, # utf16 @@ -280,8 +286,8 @@ get_agilent_offsets <- function(version){ software_revision = 405, #'utf16' units = 580, # 'utf16' signal = 596, # 'utf16' - zero = 4110, # INT32 - scaling_factor = 4732, + intercept = 636, # INT32 + scaling_factor = 644, data_start = 1024 #ENDIAN + 'd' ) } else if (version %in% c("8","81")){ diff --git a/R/read_chroms.R b/R/read_chroms.R index 629125b..1ef8ef9 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -107,7 +107,7 @@ read_chroms <- function(paths, find_files, } if (length(format_in) > 1){ if (!find_files){ - format_in <- get_filetype(ifelse(length(paths)>1, paths[[1]], paths)) + format_in <- get_filetype(paths[1]) } else{ stop("Please specify the file format of your chromatograms by setting the `format_in` argument.") } @@ -269,14 +269,15 @@ read_chroms <- function(paths, find_files, } } } - if (all(grepl(".d$", files, ignore.case = TRUE))){ file_names <- strsplit(files, "/") file_names <- gsub("\\.[Dd]", "", sapply(file_names, function(n){ ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1)) })) - } else {file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1])} + } else { + file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1]) + } if (parser != "openchrom"){ laplee <- choose_apply_fnc(progress_bar) data <- laplee(X = files, function(file){ diff --git a/R/utils.R b/R/utils.R index 9c9b7b9..861a3dc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,7 @@ utils::globalVariables(names = c('.')) # Globals <- list() +#' Check parser #' @noRd check_parser <- function(format_in, parser=NULL, find = FALSE){ allowed_formats <- list(openchrom = c("msd","csd","wsd"), @@ -42,6 +43,13 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ } } +#' Remove unicode characters +#' @noRd +remove_unicode_chars <- function(x){ + stringr::str_replace_all(x, "\xb5", "micro") +} + +#' Format extension #' @noRd format_to_extension <- function(format_in){ switch(format_in, @@ -135,6 +143,7 @@ choose_apply_fnc <- function(progress_bar, parallel = FALSE, cl = NULL){ fn } +#' Transfer metadata #'@noRd transfer_metadata <- function (new_object, old_object, exclude = c("names", "row.names", "class", "dim", "dimnames")) @@ -145,6 +154,7 @@ transfer_metadata <- function (new_object, old_object, exclude = c("names", "row new_object } +#' Get filetype #' @noRd get_filetype <- function(file, out = c("format_in", "filetype")){ out <- match.arg(out, c("format_in", "filetype")) @@ -154,7 +164,7 @@ get_filetype <- function(file, out = c("format_in", "filetype")){ filetype <- switch(magic, "x01/x32/x00/x00" = "AgilentChemstationMS", "x02/x02/x00/x00" = "AgilentMasshunterDADHeader", - "x02/x33/x30/x00" = "AgilentChemstationMWD", + # "x02/x33/x30/x00" = "AgilentChemstationMWD", "x02/x33/x31/x00" = "AgilentChemstationDAD", "x02/x38/x31/x00" = "AgilentChemstationFID", #81 "x03/x02/x00/x00" = "AgilentMasshunterDAD", diff --git a/inst/CITATION b/inst/CITATION index 3343464..dddbe3b 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,7 +5,7 @@ citEntry( title = "chromConverter: chromatographic file converter", author = "Ethan Bass", year = "2023", - version = "version 0.4.0", + version = "version 0.4.1", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2022).", diff --git a/man/configure_aston.Rd b/man/configure_aston.Rd index 8bac34e..518e76e 100644 --- a/man/configure_aston.Rd +++ b/man/configure_aston.Rd @@ -4,10 +4,16 @@ \alias{configure_aston} \title{Configure Aston} \usage{ -configure_aston() +configure_aston(return_boolean = FALSE) +} +\arguments{ +\item{return_boolean}{Logical. Whether to return a Boolean value indicating +if the chromConverter environment is correctly configured.} } \value{ -No return value. +If \code{return_boolean} is \code{TRUE}, returns a Boolean value +indicating whether the chromConverter environment is configured correctly. +Otherwise, there is no return value. } \description{ Configures reticulate to use Aston file parsers. diff --git a/man/configure_openchrom.Rd b/man/configure_openchrom.Rd index 671b830..b35eafa 100644 --- a/man/configure_openchrom.Rd +++ b/man/configure_openchrom.Rd @@ -14,7 +14,9 @@ If "false", R will disable CLI. If NULL, R will not modify the ini file.} overwrite the current path.} } \value{ -Returns path to OpenChrom command-line application +If \code{cli} is set to \code{"status"}, returns a Boolean value +indicating whether 'OpenChrom' is configured correctly. Otherwise, returns +the path to OpenChrom command-line application. } \description{ Configure OpenChrom parser diff --git a/man/configure_rainbow.Rd b/man/configure_rainbow.Rd index 570c190..9b4ffe6 100644 --- a/man/configure_rainbow.Rd +++ b/man/configure_rainbow.Rd @@ -4,10 +4,16 @@ \alias{configure_rainbow} \title{Configure rainbow} \usage{ -configure_rainbow() +configure_rainbow(return_boolean = FALSE) +} +\arguments{ +\item{return_boolean}{Logical. Whether to return a Boolean value indicating +if the chromConverter environment is correctly configured.} } \value{ -No return value. +If \code{return_boolean} is \code{TRUE}, returns a Boolean value +indicating whether the chromConverter environment is configured correctly. +Otherwise, there is no return value. } \description{ Configures reticulate to use rainbow file parsers. diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 593f9da..1e34fe3 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -5,7 +5,7 @@ elementwise.all.equal <- Vectorize(function(x, y, ...) {isTRUE(all.equal(x, y, . # helper function to skip tests if we don't have the right python dependencies skip_if_missing_dependecies <- function() { reqs <- c("scipy","numpy", "aston", "pandas") - have_reqs <- sapply(reqs, py_module_available) + have_reqs <- sapply(reqs, reticulate::py_module_available) if (mean(have_reqs) < 1) skip(paste("required packages", reqs[!have_reqs], "not available for testing")) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 956c06f..3254823 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -40,6 +40,7 @@ test_that ("extract_metadata function works", { }) test_that("entab parser works", { + skip_on_cran() skip_if_not_installed("entab") file <- "testdata/dad1.uv" x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "entab", @@ -54,7 +55,8 @@ test_that("entab parser works", { test_that("shimadzu parser works", { file <- "testdata/ladder.txt" - x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, progress_bar = FALSE) + x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, + progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") expect_equal(attributes(x[[1]])$instrument, "GC-2014") }) @@ -63,11 +65,12 @@ test_that("read_mzml works", { ext_filepath <- system.file("extdata", package = "RaMS") DAD_filepath <- list.files(ext_filepath, full.names = TRUE, pattern = "uv_test_mini.mzML") - dad_long <- read_mzml(DAD_filepath, what = "DAD", verbose=FALSE) + dad_long <- read_mzml(DAD_filepath, what = "DAD", verbose = FALSE) expect_equal(dad_long, RaMS::grabMSdata(files = DAD_filepath, grab_what = "DAD", verbosity = FALSE) ) - dad_wide <- read_mzml(DAD_filepath, what = "DAD", verbose=FALSE, data_format="wide") + dad_wide <- read_mzml(DAD_filepath, what = "DAD", verbose = FALSE, + data_format = "wide") expect_equal(nrow(dad_wide[[1]]), length(unique(dad_long[[1]]$rt))) expect_equal(ncol(dad_wide[[1]]), length(unique(dad_long[[1]]$lambda))) expect_equal(as.numeric(colnames(dad_wide[[1]])), unique(dad_long[[1]]$lambda)) @@ -78,37 +81,38 @@ test_that("get_filetype works as expected", { expect_equal(get_filetype(path_uv), "chemstation_uv") }) -# test_that("thermoraw parser works",{ -# skip_if_not(configure_thermo_parser(check = TRUE)) -# file <- "/Users/ethanbass/Downloads/chrom_files/small.RAW" -# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) -# expect_equal(class(x[[1]])[1], "matrix") -# expect_equal(attributes(x[[1]])$instrument, "GC-2014") -# }) - -# test_that("rainbow parser works", { -# skip_if_missing_dependecies() -# file <- "testdata/DAD1.uv" -# x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "rainbow", -# find_files = FALSE, -# read_metadata = TRUE) -# expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) -# expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) -# expect_equal(class(x1[[1]])[1], "matrix") -# expect_equal(attr(x1[[1]], "parser"), "rainbow") -# expect_equal(attr(x1[[1]], "data_format"), "wide") -# }) +test_that("rainbow parser works", { + skip_if_missing_dependecies() + skip_on_cran() + x1 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "rainbow", + find_files = FALSE, + read_metadata = TRUE, + progress_bar = FALSE) + expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) + expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) + expect_equal(class(x1[[1]])[1], "matrix") + expect_equal(attr(x1[[1]], "parser"), "rainbow") + expect_equal(attr(x1[[1]], "data_format"), "wide") +}) -# test_that("check_path works on unix/linux", { -# skip_on_os("windows") -# expect_equal(check_path("~/Downloads"), "~/Downloads/") -# expect_equal(check_path("Downloads"), "/Downloads/") -# expect_equal(check_path("~/Downloads/"), "~/Downloads/") -# expect_equal(check_path("/Users/foo/"), "/Users/foo/") -# expect_equal(check_path("Users/foo/"), "/Users/foo/") -# expect_equal(check_path("/Users/foo"), "/Users/foo/") -# expect_equal(check_path("Users/foo"), "/Users/foo/") -# }) +test_that("chemstation_ch parser works", { + skip_if_missing_dependecies() + skip_on_cran() + x1 <- read_chroms("testdata/dad1B.ch", progress_bar = FALSE) + # expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) + # expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) + expect_equal(class(x1[[1]])[1], "matrix") + expect_equal(attr(x1[[1]], "parser"), "chromConverter") + expect_equal(attr(x1[[1]], "data_format"), "wide") + expect_equal(attr(x1[[1]], "detector_unit"), "mAU") + expect_equal(attr(x1[[1]], "file_version"), "130") + expect_equal(ncol(x1[[1]]), 1) + x2 <- read_chroms("testdata/dad1B.ch", progress_bar = FALSE, + data_format ="long", format_out="data.frame")[[1]] + expect_equal(ncol(x2), 2) + expect_equal(class(x2), "data.frame") + expect_equal(as.numeric(rownames(x1[[1]])), x2[,1]) +}) test_that("read_chroms exports csvs correctly", { skip_on_cran() @@ -119,8 +123,6 @@ test_that("read_chroms exports csvs correctly", { progress_bar = FALSE) x1_out <- read.csv(fs::path(path_out, "dad1", ext="csv"), row.names=1) expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) - # unlink(fs::path(path_out, "dad1", ext = "csv")) - # unlink(path_out) }) test_that("read_chroms exports cdf files correctly", { @@ -135,3 +137,10 @@ test_that("read_chroms exports cdf files correctly", { expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) }) +# test_that("thermoraw parser works",{ +# skip_if_not(configure_thermo_parser(check = TRUE)) +# file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" +# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) +# expect_equal(class(x[[1]])[1], "matrix") +# expect_equal(attributes(x[[1]])$instrument, "GC-2014") +# }) diff --git a/tests/testthat/testdata/dad1B.ch b/tests/testthat/testdata/dad1B.ch new file mode 100644 index 0000000..364480b Binary files /dev/null and b/tests/testthat/testdata/dad1B.ch differ