Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
## chromConverter 0.7.6

* Added support for extraction of DAD and auxiliary instrumental data (stored in `.IT` files) from 'Agilent OpenLab' `.dx` files.
* Fixed misplaced parentheses in `read_agilent_d` causing possible bug.
## chromConverter 0.8.0

* Improved support for 'Agilent OpenLab' `.dx` files: extraction of DAD and auxiliary instrumental data (stored in `.IT` files).
* Refactored `read_shimadzu_qgd` for a 1.4x speedup in the parsing of Shimadzu `.qgd` files, cutting execution time by 30%.
* Refactored `read_shimadzu_lcd` for a 2.4x speedup in the parsing of Shimadzu `.lcd` files, cutting execution time by 60%.
* Refactored `write_mzml` for massive speed-up when writing mzML files, especially for large MS data.
* Fixed 'Shimadzu' metadata time zone offsets.
* Fixed misplaced parentheses in `read_agilent_d` causing possible bug.
* Fixed bug in `read_chemstation_uv` causing error for long format data.
* Added more informative error messages for `read_agilent_d`.
* Added additional tests for retention times and `data_format` attribute.
* Added `data_format` and `read_metadata` arguments for `read_chemstation_csv`.
* Fixed incorrect `data_format` attributes for MS data to reflect that they are always returned in long format.
* Fixed documentation to accurately reflect the fact that MS data is always returned in long format.
* Automatically return long format when `data.table` output is selected since data.tables do not have rownames.
* Fixed error due to fractional timezones in Shimadzu metadata (e.g., India +05:30).
* Fixed bug in `write_mzml` causing retention time shifts for BPC and TIC.
* Rewrote `configure_python_environment` function to facilitate configuration of a chromConverter virtual environment or conda environment, though a dedicated environment is no longer required (as of chromConverter v0.7.4).
* Fixed bug in `collapse` argument causing functions to return vector when `format_out` is `data.frame`.
* Fixed bug causing elimination of retention times when `format_out` is `data.table`.
* Enabled `data.table` format in `read_shimadzu_ascii`.
Expand Down
6 changes: 3 additions & 3 deletions R/call_aston.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ sp_converter <- function(path, format_out = c("matrix", "data.frame", "data.tabl
metadata_format = c("chromconverter", "raw")){
check_aston_configuration()
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format,
chromconverter = "masshunter_dad", raw = "raw")
Expand Down Expand Up @@ -74,7 +74,7 @@ uv_converter <- function(path, format_out = c("matrix","data.frame","data.table"
metadata_format = c("chromconverter", "raw")){
check_aston_configuration()
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide","long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format,
chromconverter = "chemstation_uv", raw = "raw")
Expand Down Expand Up @@ -118,7 +118,7 @@ trace_converter <- function(path, format_out = c("matrix", "data.frame"),
check_aston_configuration()
format_out <- check_format_out(format_out)
format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table"))
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
trace_file <- reticulate::import("aston.tracefile")
pd <- reticulate::import("pandas")
x <- trace_file$TraceFile(path)
Expand Down
2 changes: 1 addition & 1 deletion R/call_entab.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ call_entab <- function(path, data_format = c("wide", "long"),
call. = FALSE)
}
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)

metadata_format <- match.arg(tolower(metadata_format), c("chromconverter", "raw"))
metadata_format <- switch(metadata_format,
Expand Down
2 changes: 2 additions & 0 deletions R/call_openchrom.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ call_openchrom <- function(files, path_out = NULL, format_in,
return_paths = FALSE,
verbose = getOption("verbose")){
format_out <- check_format_out(format_out)
data_format <- check_data_format(data_format, format_out)

if (length(files) == 0){
stop("Files not found.")
}
Expand Down
4 changes: 2 additions & 2 deletions R/call_rainbow.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ call_rainbow <- function(path,
check_rb_configuration()
by <- match.arg(by, c("detector", "name"))
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(tolower(metadata_format),
c("chromconverter", "raw"))
metadata_format <- switch(metadata_format, "chromconverter" = "rainbow", "")
Expand Down Expand Up @@ -107,7 +107,7 @@ extract_rb_data <- function(xx, format_out = "matrix",
metadata_format = "rainbow",
meta = NULL,
source_file){
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
data <- xx$data
try(rownames(data) <- xx$xlabels)
colnames(data) <- xx$ylabels
Expand Down
64 changes: 56 additions & 8 deletions R/olefile_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,19 +131,67 @@ ole_list_streams <- function(path, pattern = NULL, ignore.case = FALSE,
#' ASCII files exported from 'Lab Solutions'.
#' @importFrom bit64 as.integer64
#' @noRd

sztime_to_unixtime <- function(low, high, tz = "UTC") {
  # Convert a 64-bit tick count, supplied as two 32-bit halves (`low`, `high`),
  # into a POSIXct date-time.
  #
  # low:  lower 32 bits of the tick count (may arrive as a negative signed int).
  # high: upper 32 bits of the tick count.
  # tz:   "UTC" or a Shimadzu-style offset string; it is translated to a time
  #       zone name by parse_shimadzu_tz().
  #
  # Returns a POSIXct value whose `tzone` is the translated time zone.

  # All time zone translation is delegated to parse_shimadzu_tz(). The previous
  # inline conversion (`as.numeric(gsub("'00'", "", tz))`) produced NA for
  # offsets with non-zero minutes (e.g. "+05'30'") and then failed in
  # `if (tz > 0)` before the parser was ever reached.
  tz <- parse_shimadzu_tz(tz)

  # A negative `low` is an unsigned 32-bit value that was read as signed;
  # adding 2^32 restores the unsigned magnitude.
  if (low < 0) {
    low <- bit64::as.integer64(low) + 2^32
  }

  # Reassemble the 64-bit count from its halves.
  filetime <- bit64::as.integer64(high) * 2^32 + bit64::as.integer64(low)

  # Ticks are divided by 1e7 (i.e. 100 ns units -> seconds); 11644473600 is the
  # number of seconds between 1601-01-01 and the Unix epoch (1970-01-01).
  unix_time <- (filetime / 10000000) - 11644473600
  as.POSIXct(unix_time, origin = "1970-01-01", tz = tz)
}

#' Translate a Shimadzu time zone offset into a time zone name
#'
#' @param tz Either \code{"UTC"}, a string that already contains a \code{/}
#' (treated as a time zone name and returned as is), or an offset string
#' matching \code{[+-]HH'MM} (e.g. \code{"+05'00'"}).
#' @return A single character string: \code{"UTC"}, a named zone returned by
#' \code{convert_fractional_timezone_offset}, or an \code{Etc/GMT} zone for
#' whole-hour offsets. Input that cannot be translated yields \code{"UTC"}
#' with a warning.
#' @noRd
parse_shimadzu_tz <- function(tz){
  # Guard against missing or non-scalar input, which would otherwise fail in
  # the comparisons below with an uninformative error.
  if (length(tz) != 1 || is.na(tz)){
    warning("Time zone is missing or not a single string. Defaulting to UTC.",
            call. = FALSE)
    return("UTC")
  }
  if (tz == "UTC"){
    return(tz)
  }

  # Known fractional offsets (e.g. +05'30') are mapped to named zones first;
  # anything else comes back unchanged.
  tz <- convert_fractional_timezone_offset(tz)
  if (grepl("/", tz)){
    return(tz)
  }

  pattern <- "([+-])(\\d{2})'(\\d{2})"
  captures <- regmatches(tz, regexec(pattern, tz))[[1]]

  # regexec yields an empty match when the pattern is not found; indexing into
  # it would give NA and make the sign comparison below fail.
  if (length(captures) < 4){
    warning("Unrecognized time zone offset: '", tz, "'. Defaulting to UTC.",
            call. = FALSE)
    return("UTC")
  }

  sign <- captures[2]
  hours <- as.numeric(captures[3])
  minutes <- as.numeric(captures[4])

  # 'Etc/GMT' zones exist only for whole hours, so a fractional offset that was
  # not mapped above cannot be expressed (e.g. "Etc/GMT-5.25" is not a zone).
  if (minutes != 0){
    warning("Unsupported fractional time zone offset: '", tz,
            "'. Defaulting to UTC.", call. = FALSE)
    return("UTC")
  }

  # 'Etc/GMT' names use the inverted (POSIX) sign convention: a zone that is
  # ahead of UTC by n hours is called "Etc/GMT-n".
  if (sign == "+") {
    paste0("Etc/GMT-", hours)
  } else {
    paste0("Etc/GMT+", hours)
  }
}

#' Map a fractional UTC offset to a named time zone
#'
#' Apostrophes are stripped from \code{tz} (e.g. \code{"+05'30'"} becomes
#' \code{"+0530"}) and the result is looked up in a fixed table of 30- and
#' 45-minute offsets.
#'
#' @param tz A single character string giving the offset.
#' @return The matching time zone name, or \code{tz} unchanged (apostrophes
#' included) when the offset is not in the table.
#' @author Ethan Bass
#' @noRd
convert_fractional_timezone_offset <- function(tz) {
  clean_offset <- gsub("'", "", tz)

  switch(clean_offset,
    # 30-minute offsets (positive)
    "+0330" = "Asia/Tehran",
    "+0430" = "Asia/Kabul",
    "+0530" = "Asia/Kolkata",
    "+0630" = "Asia/Yangon",
    "+0930" = "Australia/Adelaide",
    "+1030" = "Australia/Adelaide",
    # NOTE(review): no present-day zone appears to observe +12:30 or +13:30;
    # these two entries are kept for backward compatibility -- confirm.
    "+1230" = "Pacific/Auckland",
    "+1330" = "Pacific/Chatham",

    # 30-minute offsets (negative)
    "-0230" = "America/St_Johns",  # Newfoundland daylight time
    "-0330" = "America/St_Johns",
    "-0430" = "America/Caracas",
    "-0930" = "Pacific/Marquesas",

    # 45-minute offsets (positive)
    "+0545" = "Asia/Kathmandu",
    "+0845" = "Australia/Eucla",
    "+1245" = "Pacific/Chatham",
    "+1345" = "Pacific/Chatham",   # Chatham daylight time

    # Unknown offsets: return the input unchanged so the caller can handle it
    tz
  )
}
2 changes: 1 addition & 1 deletion R/read_agilent_dx.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ read_agilent_dx <- function (path, what = c("chroms", "dad"), path_out = NULL,
metadata_format = c("chromconverter", "raw"),
collapse = TRUE) {
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
what <- match.arg(what, c("chroms", "dad", "instrument"), several.ok = TRUE)
files <- unzip(path, list = TRUE)
Expand Down
2 changes: 1 addition & 1 deletion R/read_asm.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ read_asm <- function(path, data_format = c("wide", "long"),
read_metadata = TRUE,
metadata_format = c("chromconverter", "raw"),
collapse = TRUE){
data_format <- match.arg(data_format, c("wide", "long"))
format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format, "chromconverter" = "asm", "raw")

Expand Down
53 changes: 23 additions & 30 deletions R/read_cdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' For 2D files, "long" format returns the retention time as the first column of
#' the data.frame or matrix while "wide" format returns the retention time as the
#' rownames of the object.
#' @param what For ANDI chrom files, whether to extract \code{chroms}
#' and/or \code{peak_table}. For ANDI ms files, whether to extract MS1 scans
#' rownames of the object. This argument applies only to 2D chromatograms, since
#' MS data will always be returned in long format.
#' @param what For \code{ANDI chrom} files, whether to extract \code{chroms}
#' and/or \code{peak_table}. For \code{ANDI ms} files, whether to extract MS1 scans
#' (\code{MS1}) or the total ion chromatogram (\code{TIC}).
#' @param read_metadata Whether to read metadata from file.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
Expand All @@ -31,8 +32,8 @@ read_cdf <- function(path, format_out = c("matrix", "data.frame", "data.table"),
metadata_format = c("chromconverter", "raw"),
collapse = TRUE, ...){
check_for_pkg("ncdf4")
data_format <- match.arg(data_format, c("wide", "long"))
format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table"))
format_out <- check_format_out(format_out)
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
nc <- ncdf4::nc_open(path)
if ("ordinate_values" %in% names(nc$var)){
Expand Down Expand Up @@ -147,13 +148,15 @@ read_andi_chrom <- function(path, format_out = c("matrix", "data.frame", "data.t
#' @author Ethan Bass
#' @noRd

read_andi_ms <- function(path, format_out = c("matrix", "data.frame"),
read_andi_ms <- function(path,
format_out = c("matrix", "data.frame", "data.table"),
data_format = c("wide", "long"),
what = c("MS1", "TIC"),
ms_format = c("data.frame", "list"),
read_metadata = TRUE,
metadata_format = "chromconverter",
collapse = TRUE){
format_out <- check_format_out(format_out)
metadata_format <- switch(metadata_format,
chromconverter = "andi_ms", raw = "raw")
ms_format <- match.arg(ms_format, c("data.frame", "list"))
Expand All @@ -162,17 +165,11 @@ read_andi_ms <- function(path, format_out = c("matrix", "data.frame"),
nc <- ncdf4::nc_open(path)
on.exit(ncdf4::nc_close(nc))
if (any(what == "TIC")){
y <- ncdf4::ncvar_get(nc, "total_intensity")
x <- ncdf4::ncvar_get(nc, "scan_acquisition_time")
data = data.frame(rt = x, intensity = y)
if (data_format == "wide"){
rownames(data) <- data[, 1]
data <- data[, -1, drop = FALSE]
}
if (format_out == "matrix"){
data <- as.matrix(data)
}
TIC <- data
y <- ncdf4::ncvar_get(nc, "total_intensity")

TIC <- format_2d_chromatogram(rt = x, int = y, data_format = data_format,
format_out = format_out)
}
if (any(what == "MS1")){
int <- ncdf4::ncvar_get(nc, "intensity_values")
Expand All @@ -196,23 +193,19 @@ read_andi_ms <- function(path, format_out = c("matrix", "data.frame"),
}

data <- mget(what)
if (collapse) data <- collapse_list(data)
if (read_metadata){
meta <- ncdf4::ncatt_get(nc, varid = 0)
meta$detector <- "MS"
if (inherits(data, "list")){
data <- lapply(data, function(xx){
attach_metadata(xx, meta = meta, format_in = metadata_format,
format_out = format_out, data_format = data_format,
parser = "chromconverter", source_file = path,
source_file_format = "andi_ms")
})
} else{
data <- attach_metadata(data, meta = meta, format_in = metadata_format,
format_out = format_out, data_format = data_format,
parser = "chromconverter", source_file = path,
source_file_format = "andi_ms")
}
data <- purrr::imap(data, function(x, h){
attach_metadata(x, meta = meta, format_in = metadata_format,
format_out = format_out,
data_format = ifelse(h == "MS1", "long", data_format),
parser = "chromconverter", source_file = path,
source_file_format = "andi_ms")
})
}
if (collapse){
data <- collapse_list(data)
}
data
}
4 changes: 2 additions & 2 deletions R/read_chemstation_ch.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame",
metadata_format = c("chromconverter", "raw"),
scale = TRUE){
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format, chromconverter = "chemstation",
raw = "raw")
Expand Down Expand Up @@ -467,7 +467,7 @@ read_chemstation_it <- function(path, format_out = c("matrix", "data.frame",
metadata_format = c("chromconverter", "raw"),
scale = TRUE){
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format, chromconverter = "chemstation",
raw = "raw")
Expand Down
1 change: 1 addition & 0 deletions R/read_chemstation_csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@ read_chemstation_csv <- function(path, format_out = c("matrix", "data.frame",
parser = "chromconverter", source_file = path,
source_file_format = "chemstation_csv")
}
data
}

34 changes: 21 additions & 13 deletions R/read_chemstation_ms.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#' streams.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @param data_format Either \code{wide} (default) or \code{long}. This argument
#' applies only to TIC data, since MS and BPC data will always be returned in
#' long format.
#' @param read_metadata Logical. Whether to attach metadata. Defaults to \code{TRUE}.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
#' or \code{raw}.
Expand Down Expand Up @@ -42,11 +44,11 @@ read_chemstation_ms <- function(path, what = c("MS1", "BPC", "TIC"),
metadata_format = c("chromconverter", "raw"),
collapse = TRUE){
format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
data_format <- check_data_format(data_format, format_out)
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- switch(metadata_format, chromconverter = "chemstation",
raw = "raw")
match.arg(what, c("MS1", "BPC", "TIC"), several.ok = TRUE)
what <- match.arg(what, c("MS1", "BPC", "TIC"), several.ok = TRUE)
f <- file(path, "rb")
on.exit(close(f))

Expand All @@ -73,9 +75,9 @@ read_chemstation_ms <- function(path, what = c("MS1", "BPC", "TIC"),
dat <- lapply(seq_len(n_rt), function(i){
read_cs_ms_block(f)
})
if (any(what == "MS1"))
if (any(what == "MS1")){
MS1 <- do.call(rbind, lapply(dat, "[[", 1))

}
if (any(what == "BPC")){
BPC <- do.call(rbind, lapply(dat, "[[", 2))
BPC[,2] <- BPC[,2]/20
Expand All @@ -85,7 +87,9 @@ read_chemstation_ms <- function(path, what = c("MS1", "BPC", "TIC"),

if (any(what == "TIC")){
TIC <- do.call(rbind, lapply(dat, "[[", 3))
colnames(TIC) <- c("rt", "intensity")
TIC <- format_2d_chromatogram(rt=TIC[,1], int = TIC[,2],
data_format = data_format,
format_out = format_out)
}

dat <- mget(what)
Expand All @@ -101,15 +105,18 @@ read_chemstation_ms <- function(path, what = c("MS1", "BPC", "TIC"),
read_cs_string(f, type = 1)
})
meta$detector <- "MS"
dat <- lapply(dat, function(x){
dat <- purrr::imap(dat, function(x, h){
attach_metadata(x, meta, format_in = metadata_format,
data_format = data_format, format_out = format_out,
parser = "chromconverter", source_file = path,
source_file_format = paste0("chemstation_", version),
scale = FALSE)
data_format = ifelse(h != "TIC", "long", data_format),
format_out = format_out, parser = "chromconverter",
source_file = path,
source_file_format = paste0("chemstation_", version),
scale = FALSE)
})
}
if (collapse) dat <- collapse_list(dat)
if (collapse){
dat <- collapse_list(dat)
}
dat
}

Expand All @@ -126,7 +133,8 @@ read_cs_ms_block <- function(f){
u1 <- readBin(f, what = "integer", size = 4, endian = "big")
n_row <- readBin(f, what = "integer", size = 4, endian = "big")
bpc <- c(rt, readBin(f, what = "integer", n = 2, size = 2, endian = "big"))
mat <- matrix(NA, nrow = n_row, ncol = 2, dimnames = list(NULL, c("mz", "intensity")))
mat <- matrix(NA, nrow = n_row, ncol = 2,
dimnames = list(NULL, c("mz", "intensity")))
for (i in seq_len(n_row)){
mat[i,] <- readBin(f, what = "integer", size = 2, n = 2,
signed = FALSE, endian = "big")
Expand Down
Loading
Loading