diff --git a/DESCRIPTION b/DESCRIPTION index 725c336c..dac65555 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: gDRimport Type: Package Title: Package for handling the import of dose-response data -Version: 1.11.2 +Version: 1.11.3 Date: 2026-05-04 Authors@R: c( person("Arkadiusz", "Gladki", role=c("aut", "cre"), email="gladki.arkadiusz@gmail.com", diff --git a/NEWS.md b/NEWS.md index a07eae53..32b281cc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## gDRimport 1.11.3 - 2026-05-18 +* apply updated gDRstyle rules + ## gDRimport 1.11.2 - 2026-05-04 * replace hardcoded `Duration` values with `NA_real_` in PRISM and PSet importers @@ -324,4 +327,4 @@ * code refactor using lintr ## gDRimport 0.0.1 - 2021-06-22 -* initial release - importing fucntions moved from gDRcore package +* initial release - importing fucntions moved from gDRcore package \ No newline at end of file diff --git a/R/D300.R b/R/D300.R index 5977a2d8..dee5142c 100644 --- a/R/D300.R +++ b/R/D300.R @@ -3,13 +3,13 @@ #' This functions takes a D300 file and generates corresponding template files #' #' @param D300_file character, file path to D300 file -#' @param destination_path character, path to folder where template +#' @param destination_path character, path to folder where template #' files will be generated #' @param metadata_file character, file path to file with mapping from D300 names to Gnumbers. Defaults to NULL. #' @param day0 logical, if TRUE, creates a template file for Day 0 data filled with vehicles in addition to #' the standard plates. Defaults to FALSE. #' @keywords D300 -#' +#' #' @examples #' td3 <- get_test_D300_data()[["f_96w"]] #' o_path <- file.path(tempdir(), "td3") @@ -19,7 +19,7 @@ #' unlink(o_path, recursive = TRUE) #' #' @return -#' Create one Excel file per plate. Each sheet in each plate file describes +#' Create one Excel file per plate. Each sheet in each plate file describes #' the drugs and corrresponding concentrations of what was tested in each well. #' @details #' For example, wells treated with 2 drugs in combination will result in 4 sheets per plate. @@ -37,20 +37,20 @@ import_D300 <- function(D300_file, metadata_file = NULL, day0 = FALSE) { assertthat::assert_that(is.character(destination_path), msg = "'destination_path' must be a character vector") - assertthat::assert_that(assertthat::is.readable(destination_path), + assertthat::assert_that(assertthat::is.readable(destination_path), msg = "'destination_path' must be a readable path") - + # Parse the D300 file first D300 <- parse_D300_xml(D300_file) D300 <- fill_NA(D300, from = "D300_Barcode", with = "D300_Plate_N") - + idfs <- list( untreated_tags = gDRutils::get_env_identifiers("untreated_tag"), drug_identifier = gDRutils::get_env_identifiers("drug"), conc_identifier = gDRutils::get_env_identifiers("concentration")) #standard identifiers - + has_meta <- !is.null(metadata_file) - + # Conditionally process metadata if provided if (has_meta) { Gnums <- parse_D300_metadata_file(metadata_file) @@ -60,12 +60,12 @@ import_D300 <- function(D300_file, treatment <- D300 treatment[[idfs$drug_identifier]] <- treatment$Name } - + req_cols <- c("Row", "Col") if (!all(present <- req_cols %in% colnames(treatment))) { - stop(sprintf("missing required columns from D300 file: '%s'", paste0(req_cols[!present], collapse = ", "))) + stop(sprintf("missing required columns from D300 file: '%s'", toString(req_cols[!present]))) } - + if (day0) { # Safely extract actual plate dimensions from the XML Dimension tag (e.g. "(8,12)") # This works reliably regardless of whether metadata was merged or not. @@ -73,19 +73,19 @@ import_D300 <- function(D300_file, dims <- as.integer(strsplit(gsub("\\(|\\)", "", dim_str), ",")[[1]]) nrow_plate <- dims[1] ncol_plate <- dims[2] - + # Fallback to absolute max if dimension string is somehow corrupt if (is.na(nrow_plate) || is.na(ncol_plate)) { nrow_plate <- max(as.numeric(treatment$Row), na.rm = TRUE) ncol_plate <- max(as.numeric(treatment$Col), na.rm = TRUE) } - + wb <- openxlsx::createWorkbook() - + # Initialize with empty strings drug_mat <- matrix("", nrow = nrow_plate, ncol = ncol_plate) conc_mat <- matrix("", nrow = nrow_plate, ncol = ncol_plate) - + for (m in seq_len(nrow_plate)) { for (n in seq_len(ncol_plate)) { if (has_meta) { @@ -105,49 +105,49 @@ import_D300 <- function(D300_file, } } } - + openxlsx::addWorksheet(wb, idfs$drug_identifier) openxlsx::writeData(wb, sheet = 1, data.table::data.table(drug_mat), colNames = FALSE) - + openxlsx::addWorksheet(wb, idfs$conc_identifier) openxlsx::writeData(wb, sheet = 2, data.table::data.table(conc_mat), colNames = FALSE) - + fname <- "trt_day0.xlsx" openxlsx::saveWorkbook(wb, file.path(destination_path, fname), overwrite = TRUE) } - + # Sort only the plate list numerically to ensure trt_1, trt_2 files generate in chronological order uplates <- sort(as.numeric(unique(treatment$D300_Plate_N))) - + existing_files <- list.files(destination_path, pattern = "^trt_P\\d+\\.xlsx$") - + # Calculate the starting offset max_idx <- 0 if (length(existing_files) > 0) { nums <- as.numeric(gsub("trt_P|\\.xlsx", "", existing_files)) max_idx <- max(nums, na.rm = TRUE) } - + for (i in seq_along(uplates)) { wb <- openxlsx::createWorkbook() idx <- treatment$D300_Plate_N == uplates[i] # Filter to 1 plate. trt_filt <- treatment[idx, ] - - # create a list with Gnumber and Concentration + + # create a list with Gnumber and Concentration trt_filt$gn_conc <- apply(trt_filt, 1, function(x) list(x[idfs$drug_identifier], x[idfs$conc_identifier])) - trt_gnumber_conc <- data.table::dcast(trt_filt, Row ~ Col, - value.var = c("gn_conc"), + trt_gnumber_conc <- data.table::dcast(trt_filt, Row ~ Col, + value.var = c("gn_conc"), fun.aggregate = list) rownames_trt_gnumber_conc <- trt_gnumber_conc$Row trt_gnumber_conc <- trt_gnumber_conc[, setdiff(colnames(trt_gnumber_conc), "Row"), with = FALSE] - - # count number of drugs,conc in each well + + # count number of drugs,conc in each well trt_n_drugs <- apply(trt_gnumber_conc, c(1, 2), function(x) length(x[[1]])) - + # Extract actual plate dimensions from the XML Dimension tag (e.g. "(8,12)") dim_str <- trt_filt$Dimension[1] dims <- as.integer(strsplit(gsub("\\(|\\)", "", dim_str), ",")[[1]]) - + trt_info <- list( max_drugs_per_well = max(trt_n_drugs), col_idx = strtoi(colnames(trt_gnumber_conc)), @@ -156,45 +156,45 @@ import_D300 <- function(D300_file, plate_ncol = dims[2], has_metadata = has_meta ) - save_drug_info_per_well(trt_info, trt_gnumber_conc, wb, idfs) + save_drug_info_per_well(trt_info, trt_gnumber_conc, wb, idfs) current_file_num <- max_idx + i fname <- sprintf("trt_P%d.xlsx", current_file_num) - + openxlsx::saveWorkbook(wb, file.path(destination_path, fname), overwrite = TRUE) } } #' for each drug create a Gnumber and Concentration information for each well -#' +#' #' @param trt_info list with treatment info #' @param trt_gnumber_conc list with treatment data #' @param wb pointer to xlsx workbook #' @param idfs charvec with identifiers #' @keywords D300 -#' +#' #' @return \code{NULL} invisibly. -#' +#' save_drug_info_per_well <- function(trt_info, trt_gnumber_conc, wb, idfs) { - + # Toggle dimensions based on metadata presence to preserve legacy unit tests nrow <- if (trt_info$has_metadata) max(trt_info$row_idx) else trt_info$plate_nrow ncol <- if (trt_info$has_metadata) max(trt_info$col_idx) else trt_info$plate_ncol nwells <- nrow * ncol - + for (j in seq_len(trt_info$max_drugs_per_well)) { - + drug_sname <- idfs$drug_identifier conc_sname <- idfs$conc_identifier if (j != 1L) { drug_sname <- paste0(drug_sname, "_", j) conc_sname <- paste0(conc_sname, "_", j) } - + # Initialize with empty strings to guarantee cells are created in Excel conc_mat <- matrix(rep("", nwells), nrow = nrow, ncol = ncol) drug_mat <- matrix(rep("", nwells), nrow = nrow, ncol = ncol) - + if (trt_info$has_metadata) { # ------------------------------------------------------------- # LEGACY LOGIC: Used for unit tests and when Metadata is supplied @@ -222,13 +222,13 @@ save_drug_info_per_well <- for (n in seq_len(ncol)) { r_idx <- which(trt_info$row_idx == m) c_idx <- which(trt_info$col_idx == n) - + if (length(r_idx) > 0 && length(c_idx) > 0) { drug_entry <- trt_gnumber_conc[[r_idx, c_idx]] } else { drug_entry <- list() } - + if (length(drug_entry) >= j) { drug <- drug_entry[[j]][[1]] conc <- drug_entry[[j]][[2]] @@ -249,10 +249,10 @@ save_drug_info_per_well <- } } } - + drug_data <- data.table::data.table(drug_mat) conc_data <- data.table::data.table(conc_mat) - + openxlsx::addWorksheet(wb, drug_sname) openxlsx::writeData(wb, sheet = (j * 2) - 1, drug_data, colNames = FALSE) openxlsx::addWorksheet(wb, conc_sname) @@ -268,13 +268,13 @@ merge_D300_w_metadata <- function(D300, Gnums) { } invisible(NULL) } - + merge_trt_col <- "Name" validate_columns(merge_trt_col, D300) - + merge_metadata_col <- "D300_Label" validate_columns(merge_metadata_col, Gnums) - + # Restored default sorting (sort = TRUE implicit) to maintain multi-drug combination order merge(D300, Gnums, by.x = merge_trt_col, by.y = merge_metadata_col, all.x = TRUE) } @@ -292,7 +292,7 @@ merge_D300_w_metadata <- function(D300, Gnums) { #' @keywords D300 #' #' @return data.table representing input \code{D300_file}. -#' +#' #' @examples #' td3 <- get_test_D300_data() #' fs <- td3[["f_96w"]] @@ -303,42 +303,42 @@ merge_D300_w_metadata <- function(D300, Gnums) { parse_D300_xml <- function(D300_file) { assertthat::assert_that(is.character(D300_file), msg = "'D300_file' must be a character vector") assertthat::assert_that(assertthat::is.readable(D300_file), msg = "'D300_file' must be a readable path") - + # Open D300 XML format. - D300_xml.tree <- XML::xmlTreeParse(D300_file, useInternal = TRUE) + D300_xml.tree <- XML::xmlTreeParse(D300_file, useInternal = TRUE) top <- XML::xmlRoot(D300_xml.tree) - + # Safely retrieve units (prevents UseMethod error if node is missing). node_vol <- top[["VolumeUnit"]] vol_unit <- if (!is.null(node_vol)) XML::xmlValue(node_vol) else NA - + node_conc <- top[["ConcentrationUnit"]] conc_unit <- if (!is.null(node_conc)) XML::xmlValue(node_conc) else NA - + node_mol <- top[["MolarityConcentrationUnit"]] mol_conc_unit <- if (!is.null(node_mol)) XML::xmlValue(node_mol) else NA - + # Handle missing ConcentrationUnit in newer D300 software versions if (is.na(conc_unit)) { conc_unit <- mol_conc_unit } - + # Assertions. if (!is.na(conc_unit) && !is.na(mol_conc_unit)) { - assertthat::assert_that(conc_unit == mol_conc_unit, + assertthat::assert_that(conc_unit == mol_conc_unit, msg = "Mismatch between the units for ConcentrationUnit and MolarityConcentrationUnit") } - + # if there is DMSO backfill defined throw a warning, support not yet implemented backfills <- XML::xpathSApply(top, ".//Backfills/Backfill") if (length(backfills) > 0) { warning("Backfill identified in D300 but not supported.") } - + id_col <- "ID" df_drug <- get_D300_xml_drugs(top, id_col) df_trt <- get_D300_xml_treatments(top, id_col, vol_unit, conc_unit) - + # Restored default sorting (sort = TRUE implicit) to maintain multi-drug combination order df_D300 <- merge(df_trt, df_drug, by.x = id_col, by.y = id_col, all.x = TRUE) df_D300 @@ -347,54 +347,54 @@ parse_D300_xml <- function(D300_file) { get_D300_xml_drugs <- function(xml_tree_root, id_col = "ID") { - + drug_cols <- c(id_col, "Name", "Stock_Conc", "Stock_Unit") - + # Safely extract information for every fluid (i.e. drugs) using XPath fluids <- XML::xpathSApply(xml_tree_root, ".//Fluids/Fluid") nfluids <- length(fluids) df_drug <- vector("list", nfluids) - + for (fi in seq_len(nfluids)) { fluid <- fluids[[fi]] id <- XML::xmlAttrs(fluid)[[id_col]] - + node_name <- fluid[["Name"]] name <- if (!is.null(node_name)) XML::xmlValue(node_name) else "" - + node_stock <- fluid[["Concentration"]] stock_conc <- if (!is.null(node_stock)) XML::xmlValue(node_stock) else NA - + node_conc_unit <- fluid[["ConcentrationUnit"]] fluid_conc_unit <- if (!is.null(node_conc_unit)) XML::xmlValue(node_conc_unit) else NA - + df_drug[[fi]] <- data.table::data.table(t(c(id, name, stock_conc, fluid_conc_unit))) colnames(df_drug[[fi]]) <- drug_cols } data.table::rbindlist(df_drug) - } + } get_plate_info <- function(plate, vol_unit) { - + rows_plate <- XML::xmlValue(plate[["Rows"]]) cols_plate <- XML::xmlValue(plate[["Cols"]]) plate_dim <- sprintf("(%s,%s)", rows_plate, cols_plate) assay_vol <- XML::xmlValue(plate[["AssayVolume"]]) desired_unit <- get_muL() assay_vol_conv <- convert_units(assay_vol, from = vol_unit, to = desired_unit) - + node_name <- plate[["Name"]] barcode_plate <- if (!is.null(node_name)) XML::xmlValue(node_name) else "" if (is.na(barcode_plate)) barcode_plate <- "" - + # check if the plate is randomized; should probably be changed node_rand <- plate[["Randomize"]] randomize <- if (!is.null(node_rand)) XML::xmlValue(node_rand) else "" if (!is.na(randomize) && randomize != "") { warning("Randomization of D300 plate possibly detected, but not supported yet.") } - + list( plate_dim = plate_dim, desired_unit = desired_unit, @@ -406,36 +406,36 @@ get_plate_info <- function(plate, vol_unit) { get_D300_xml_treatments <- function(xml_tree_root, id_col = "ID", vol_unit, conc_unit) { - + # define treatment columns - trt_cols <- c("D300_Plate_N", "D300_Barcode", "Dimension", "Row", "Col", + trt_cols <- c("D300_Plate_N", "D300_Barcode", "Dimension", "Row", "Col", "Volume", "Volume_Unit", id_col, "Concentration", "Unit") - - # extract drug dispensing information for each plate + + # extract drug dispensing information for each plate plates <- XML::xpathSApply(xml_tree_root, ".//Plates/Plate") - + pl <- lapply(seq_along(plates), function(pli) { plate <- plates[[pli]] pl_info <- get_plate_info(plate, vol_unit) # plate info - + # extract drug dispensing information for each well using XPath wells <- XML::xpathSApply(plate, ".//Wells/Well") - + wl <- lapply(wells, function(well) { - + well_attr <- XML::xmlAttrs(well) # D300 files are always 0-indexed. Hardcoding the +1 shift to match Excel matrices. row_well <- strtoi(well_attr[["Row"]]) + 1 col_well <- strtoi(well_attr[["Col"]]) + 1 - - # extract information each fluid delivered in well + + # extract information each fluid delivered in well fluids <- XML::xpathSApply(well, ".//Fluid") if (length(fluids) == 0) return(NULL) - + res <- vapply(fluids, function(fluid) { id_fluid <- XML::xmlAttrs(fluid)[[id_col]] conc_fluid <- XML::xmlValue(fluid) - + # define single entry c( pli, @@ -450,13 +450,13 @@ get_D300_xml_treatments <- conc_unit ) }, character(length(trt_cols))) - + t(res) # transpose to correctly match columns }) - + do.call(rbind, wl) }) - + df_trt <- data.table::data.table(do.call(rbind, pl)) colnames(df_trt) <- trt_cols df_trt @@ -467,7 +467,7 @@ get_conversion_factor <- function(from, to = get_muL()) { if (to != get_muL()) { stop(sprintf("conversion to unit '%s' not supported", to)) } - + muL <- get_muL() switch(from, "nL" = 1e-3, @@ -479,7 +479,7 @@ get_conversion_factor <- function(from, to = get_muL()) { convert_units <- function(x, from, to) { - conversion_factor <- get_conversion_factor(from, to) + conversion_factor <- get_conversion_factor(from, to) as.double(x) * conversion_factor } @@ -492,16 +492,16 @@ parse_D300_metadata_file <- function(metadata_file) { if (tools::file_ext(metadata_file) %in% c("xls", "xlsx")) { D300_Gnum_sheets <- readxl::excel_sheets(metadata_file) nsheets <- length(D300_Gnum_sheets) - + # Assertions. assertthat::assert_that(is.character(metadata_file), msg = "'metadata_file' must be a character vector") assertthat::assert_that(assertthat::is.readable(metadata_file), msg = "'metadata_file' must be a readable path") - + if (nsheets != 1L) { futile.logger::flog.error("only one data sheet is supported, found '%s' sheets in '%s'", nsheets, metadata_file) } - + metadata <- read_excel_to_dt(metadata_file, sheet = D300_Gnum_sheets[[1]], col_names = TRUE) diff --git a/R/MAE_to_PSet.R b/R/MAE_to_PSet.R index 21b2bd49..4e1145ba 100644 --- a/R/MAE_to_PSet.R +++ b/R/MAE_to_PSet.R @@ -5,11 +5,11 @@ #' #' @param mae A MultiAssayExperiment object generated by gDR. #' @param pset_name A character string specifying the name of the resulting PharmacoSet object. -#' +#' #' @keywords pset_conversion -#' +#' #' @return A PharmacoSet object. -#' +#' #' @examples #' # Convert a MultiAssayExperiment object to a PharmacoSet object #' m <- 20 @@ -22,7 +22,7 @@ #' colData = S4Vectors::DataFrame(cnames)) #' mae <- MultiAssayExperiment::MultiAssayExperiment(experiments = list("single-agent" = se)) #' convert_MAE_to_PSet(mae, "my_pset") -#' +#' #' @export convert_MAE_to_PSet <- function(mae, pset_name) { @@ -32,17 +32,17 @@ convert_MAE_to_PSet <- function(mae, # Get the unique assay names from all experiments combined assay_names <- unique(unlist(gDRutils::MAEpply(mae, function(x) SummarizedExperiment::assayNames(x)))) - # Create a list of data.tables, one for each assay name, with the data from all Summarized Experiments in the MAE. + # Create a list of data.tables, one for each assay name, with the data from all Summarized Experiments in the MAE. assay_list <- lapply(assay_names, function(x) { - + # Get list of all the Summarized Experiments with all the unique Assays across the entire MAE - # If the summarized experiment does not have a given assay, create it + # If the summarized experiment does not have a given assay, create it se_list <- gDRutils::MAEpply(mae = mae, FUN = .extract_or_create_assay, unify = FALSE, x) - + # Get list of datatables with all assays with name=x converted to a data.table se_dt_list <- lapply(se_list, FUN = gDRutils::convert_se_assay_to_dt, x) - # Combine all the data.tables into one data.table, add the name of the Summarized Experiment + # Combine all the data.tables into one data.table, add the name of the Summarized Experiment # as a "data_type" column so that users can identify which Summarized Experiment the data came from DT <- data.table::rbindlist(se_dt_list, fill = TRUE, idcol = "data_type") @@ -61,8 +61,8 @@ convert_MAE_to_PSet <- function(mae, rownames <- unique(unlist(gDRutils::MAEpply(mae, FUN = rownames))) # convert the rowData of each Summarized Experiment to a data.table and combine them into one data.table rowdata_ <- gDRutils::MAEpply( - mae = mae, - FUN = function(x) data.table::as.data.table(data.frame(SummarizedExperiment::rowData(x)), keep.rownames = TRUE), + mae = mae, + FUN = function(x) data.table::as.data.table(data.frame(SummarizedExperiment::rowData(x)), keep.rownames = TRUE), unify = TRUE) # rename the rownames column to "treatmentid" (required for PharmacoSet Object) data.table::setnames(rowdata_, "rn", "treatmentid") @@ -72,13 +72,13 @@ convert_MAE_to_PSet <- function(mae, colnames <- unique(unlist(gDRutils::MAEpply(mae, FUN = colnames))) # convert the colData of each Summarized Experiment to a data.table and combine them into one data.table coldata_ <- unique(gDRutils::MAEpply( - mae = mae, + mae = mae, FUN = function(x) data.table::as.data.table(data.frame(SummarizedExperiment::colData(x)), keep.rownames = TRUE), unify = TRUE)) # rename the rownames column to "sampleid" (required for PharmacoSet Object) data.table::setnames(coldata_, "rn", "sampleid") - # Create a list of rowname and column name identifiers for each assay name (required for + # Create a list of rowname and column name identifiers for each assay name (required for # TreatmentResponseExperiment object) # in this case, all the identifiers are the same treatmentid & sampleid columns assayIDs <- stats::setNames(lapply(assay_names, function(x) c("treatmentid", "sampleid")), assay_names) @@ -93,28 +93,28 @@ convert_MAE_to_PSet <- function(mae, metadata = list() ) - # add batchid column to coldata_ (required for PSet) + # add batchid column to coldata_ (required for PSet) coldata_$batchid <- 1 # Create PharmacoSet object (implicit return) # The molecularProfiles slot is empty because the gDR object does not contain molecular profiles - # As of this version, curation slot is left empty. + # As of this version, curation slot is left empty. PharmacoGx::PharmacoSet2( - name = pset_name, + name = pset_name, treatment = as.data.frame(rowdata_), # nolint sample = as.data.frame(coldata_), # nolint - molecularProfiles = + molecularProfiles = MultiAssayExperiment::MultiAssayExperiment( MultiAssayExperiment::ExperimentList( emptySE = SummarizedExperiment::SummarizedExperiment( assays = list(), colData = coldata_ - ))), - treatmentResponse = tre, - perturbation = list(), - curation = list(sample = data.frame(), - treatment = data.frame(), - tissue = data.frame()), + ))), + treatmentResponse = tre, + perturbation = list(), + curation = list(sample = data.frame(), + treatment = data.frame(), + tissue = data.frame()), datasetType = "sensitivity") } @@ -122,31 +122,31 @@ convert_MAE_to_PSet <- function(mae, #' Extracts an assay from a SummarizedExperiment object or creates a new one if it does not exist #' -#' This function takes a SummarizedExperiment object and an assay name as input. -#' If the specified assay already exists in the SummarizedExperiment object, it is returned. -#' Otherwise, a new assay with the specified name is created and added to the SummarizedExperiment object. -#' The new assay is initialized with NA values. -#' This is useful for when multiple Summarized Experiments in a given MAE do not have the same assays. -#' And it is necessary to have the same assays in all Summarized Experiments in order to convert the MAE to a PSet. +#' This function takes a SummarizedExperiment object and an assay name as input. +#' If the specified assay already exists in the SummarizedExperiment object, it is returned. +#' Otherwise, a new assay with the specified name is created and added to the SummarizedExperiment object. +#' The new assay is initialized with NA values. +#' This is useful for when multiple Summarized Experiments in a given MAE do not have the same assays. +#' And it is necessary to have the same assays in all Summarized Experiments in order to convert the MAE to a PSet. #' #' @param SE A SummarizedExperiment object #' @param assay_name A character string specifying the name of the assay to extract or create -#' +#' #' @keywords internal #' #' @return A SummarizedExperiment object with the specified assay #' -.extract_or_create_assay <- function(SE, +.extract_or_create_assay <- function(SE, assay_name) { checkmate::assert_class(SE, "SummarizedExperiment") checkmate::assert_string(assay_name) if (assay_name %in% SummarizedExperiment::assayNames(SE)) { SE } else { - SummarizedExperiment::assay(SE, assay_name) <- + SummarizedExperiment::assay(SE, assay_name) <- BumpyMatrix::splitAsBumpyMatrix( - S4Vectors::DataFrame(x = rep(NA, prod(dim(SE)))), - row = rownames(SE), + S4Vectors::DataFrame(x = rep(NA, prod(dim(SE)))), + row = rownames(SE), column = colnames(SE) ) SE diff --git a/R/assert_utils.R b/R/assert_utils.R index f7bac1aa..7b18714f 100644 --- a/R/assert_utils.R +++ b/R/assert_utils.R @@ -7,15 +7,14 @@ #' @examples #' td2 <- get_test_Tecan_data() #' is_readable_v(td2$r_files) -#' +#' #' @export #' #' @return \code{NULL} invisibly. -#' +#' is_readable_v <- function(paths) { checkmate::assert_character(paths) missing_path_string <- paste(paths[as.logical(-file.access(paths, 4))], collapse = ", ", sep = " ") message <- paste0("Following path(s) with no read permission found: '", missing_path_string, "'") assertthat::assert_that(sum(file.access(paths, 4)) == 0, msg = message) } - diff --git a/R/correction.R b/R/correction.R index 7d6fbb61..6c133f67 100644 --- a/R/correction.R +++ b/R/correction.R @@ -1,13 +1,13 @@ #' Get Excel sheets #' #' get sheets for given set of XLS files -#' +#' #' @param files charvec with file paths #' @keywords correction_exception #' #' @return named list where names are the excel filenames #' and the values are the sheets within each file -#' +#' get_xl_sheets <- function(files) { checkmate::assert_character(files) template_sheets <- lapply(files, readxl::excel_sheets) @@ -18,23 +18,23 @@ get_xl_sheets <- function(files) { #' Evaluate if template file with single sheet is present, #' if the name of the sheet is correct and if it can be fixed -#' +#' #' get sheets for given set of XLS files #' @param ts list with template sheets info #' @keywords correction_exception -#' +#' #' @return logical flag -#' +#' .check_against_single_template_sheet <- function(ts) { checkmate::assert_list(ts) # edge case: 'untreated' template with (1) single sheet - # and (2) improperly named (not 'idfs[['drug']]) + # and (2) improperly named (not 'idfs[['drug']]) myv <- vapply(ts, function(x) { length(x) == 1 && x != gDRutils::get_env_identifiers("drug") }, logical(1)) - - # one template file with + + # one template file with status <- any(myv) && !all(myv) if (isTRUE(status)) { attributes(status) <- list(file = names(myv[myv])) @@ -46,12 +46,12 @@ get_xl_sheets <- function(files) { ### XLS templates validation #' Correct names of the template sheets (if required) -#' +#' #' Correct names of the template sheets (if required) -#' +#' #' @param tfiles charvec with paths to template files #' @keywords correction_exception -#' +#' #' @return charvec with paths to corrected sheet names correct_template_sheets <- function(tfiles) { @@ -61,12 +61,12 @@ correct_template_sheets <- function(tfiles) { # no issues with templates sheets, return input data if (!are_template_sheets_valid(ts)) { # iterate through available fixes to try to fix data - # spaces/bad capitalization + # spaces/bad capitalization ts <- fix_typos_with_reference(ts, unlist(gDRutils::get_env_identifiers())) - + # additional prefixes/postfixes in the optional sheets (e.g. "Concentration_2_3", "my_gnumber_2)" ts <- fix_typos_with_reference(ts, get_expected_template_sheets("optional"), method = "grepl") - + # additional prefixes/postfixes in the optional sheets and/or missing underscores ts <- fix_typos_with_reference( @@ -80,8 +80,8 @@ correct_template_sheets <- function(tfiles) { ts, unlist(gDRutils::get_env_identifiers(gDRutils::get_required_identifiers(), simplify = FALSE)), method = "adist") - - # check if there is a template file with single, improperly named sheet + + # check if there is a template file with single, improperly named sheet # fix if possible st1 <- .check_against_single_template_sheet(ts) if (st1) { @@ -99,14 +99,14 @@ correct_template_sheets <- function(tfiles) { #' #' @param type charvec type of the sheets #' @keywords correction_exception -#' +#' #' @return string with type of the sheets -#' +#' get_expected_template_sheets <- function(type = c("all", "core", "optional")) { type <- match.arg(type) - + ctype <- as.character(gDRutils::get_env_identifiers(c("drug", "concentration"), simplify = FALSE)) @@ -117,7 +117,7 @@ get_expected_template_sheets <- "concentration3"), simplify = FALSE )) - + if (type == "all") { c(ctype, otype) } else if (type == "core") { @@ -130,14 +130,14 @@ get_expected_template_sheets <- } #' are template sheet valid? -#' +#' #' are template sheet valid? -#' +#' #' @param ts list with (per file) template sheets #' @keywords correction_exception -#' +#' #' @seealso get_xl_sheets -#' +#' #' @return logical flag are_template_sheets_valid <- function(ts) { @@ -154,13 +154,13 @@ are_template_sheets_valid <- function(ts) { myv[!myv] <- drug %in% ts[!myv][[1]] } cl[[length(cl) + 1]] <- all(myv) - + # at least idfs[['drug']] sheet is present in all files myv2 <- vapply(ts, function(x) { any(gDRutils::get_env_identifiers("drug") %in% x) }, logical(1)) cl[[length(cl) + 1]] <- all(myv2) - + all(unlist(cl)) } @@ -168,34 +168,34 @@ are_template_sheets_valid <- function(ts) { ### TYPOS ### #' Fix typos using reference data -#' +#' #' Fix typos using reference data -#' Evaluate given list of ids and try to update them +#' Evaluate given list of ids and try to update them # if they are similar to any id from 'ref' data -#' +#' #' @param data list of charvec(s) or charvec with data #' @param ref charvec with reference data #' @param method charvec type of the method to be used -#' 'exact' is used to find identical entries from 'ref' in the +#' 'exact' is used to find identical entries from 'ref' in the #' data (after corrections and uppercase'ing) -#' 'grepl' is used to find entries from 'ref' that might be +#' 'grepl' is used to find entries from 'ref' that might be #' somehow pre- or post- fixed -#' @param fix_underscores logical flag fix the issues with underscores in data identfiers? +#' @param fix_underscores logical flag fix the issues with underscores in data identfiers? #' @keywords correction_exception -#' +#' #' @return list or charvec with corrected data -#' +#' fix_typos_with_reference <- function(data, ref, method = c("exact", "grepl", "adist"), fix_underscores = FALSE) { - + stopifnot(is.list(data) || is.character(data)) checkmate::assert_character(ref) method <- match.arg(method) checkmate::assert_flag(fix_underscores) - + out <- if (is.list(data)) { lapply(data, function(x) { fix_typos_with_reference(x, ref, method, fix_underscores) @@ -203,10 +203,10 @@ fix_typos_with_reference <- } else { # remove spaces at the beginning/end cdata <- vapply(data, function(x) gsub("^ +| +$", "", x), character(1)) - + # replace spaces with "_" cdata <- vapply(cdata, function(x) gsub(" +", "_", x), character(1)) - + # update to valid identifier (if found) # convert both v and valid identifiers to upper case before comparison # remove "_" from references if 'fix_underscores' enabled @@ -215,7 +215,7 @@ fix_typos_with_reference <- } else { toupper(ref) } - + cdata <- vapply(cdata, function(x) { idx <- if (method == "exact") { which(ref_uc %in% toupper(x)) @@ -236,20 +236,20 @@ fix_typos_with_reference <- } #' grep wrapper to support multiple patterns -#' +#' #' @param patterns charvec with patterns to be checked #' @param x charvec with data #' @param do_unlist logical_flag unlist the final results? #' @param ... additional argument #' @keywords correction_exception -#' +#' #' @return list of charvec with grep output -#' +#' mgrepl <- function(patterns, x, do_unlist = TRUE, ...) { checkmate::assert_character(patterns) checkmate::assert_character(x) checkmate::assert_flag(do_unlist) - + out <- lapply(patterns, function(p) { grepl(p, x, ...) }) @@ -257,4 +257,3 @@ mgrepl <- function(patterns, x, do_unlist = TRUE, ...) { unlist(out) } } - diff --git a/R/exceptions.R b/R/exceptions.R index c01684f4..f0334dd6 100644 --- a/R/exceptions.R +++ b/R/exceptions.R @@ -6,12 +6,12 @@ #' get_exception_data(1) #' get_exception_data() #' @keywords correction_exception -#' +#' #' @return A data.table row with exception data or all exceptions #' @export get_exception_data <- function(status_code = NULL) { checkmate::assert_number(status_code, null.ok = TRUE) - + #nolint start exception_table <- tibble::tribble( ~status_code, ~title, ~sprintf_text, ~type, ~input_type, @@ -54,7 +54,7 @@ get_exception_data <- function(status_code = NULL) { "37", "Raw Data header", "Invalid header in the result file: (%s)", "error", "raw data", ) #nolint end - + res <- if (!is.null(status_code)) { checkmate::assert_choice(toString(status_code), exception_table$status_code) @@ -62,6 +62,6 @@ get_exception_data <- function(status_code = NULL) { } else { exception_table } - + data.table::as.data.table(res) } diff --git a/R/gDRimport-package.R b/R/gDRimport-package.R index 8b155260..366e9602 100644 --- a/R/gDRimport-package.R +++ b/R/gDRimport-package.R @@ -1,4 +1,4 @@ -#' @note To learn more about functions start with `help(package = "gDRimport")` +#' @note To learn more about functions start with `help(package = "gDRimport")` #' @keywords internal #' @return package help page "_PACKAGE" diff --git a/R/gdr_test_data_class.R b/R/gdr_test_data_class.R index 09875203..e3adc576 100644 --- a/R/gdr_test_data_class.R +++ b/R/gdr_test_data_class.R @@ -1,22 +1,22 @@ -#' gDR Test Data object -#' +#' gDR Test Data object +#' #' Object class `gdr_test_data` is build by function [gDRimport::get_test_data()] -#' +#' #' @slot manifest_path character, path to manifest file #' @slot result_path character, path(s) to results file #' @slot template_path character, path(s) to data.table with template data #' @slot ref_m_df character, data.table with manifest data -#' @slot ref_r1_r2 character, path to reference file with raw data for treated & untreated +#' @slot ref_r1_r2 character, path to reference file with raw data for treated & untreated #' @slot ref_r1 character, path to reference file with raw data for treated #' @slot ref_t1_t2 character, path to reference template file with treated & untreated data #' @slot ref_t1 character, path to reference template file with treated data -#' +#' #' @return object class `gdr_test_data` with primary test data -#' +#' #' @docType class #' @name gdr_test_data-class #' @keywords classes test_data_class -#' +#' #' @export setClass( Class = "gdr_test_data", @@ -41,20 +41,20 @@ setMethod("show", "gdr_test_data", ) #' Method manifest_path -#' +#' #' Method for object gdr_test_data - access to slot `manifest_path` #' @param x object class gdr_test_data #' #' @return value of slot `manifest_path` -#' +#' #' @examples #' td <- get_test_data() #' manifest_file_path <- manifest_path(td) -#' +#' #' @docType methods #' @rdname manifest_path-method #' @keywords methods test_data_class -#' +#' #' @export setGeneric("manifest_path", function(x) standardGeneric("manifest_path")) @@ -67,17 +67,17 @@ setMethod("manifest_path", "gdr_test_data", function(x) x@manifest_path) #' #' Method for object gdr_test_data - access to slot `result_path` #' @param x object class gdr_test_data -#' +#' #' @return value of slot `result_path` -#' +#' #' @examples #' td <- get_test_data() #' result_file_path <- result_path(td) -#' +#' #' @docType methods #' @rdname result_path-method #' @keywords methods test_data_class -#' +#' #' @export setGeneric("result_path", function(x) standardGeneric("result_path")) @@ -93,15 +93,15 @@ setMethod("result_path", "gdr_test_data", function(x) x@result_path) #' @keywords test_data_class #' #' @return value of slot `template_path` -#' +#' #' @examples #' td <- get_test_data() #' template_file_path <- template_path(td) -#' +#' #' @docType methods #' @rdname template_path-method #' @keywords methods test_data_class -#' +#' #' @export setGeneric("template_path", function(x) standardGeneric("template_path")) diff --git a/R/load_files.R b/R/load_files.R index cdad8513..fce5b22e 100755 --- a/R/load_files.R +++ b/R/load_files.R @@ -153,7 +153,7 @@ read_in_manifest_file <- function(manifest_file, available_formats) { } else if (manifest_ext %in% c("text/tsv", "text/tab-separated-values", "tsv")) { - + df <- tryCatch({ stats::na.omit(data.table::fread( x, sep = "\t", header = TRUE, na.strings = c("", "NA"))) @@ -265,11 +265,11 @@ load_templates <- function(df_template_files) { #' load_results <- function(df_results_files, instrument = "EnVision", headers = gDRutils::get_env_identifiers()) { - + stopifnot(any(inherits(df_results_files, "data.table"), checkmate::test_character(df_results_files))) - checkmate::assert_string(instrument, pattern = "^EnVision$|^long_tsv$|^Tecan$|^EnVision_new$|^Incucyte$") + checkmate::assert_string(instrument, pattern = "^EnVision$|^long_tsv$|^Tecan$|^EnVision_new$|^Incucyte$") checkmate::assert_list(headers, null.ok = TRUE) - + if (data.table::is.data.table(df_results_files)) { # for the shiny app results_file <- df_results_files$datapath @@ -279,11 +279,11 @@ load_results <- results_filename <- basename(results_file) } checkmate::assert_file_exists(results_file) - + if (all(endsWith(results_file, ".tsv"))) { instrument <- "long_tsv" } - + if (instrument == "EnVision") { all_results <- load_results_EnVision(results_file, headers = headers) @@ -683,7 +683,7 @@ load_results_tsv <- results_filename <- basename(results_file) all_results <- read_in_result_files(results_file, results_filename, headers) - + cols_subset <- intersect(c(headers[["barcode"]], gDRutils::get_env_identifiers("well_position")), names(all_results)) @@ -737,7 +737,7 @@ read_in_result_files <- function(results_file, results_filename, headers) { futile.logger::flog.error("%s needs to be a column of %s", coln, results_filename[iF]) } } - + cols_subset <- intersect(c(headers[["barcode"]], gDRutils::get_env_identifiers("well_position")), names(df)) if (dim(unique(df[, cols_subset, with = FALSE]))[1] != @@ -788,7 +788,7 @@ load_results_EnVision <- df <- read_EnVision_xlsx(results_file[[iF]], iS) } - barcode_col <- grep(paste0(headers[["barcode"]], collapse = "|"), df)[1] + barcode_col <- grep(paste(headers[["barcode"]], collapse = "|"), df)[1] if (isEdited) { # get the expected plate size @@ -824,7 +824,7 @@ load_results_EnVision <- #' Load results from EnVision_new (CSV and XLSX) #' #' This functions loads and checks the results file(s) from a new Envision instrument -#' in the CSV or XLSX format. Supports multiple plates in a single file or multiple +#' in the CSV or XLSX format. Supports multiple plates in a single file or multiple #' sheets in an Excel file by robustly checking the file structure. #' #' @param results_file character, file path(s) to result file(s) @@ -834,22 +834,22 @@ load_results_EnVision <- #' @return data.table with results data #' load_results_EnVision_new <- function(results_file, headers = gDRutils::get_env_identifiers()) { - + checkmate::assert_character(results_file) checkmate::assert_list(headers, null.ok = TRUE) - + all_results <- data.table::data.table() - + for (iF in seq_along(results_file)) { current_file <- results_file[iF] is_excel <- grepl("\\.xlsx?$", current_file, ignore.case = TRUE) - + lines_list <- list() - + if (is_excel) { futile.logger::flog.info("Reading EnVision_new Excel file %s", current_file) sheets <- readxl::excel_sheets(current_file) - + for (sheet in sheets) { dt <- read_excel_to_dt(current_file, sheet = sheet, col_names = FALSE) dt_char <- dt[, lapply(.SD, function(x) { @@ -863,19 +863,19 @@ load_results_EnVision_new <- function(results_file, headers = gDRutils::get_env_ futile.logger::flog.info("Reading EnVision_new CSV file %s", current_file) lines_list[["csv"]] <- readLines(current_file, warn = FALSE) } - + for (sheet_name in names(lines_list)) { lines <- lines_list[[sheet_name]] - + if (is_excel) { futile.logger::flog.info("Processing sheet '%s' from %s", sheet_name, current_file) } - + clean_lines <- gsub("^\"|\"$", "", lines) clean_lines <- gsub("\"", "", clean_lines) - + data_header_idx <- grep("^[;, \t]*1[;, \t]+2[;, \t]+3", clean_lines) - + if (length(data_header_idx) == 0) { if (is_excel) { futile.logger::flog.warn("Could not find data matrix header in file: %s, sheet: '%s'. Skipping.", @@ -885,18 +885,18 @@ load_results_EnVision_new <- function(results_file, headers = gDRutils::get_env_ stop(sprintf("Could not find data matrix header (e.g., ';1;2;3...') in file: %s", current_file)) } } - + for (idx in seq_along(data_header_idx)) { data_start_line <- data_header_idx[idx] - + if (data_start_line > 1 && grepl("Plate Map", clean_lines[data_start_line - 1], ignore.case = TRUE)) { futile.logger::flog.info("Skipping 'Plate Map' matrix at line %d in file '%s'", data_start_line, current_file) next } - + barcode <- NA search_limit <- max(1, data_start_line - 15) - + for (r in seq(data_start_line - 1, search_limit, by = -1)) { if (grepl("^Plate Barcode[;,]Loop", clean_lines[r], ignore.case = TRUE)) { barcode_line <- clean_lines[r + 1] @@ -904,7 +904,7 @@ load_results_EnVision_new <- function(results_file, headers = gDRutils::get_env_ break } } - + if (is.na(barcode) || barcode == "") { for (r in seq(data_start_line - 1, search_limit, by = -1)) { if (grepl("^Plate Barcode[;,]", clean_lines[r], ignore.case = TRUE)) { @@ -917,14 +917,14 @@ load_results_EnVision_new <- function(results_file, headers = gDRutils::get_env_ } } } - + if (is.na(barcode) || barcode == "") { futile.logger::flog.info("Skipping matrix at line %d in file '%s': no associated 'Plate Barcode' found within 15 lines.", data_start_line, current_file) next } - + n_rows <- 0 for (r in (data_start_line + 1):length(clean_lines)) { if (grepl("^[A-Za-z]+[;,]", clean_lines[r])) { @@ -933,72 +933,72 @@ load_results_EnVision_new <- function(results_file, headers = gDRutils::get_env_ break } } - - if (n_rows == 0) n_rows <- 16 - + + if (n_rows == 0) n_rows <- 16 + data_lines <- lines[data_start_line:(data_start_line + n_rows)] - + tryCatch({ raw_data <- data.table::fread( - text = data_lines, + text = data_lines, header = TRUE, colClasses = "character", blank.lines.skip = FALSE ) }, error = function(e) { - exception_data <- get_exception_data(21) + exception_data <- get_exception_data(21) stop(sprintf(exception_data$sprintf_text, current_file)) }) - + data.table::setnames(raw_data, old = names(raw_data)[1], new = "WellRow") raw_data <- raw_data[, lapply(.SD, as.character)] - + melted_data <- data.table::melt( raw_data, id.vars = "WellRow", variable.name = "WellColumn", value.name = "ReadoutValue" ) - + melted_data[, WellColumn := gsub("^\"|\"$", "", WellColumn)] invalid_cols <- !grepl("^[0-9]+$", melted_data$WellColumn) - melted_data <- melted_data[!invalid_cols] - + melted_data <- melted_data[!invalid_cols] + melted_data[, WellColumn := as.integer(WellColumn)] - + melted_data[, (headers[["barcode"]]) := barcode] - melted_data[, BackgroundValue := 0] - + melted_data[, BackgroundValue := 0] + melted_data[, ReadoutValue := trimws(ReadoutValue)] - - is_empty_or_na <- is.na(melted_data$ReadoutValue) | - melted_data$ReadoutValue == "" | + + is_empty_or_na <- is.na(melted_data$ReadoutValue) | + melted_data$ReadoutValue == "" | toupper(melted_data$ReadoutValue) %in% c("NA", "NAN", "INF", "-INF") - + num_regex <- "^[-+]?([0-9]+(\\.[0-9]*)?|\\.[0-9]+)([eE][-+]?[0-9]+)?$" valid_num_idx <- grepl(num_regex, melted_data$ReadoutValue) - + invalid_idx <- !valid_num_idx & !is_empty_or_na - + if (any(invalid_idx)) { futile.logger::flog.warn("Non-numeric readout values found and coerced to NA in plate %s of %s", barcode, current_file) melted_data[invalid_idx, ReadoutValue := NA_character_] } - + melted_data[is_empty_or_na, ReadoutValue := NA_character_] melted_data[, ReadoutValue := as.numeric(ReadoutValue)] - + futile.logger::flog.info("Plate %s read; %d wells", barcode, nrow(melted_data)) - + all_results <- rbind(all_results, melted_data) } } } - + std_cols <- c(headers[["barcode"]], "WellRow", "WellColumn", "ReadoutValue", "BackgroundValue") data.table::setcolorder(all_results, intersect(std_cols, names(all_results))) - + return(unique(all_results)) } @@ -1358,15 +1358,15 @@ read_in_results_Tecan <- function(results_file, results_sheets, headers) { load_results_Incucyte <- function(results_file, headers = gDRutils::get_env_identifiers()) { - + # identifiers bcode_name <- headers$barcode[1] bcode_names <- c(bcode_name, paste0(bcode_name, ":")) dur_name <- headers$duration - + # Use lapply instead of for loop for better performance and idiomatic R style all_data_list <- lapply(results_file, function(iP) { - + header_dt <- if (grepl(".xlsx$", iP)) { tryCatch({ read_excel_to_dt(iP, n_max = 20) @@ -1382,48 +1382,48 @@ load_results_Incucyte <- stop(sprintf(exception_data$sprintf_text, iP)) }) } - + dstart_idx <- .find_header(header_dt, "Date Time", "missing 'Date Time' column") - barcode_idx <- .find_header(header_dt, bcode_names, sprintf("missing '%s' column", bcode_name)) + barcode_idx <- .find_header(header_dt, bcode_names, sprintf("missing '%s' column", bcode_name)) barcode <- header_dt[barcode_idx, 2][[1]] - + dt_input <- if (grepl(".xlsx$", iP)) { read_excel_to_dt(iP, skip = dstart_idx) } else { data.table::fread(iP, skip = dstart_idx, header = TRUE) } - + dt_input <- data.table::melt( dt_input[, -1], id.vars = 1, variable.name = "Well", value.name = "ReadoutValue" ) - + dt_input[[bcode_name]] <- barcode - + return(dt_input) }) - + all_data <- data.table::rbindlist(all_data_list) - + all_data[, (dur_name) := as.numeric(Elapsed)] all_data[, ReadoutValue := as.numeric(ReadoutValue)] all_data <- all_data[!is.na(get(dur_name)) & !is.na(ReadoutValue)] - + well_rname <- headers$well_position[1] well_cname <- headers$well_position[2] all_data[[well_rname]] <- gsub("([A-Za-z]+).*", "\\1", all_data$Well) all_data[[well_cname]] <- gsub("[A-Za-z]+(.*)", "\\1", all_data$Well) - + # Cleanup intermediate columns cols_to_remove <- c("Well", "Elapsed") all_data[, (cols_to_remove) := NULL] - + return(all_data) } - + #' check_metadata_names #' #' Check whether all metadata names are correct @@ -1686,7 +1686,7 @@ read_in_EnVision_file <- function(file, nrows, seps) { results.list <- list() current.line <- 1 - + while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0 && current.line < nrows) { cleaned.line <- gsub("=\"([^\"]*)\"", "\\1", line) results.list[[current.line]] <- cleaned.line diff --git a/R/prism_to_gdrDF.R b/R/prism_to_gdrDF.R index 6fd9dc1e..256e1278 100644 --- a/R/prism_to_gdrDF.R +++ b/R/prism_to_gdrDF.R @@ -7,7 +7,7 @@ #' #' @return \code{data.table} object with input data for gDR pipeline #' @keywords prism_conversion -#' +#' #' @examples #' prism_data <- system.file("testdata/prism_sa.csv", package = "gDRimport") #' prism_meta <- system.file("testdata/prism_model.csv", package = "gDRimport") @@ -17,19 +17,19 @@ convert_LEVEL5_prism_to_gDR_input <- function(prism_data_path, meta_data_path, readout_min = 1.03) { - + checkmate::check_file_exists(prism_data_path) gDRutils::reset_env_identifiers() idfs <- gDRutils::get_env_identifiers() - + data <- data.table::fread(prism_data_path) meta <- data.table::fread(meta_data_path) - + checkmate::assert_names(names(meta), must.include = c("ModelID", "CCLEName", "OncotreeLineage")) - + # Define the mapping for old column names column_mappings <- list( LFC_cb = c("LFC_cb", "LFC.cb", "LFC", "l2fc"), @@ -38,7 +38,7 @@ convert_LEVEL5_prism_to_gDR_input <- function(prism_data_path, pert2_iname = c("pert2_name", "pert2_iname", "drug2", "compound2"), pert2_dose = c("pert2_dose", "concentration2", "dose2") ) - + # Rename columns based on mapping if default column is missing for (col in names(column_mappings)) { if (!col %in% names(data)) { @@ -50,47 +50,47 @@ convert_LEVEL5_prism_to_gDR_input <- function(prism_data_path, } } } - + if (!"ccle_name" %in% names(data)) { data$ccle_name <- meta$CCLEName[match(data$depmap_id, meta$ModelID)] } - + checkmate::assert_names(names(data), must.include = c("ccle_name", "pert_iname", "pert_dose", "pert_time", "LFC_cb")) - + data <- data[data$ccle_name != "", ] - + if ("pert2_iname" %in% names(data) && "pert2_dose" %in% names(data)) { data[, (idfs$drug) := pert_iname] data[, (idfs$concentration) := as.numeric(pert_dose)] - + data[, (idfs$drug2) := pert2_iname] data[, (idfs$concentration2) := as.numeric(pert2_dose)] - + } else { if (any(grepl("\\|", data$pert_iname))) { separator <- "|" } else { separator <- "_" } - - data[, unlist(idfs[c("drug", "drug2")]) := + + data[, unlist(idfs[c("drug", "drug2")]) := data.table::tstrsplit(data$pert_iname, separator, fixed = TRUE)] - - data[, unlist(idfs[c("concentration", "concentration2")]) := + + data[, unlist(idfs[c("concentration", "concentration2")]) := data.table::tstrsplit(data$pert_dose, separator, fixed = TRUE, type.convert = TRUE)] } - + data <- meta[, .SD, .SDcols = c("ModelID", "CCLEName", "OncotreeLineage")][data, on = .(CCLEName = ccle_name)] - + data[, unlist(idfs[c("cellline_parental_identifier", "cellline_subtype", "cellline_ref_div_time")]) := list("unknown", "unknown", as.numeric(NA))] - + raw_data <- data.table::data.table(clid = data$CCLEName, CellLineName = data$CCLEName, Tissue = ifelse(is.na(data$OncotreeLineage), @@ -122,7 +122,7 @@ convert_LEVEL5_prism_to_gDR_input <- function(prism_data_path, all(raw_data[[idfs$concentration]] == raw_data[[idfs$concentration2]])) { raw_data[, unlist(idfs[c("drug2", "concentration2")]) := NULL] } - + # control data dt_ctrl <- data.table::data.table(clid = unique(raw_data$clid), Gnumber = gDRutils::get_env_identifiers("untreated_tag")[1], @@ -138,7 +138,7 @@ convert_LEVEL5_prism_to_gDR_input <- function(prism_data_path, unique(raw_data[, unlist(idfs[c("cellline", "cellline_name", "cellline_tissue", "cellline_parental_identifier", "cellline_subtype", "cellline_ref_div_time")]), with = FALSE]), all.x = TRUE) - + # rename columns of control data data.table::setnames(dt_ctrl, c("clid", "Duration", "Gnumber", "Gnumber_2", "Concentration", "Concentration_2", "masked"), @@ -165,9 +165,9 @@ convert_LEVEL5_prism_to_gDR_input <- function(prism_data_path, #' @param readout_min minimum ReadoutValue #' #' @return \code{data.table} object with input data for gDR pipeline -#' +#' #' @keywords prism_conversion -#' +#' #' @examples #' prism_data_path <- system.file("testdata/prism_collapsed_LOGFC.csv", package = "gDRimport") #' cell_line_data_path <- system.file("testdata/prism_cell_lines.csv", package = "gDRimport") @@ -181,21 +181,21 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, treatment_data_path, meta_data_path, readout_min = 1.03) { - + checkmate::assert_file_exists(prism_data_path) checkmate::assert_file_exists(cell_line_data_path) - + checkmate::assert_file_exists(treatment_data_path) checkmate::assert_file_exists(meta_data_path) - + gDRutils::reset_env_identifiers() idfs <- gDRutils::get_env_identifiers() - + cell_lines <- data.table::fread(cell_line_data_path) treatment <- data.table::fread(treatment_data_path) res <- data.table::fread(prism_data_path) meta <- data.table::fread(meta_data_path) - + checkmate::assert_names(names(meta), must.include = c("ModelID", "CCLEName", @@ -210,24 +210,24 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, c("profile_id", "SampleID", "CompoundName", "GeneSymbolOfTargets"), c("column_name", "column_name", "name", "moa"), skip_absent = TRUE) - + checkmate::assert_names(names(cell_lines), must.include = "row_name") checkmate::assert_names(names(treatment), must.include = "column_name") - + if (!"LFC" %in% names(res)) { checkmate::assert_names(names(res), must.include = "V1") res <- data.table::melt(res, id.vars = "V1") data.table::setnames(res, c("V1", "variable"), c("row_name", "column_name")) - + if (all(grepl("::", res$column_name)) && !"dose" %in% names(treatment)) { res[, c("column_name", "dose") := data.table::tstrsplit(column_name, "::", keep = c(1, 2))] } } else { data.table::setnames(res, c("row_id", "profile_id"), c("row_name", "column_name")) } - + # add meta data to cell_line cell_lines <- meta[, .SD, .SDcols = c("ModelID", "CCLEName", @@ -236,14 +236,14 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, list("unknown", "unknown", as.numeric(NA))] cell_lines[, (idfs[["cellline_name"]]) := CCLEName] cell_lines[, OncotreeLineage := data.table::fcoalesce(OncotreeLineage, "unknown")] - # rename cell_lines + # rename cell_lines data.table::setnames(cell_lines, c("CCLEName", "OncotreeLineage"), unlist(idfs[c("cellline", "cellline_tissue")])) - + # merge results with cell_line data res$row_name <- gsub("::.*", "", res$row_name) - + full_data <- merge(res, unique(cell_lines[, c("ModelID", unlist(idfs[c("cellline", @@ -255,8 +255,8 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, all.x = TRUE, by.x = "row_name", by.y = "ModelID") - - + + # merge results with treatment data full_data <- merge(full_data, unique(treatment[, intersect(names(treatment), @@ -271,7 +271,7 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, } full_data$value <- pmin(readout_min, 2 ^ full_data[[value_col]]) full_data <- full_data[!(is.na(name) | is.na(value))] - + # data for conc = 0 untrt_tag <- gDRutils::get_env_identifiers("untreated_tag")[1] dt_ctrl <- data.table::data.table(clid = unique(full_data$clid), @@ -287,7 +287,7 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, unique(full_data[, unlist(idfs[c("cellline", "cellline_name", "cellline_tissue", "cellline_parental_identifier", "cellline_subtype", "cellline_ref_div_time")]), with = FALSE]), all.x = TRUE) - + # data for treatment ls_col <- intersect(c("clid", "name", "broad_id", "moa", "dose", "value", unlist(idfs[c("cellline", "cellline_name", "cellline_tissue", @@ -302,7 +302,7 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, dt_trt$Duration <- NA_real_ dt_trt$masked <- FALSE data.table::setcolorder(dt_trt, neworder = colnames(dt_ctrl)) - + # merge treatment and control merged_data <- rbind(dt_trt, dt_ctrl) data.table::setnames(merged_data, @@ -311,7 +311,7 @@ convert_LEVEL6_prism_to_gDR_input <- function(prism_data_path, unlist(idfs[c("cellline", "drug", "drug_name", "drug_moa", "duration", "concentration", "masked_tag")])) - + # final merged_data } diff --git a/R/pset_to_gdrDF.R b/R/pset_to_gdrDF.R index 269b9d19..0f703570 100644 --- a/R/pset_to_gdrDF.R +++ b/R/pset_to_gdrDF.R @@ -3,22 +3,22 @@ #' @param pharmacoset PharmacoSet object #' @param run_parallel logical, TRUE (default) if to run functions in Parallel, FALSE to run in serial #' @param workers integer, number of workers defaults to 2L if run_parallel is TRUE -#' +#' #' @keywords pset_conversion -#' +#' #' @return data.table of PharmacoSet's dose response data with column names aligned with gDR standard -#' +#' #' @examples #' pset <- suppressMessages(getPSet( -#' "Tavor_2020", +#' "Tavor_2020", #' psetDir = system.file("extdata/pset", package = "gDRimport"), #' use_local_PSets_list = TRUE #' )) #' dt <- convert_pset_to_df(pset) #' gDRutils::reset_env_identifiers() -#' +#' #' @author Jermiah Joseph -- collaboration with BHKLab -#' +#' #' @export convert_pset_to_df <- function(pharmacoset, run_parallel = TRUE, @@ -29,35 +29,35 @@ convert_pset_to_df <- function(pharmacoset, msg = "pharmacoset paramater must inherit from PharmacoSet class.") assertthat::assert_that(is.integer(workers), msg = "workers parameter must be an integer. Default = 2L") - + # FUNCTION TO MANIPULATE ENV IDENTIFIERS setEnvForPSet() - + # GET DOSE AND VIABILITY DATA & MELT INTO LARGE TABLE dose_response <- .extractDoseResponse(pset = pharmacoset) - + # ADD IN DURATION AND REFERENCE DIVISION TIME dose_response_duration_refdivtime <- .createPseudoData(dose_response) - + # REMOVE NEGATIVE VIABILITIES .removeNegatives(dose_response_duration_refdivtime) } #' Adjust environment variables to meet gDR standards -#' +#' #' @examples #' setEnvForPSet() #' gDRutils::reset_env_identifiers() -#' +#' #' @keywords pset_conversion -#' +#' #' @return \code{NULL} -#' +#' #' @export setEnvForPSet <- function() { ## -- Set environment identifiers to map from our columns to gDR columns gDRutils::reset_env_identifiers() - + pgx_to_gdr_ids <- list( cellline = "Clid", cellline_name = "Clid", @@ -67,29 +67,29 @@ setEnvForPSet <- function() { duration = "Duration", barcode = "Barcode" ) - + invisible(Map(function(x, y) gDRutils::set_env_identifier(x, y), names(pgx_to_gdr_ids), pgx_to_gdr_ids)) } -#' Get PharmacoSet -#' +#' Get PharmacoSet +#' #' @param pset_name string with the name of the PharmacoSet #' @param psetDir string with the temporary directory for the PharmacoSet #' @param canonical logical flag indicating if the PSet canonical #' @param timeout maximum number of seconds allowed for PSet download -#' @param use_local_PSets_list logical flag if PSets list should be used from +#' @param use_local_PSets_list logical flag if PSets list should be used from #' local. If FALSE PSets list will be taken from web. -#' +#' #' @keywords pset_conversion -#' -#' @examples +#' +#' @examples #' suppressMessages(getPSet( -#' "Tavor_2020", +#' "Tavor_2020", #' psetDir = system.file("extdata/pset", package = "gDRimport"), #' use_local_PSets_list = TRUE #' )) -#' +#' #' @return PharmacoSet object #' @export getPSet <- function(pset_name, @@ -100,17 +100,17 @@ getPSet <- function(pset_name, assertthat::assert_that(is.character(pset_name), msg = "pset_name parameter must be a character vector.") - + checkmate::assert_character(getwd()) checkmate::assert_flag(canonical) checkmate::assert_numeric(timeout) - + availPSets <- if (use_local_PSets_list) { qs2::qs_read(system.file("extdata", "data_for_unittests", "PSets.qs2", package = "gDRimport")) } else { PharmacoGx::availablePSets(canonical = canonical) - } - + } + pset_name_param <- if (pset_name %in% availPSets$"Dataset Name") { availPSets[availPSets$"Dataset Name" == pset_name, "PSet Name"] } else if (pset_name %in% availPSets$"PSet Name") { @@ -120,8 +120,8 @@ getPSet <- function(pset_name, " does not exist in the available PSets. Try one of the following:\n", paste(availPSets$`PSet Name`, collapse = "\n")) } - - # Check if PSet exists in directories where PSets are stored. + + # Check if PSet exists in directories where PSets are stored. # Read in if exists, download otherwise pset <- if (file.exists(file.path(psetDir, paste0(pset_name_param, ".qs2")))) { message("PSet exists in user-provided directory, reading .qs2 file") @@ -136,18 +136,18 @@ getPSet <- function(pset_name, #' Get dose and viability readouts and melt into large data table #' @keywords internal -#' +#' #' @return data.table with dose-response data -#' +#' .extractDoseResponse <- function(pset) { checkmate::assert_class(pset, "PharmacoSet") - + tre <- pset@treatmentResponse raw_tr <- tre$raw info_dt <- data.table::as.data.table(tre$info, keep.rownames = TRUE) duration <- unique(tre$info$duration.hours) - - # use output of get_env_identifiers() + + # use output of get_env_identifiers() env_ids <- gDRutils::get_env_identifiers() # Determine how many doses there are raw_tr_dims <- dim(raw_tr) @@ -157,19 +157,19 @@ getPSet <- function(pset_name, } else { viability <- data.table::as.data.table(raw_tr[, seq_len(raw_tr_dims[2]), 2], TRUE) viability[[env_ids$untreated_tag[1]]] <- 100 - viability.m <- data.table::melt(viability, + viability.m <- data.table::melt(viability, measure.vars = c(2:length(viability)), variable.name = "Dose", value.name = "ReadoutValue") - + doses <- data.table::as.data.table(raw_tr[, seq_len(raw_tr_dims[2]), 1], TRUE) doses[[env_ids$untreated_tag[1]]] <- 0 - doses.m <- data.table::melt(doses, + doses.m <- data.table::melt(doses, measure.vars = c(2:length(doses)), variable.name = "Dose", value.name = env_ids$concentration) } - # CHECK IF SAME SIZE and MERGE + # CHECK IF SAME SIZE and MERGE if (length(doses.m) == length(viability.m)) { merged_dt <- viability.m[doses.m, on = intersect(names(viability.m), names(doses.m))] @@ -182,7 +182,7 @@ getPSet <- function(pset_name, data.table::setnames(merged_dt, treatment_cols, c(env_ids$cellline, env_ids$drug_name)) merged_dt[Dose == env_ids$untreated_tag[1], env_ids$drug_name := env_ids$untreated_tag[1]] merged_dt[, Dose := NULL] - + if (!is.null(duration)) { merged_dt[, (env_ids$duration) := duration] } @@ -191,17 +191,17 @@ getPSet <- function(pset_name, #' Add in pseudo-data for duration and cell reference division time #' @keywords internal -#' +#' #' @return data.table .createPseudoData <- function(dt) { - + checkmate::assert_data_table(dt) barcode <- gDRutils::get_env_identifiers("barcode")[1] duration <- gDRutils::get_env_identifiers("duration") refDivTime <- gDRutils::get_env_identifiers("cellline_ref_div_time") - - + + if (!duration %in% names(dt)) { dt[, (duration) := NA_real_] } @@ -211,16 +211,16 @@ getPSet <- function(pset_name, if (!barcode %in% names(dt)) { colnames(dt)[1] <- barcode } - + dt } #' Remove negative viabilities #' @keywords internal -#' +#' #' @return data.table with positive values in column `ReadoutValue` -#' +#' .removeNegatives <- function(dataset) { checkmate::assert_data_table(dataset) dataset[dataset$ReadoutValue > 0] diff --git a/R/testdata.R b/R/testdata.R index 0ea50202..024e86b8 100644 --- a/R/testdata.R +++ b/R/testdata.R @@ -1,17 +1,17 @@ #' get primary test data #' -#' @examples +#' @examples #' get_test_data() -#' +#' #' @keywords test_data_class #' @export #' #' @return object class "gdr_test_data" with with input data (manifest/template/result paths) #' and related reference data (qs2 file paths) get_test_data <- function() { - + ddir <- system.file(package = "gDRimport", "extdata", "data1") - + ## define manifest ref data bcode_tbl <- expand.grid(c("201904190", "201904197"), letters[seq_len(6)]) bcode_v <- paste0(bcode_tbl$Var1, bcode_tbl$Var2) @@ -24,7 +24,7 @@ get_test_data <- function() { Template = rep(templates_v, 6), clid = paste0("CL000", clids_n) ) - + new("gdr_test_data", manifest_path = file.path(ddir, "manifest.xlsx"), result_path = @@ -47,7 +47,7 @@ get_test_data <- function() { #' get test Tecan data #' -#' @examples +#' @examples #' get_test_Tecan_data() # #' @keywords test_data @@ -72,7 +72,7 @@ get_test_Tecan_data <- function() { #' get test D300 data #' -#' @examples +#' @examples #' get_test_D300_data() # #' @keywords test_data @@ -104,7 +104,7 @@ get_test_D300_data <- function() { #' get test EnVision data #' -#' @examples +#' @examples #' get_test_EnVision_data() # #' @keywords test_data @@ -126,7 +126,7 @@ get_test_EnVision_data <- function() { #' get test tsv data #' -#' @examples +#' @examples #' get_test_tsv_data() # #' @keywords test_data diff --git a/R/utils.R b/R/utils.R index a10e94d8..d7d94540 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,11 +6,11 @@ #' @param dictionary a named array #' @keywords utils #' -#' @examples +#' @examples #' standardize_record_values(c("Vehicle", "vehcle")) -#' +#' #' @return a named array with updated names -#' +#' #' @export #' standardize_record_values <- function(x, dictionary = DICTIONARY) { @@ -53,17 +53,17 @@ read_ref_data <- function(inDir, prefix = "ref") { #' @param results_file path to results data #' @keywords utils #' -#' @examples +#' @examples #' td2 <- get_test_Tecan_data() #' detect_file_format(td2$r_files[1]) -#' +#' #' @return string of the detected file format -#' +#' #' @export detect_file_format <- function(results_file) { checkmate::assert_character(results_file) results_data <- rio::import(results_file, fill = TRUE) - + if (all(c("System", "User", "Plate", "Plate-ID (Stacker)") %in% results_data[, 1])) { "Tecan" } else if (any(grepl("Instrument Results from", c(names(results_data)[1], @@ -72,7 +72,7 @@ detect_file_format <- function(results_file) { na.rm = TRUE)) { "EnVision_new" } else if ("Repeat Barcode" %in% c( - do.call(paste, results_data[, 2:3]), + do.call(paste, results_data[, 2:3]), paste(names(results_data[, 2:3]), collapse = " ") ) || any(grepl("Repeat,Barcode", results_data[, 1]))) { "EnVision" diff --git a/inst/scripts/pset_to_gdr_TestScript.R b/inst/scripts/pset_to_gdr_TestScript.R index 965bd7d4..b5488644 100644 --- a/inst/scripts/pset_to_gdr_TestScript.R +++ b/inst/scripts/pset_to_gdr_TestScript.R @@ -1,4 +1,4 @@ -## +## # collaboration with BHKLab # co-author: Jermiah Joseph @@ -30,7 +30,7 @@ gDRutils::convert_se_assay_to_dt(se[[1]],"RawTreated") gDRutils::convert_se_assay_to_dt(se[[1]],"Controls") ########################################################## -# RUNNING EACH STEP SEPARATELY IF NEEDED. +# RUNNING EACH STEP SEPARATELY IF NEEDED. ########################################################## ############################# diff --git a/tests/testthat.R b/tests/testthat.R index ec002672..31130de4 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -2,4 +2,3 @@ library(testthat) library(gDRimport) test_check("gDRimport") - diff --git a/tests/testthat/test-D300.R b/tests/testthat/test-D300.R index 5bf3e703..2c25040e 100644 --- a/tests/testthat/test-D300.R +++ b/tests/testthat/test-D300.R @@ -2,7 +2,7 @@ context("import_doses") td3 <- get_test_D300_data() test_that("parse_D300_xml", { - + # valid output returned for the D300 96 well plate example fs <- td3[["f_96w"]] dose_df <- parse_D300_xml(fs[["d300"]]) @@ -10,7 +10,7 @@ test_that("parse_D300_xml", { data.table::setorder(dose_df, Row, Col, D300_Plate_N) data.table::setorder(ref_dose_df, Row, Col, D300_Plate_N) expect_identical(dose_df, ref_dose_df) - + # valid output returned for the D300 384 well plate example fs2 <- td3[["f_384w"]] dose_df <- parse_D300_xml(fs2[["d300"]]) @@ -18,20 +18,20 @@ test_that("parse_D300_xml", { data.table::setorder(dose_df, Row, Col, D300_Plate_N) data.table::setorder(ref_dose_df, Row, Col, D300_Plate_N) expect_identical(dose_df, ref_dose_df) - + # expected error(s) returned err_msg1 <- "'D300_file' must be a readable path" expect_error(parse_D300_xml("/non/existent_file"), err_msg1) - + }) test_that("parse_D300_metadata_file works as expected", { - + fs <- td3[["f_96w"]] - Gnum_96w_file <- parse_D300_metadata_file(fs$Gnum) + Gnum_96w_file <- parse_D300_metadata_file(fs$Gnum) ref_Gnum_96w_file <- qs2::qs_read(fs$ref_Gnum) expect_equal(Gnum_96w_file, ref_Gnum_96w_file) - + fs2 <- td3[["f_384w"]] Gnum_384w_file <- parse_D300_metadata_file(fs2$Gnum) ref_Gnum_384w_file <- qs2::qs_read(fs2$ref_Gnum) @@ -39,32 +39,32 @@ test_that("parse_D300_metadata_file works as expected", { }) test_that("import_D300 works with standard metadata", { - + on.exit({ lapply(names(td3), function(x) { unlink(td3[[x]][["dest_path"]], recursive = TRUE) }) }) - + #for 96 and 384 well plates for (k in seq_along(td3)) { - + # validate output files for D300 examples fs <- td3[[k]] dest_path <- fs$dest_path ref_path <- fs$ref_output_path D300_file <- fs$d300 Gnum_file <- fs$Gnum - + # create directory if not existing if (!file.exists(dest_path)) { dir.create(dest_path, recursive = TRUE) - } - + } + # run import_D300 import_D300(D300_file, dest_path, Gnum_file) - - # test every output file against reference file + + # test every output file against reference file fs_files <- list.files(path = dest_path) idx <- c(1, length(fs_files)) # test with first and last file for (i in idx) { @@ -72,11 +72,11 @@ test_that("import_D300 works with standard metadata", { ref_file_path <- file.path(ref_path, fs_files[i]) #load sheets output_sheets <- readxl::excel_sheets(output_file_path) - ref_sheets <- readxl::excel_sheets(ref_file_path) - #test sheet names are identical + ref_sheets <- readxl::excel_sheets(ref_file_path) + #test sheet names are identical expect_equal(output_sheets, ref_sheets) #test content of sheets is identical - for (j in seq_len(length(output_sheets))) { + for (j in seq_along(output_sheets)) { output_sheet <- read_excel_to_dt(output_file_path, sheet = output_sheets[[j]], col_names = FALSE) @@ -92,7 +92,7 @@ test_that("import_D300 works with standard metadata", { } }), .SDcols = names(ref_sheet)] expect_equal(output_sheet, ref_sheet) - } + } } } }) @@ -101,7 +101,7 @@ test_that("import_D300 works with standard metadata", { test_that("get_conversion_factor works as expected", { expect_error(get_conversion_factor("nL", "mL"), regexp = "conversion to unit 'mL' not supported") expect_error(get_conversion_factor("L", "µL"), regexp = "unsupported conversion factor: 'L'") - + expect_equal(get_conversion_factor("nL", "µL"), 1e-3) }) @@ -118,7 +118,7 @@ test_that("fill_NA works as expected", { df <- data.table::data.table(a = rep(NA, n), b = seq(n)) obs <- fill_NA(df, "a", "b") expect_equal(obs$a, df$b) - + obs2 <- fill_NA(df, "b", "a") expect_equal(obs2$b, df$b) expect_equal(obs2$a, df$a) diff --git a/tests/testthat/test-assert_utils.R b/tests/testthat/test-assert_utils.R index 2c39227e..ea115370 100644 --- a/tests/testthat/test-assert_utils.R +++ b/tests/testthat/test-assert_utils.R @@ -7,7 +7,7 @@ test_that("assert_utils", { err_msg1 <- "Following path(s) with no read permission found: '/non/existent/file'" expect_error(is_readable_v(c(manifest_path(td1), "/non/existent/file")), err_msg1, fixed = TRUE) - + td2 <- get_test_Tecan_data() expect_true(is_readable_v(td2$m_file)) expect_true(is_readable_v(td2$r_files)) @@ -15,5 +15,5 @@ test_that("assert_utils", { err_msg1 <- "Following path(s) with no read permission found: '/non/existent/file'" expect_error(is_readable_v(c(td2$m_file, "/non/existent/file")), err_msg1, fixed = TRUE) - + }) diff --git a/tests/testthat/test-correction.R b/tests/testthat/test-correction.R index 680ce951..3deb3aca 100644 --- a/tests/testthat/test-correction.R +++ b/tests/testthat/test-correction.R @@ -82,7 +82,7 @@ test_that("correct_template_sheets works as expected", { openxlsx::write.xlsx(tfiles_empty[[x]], file.path(tempdir(), x)) }) correctedList <- correct_template_sheets(file.path(tempdir(), names(tfiles))) - + tfilesCorrect <- list("template1.xlsx" = c("Gnumber", "Concentration", "Media"), "template2.xlsx" = c("Gnumber", "Concentration"), "template3.xlsx" = c("Gnumber", "Concentration", "Gnumber_2", "Concentration_2"), diff --git a/tests/testthat/test-exceptions.R b/tests/testthat/test-exceptions.R index 18d515d1..4b00b9c1 100644 --- a/tests/testthat/test-exceptions.R +++ b/tests/testthat/test-exceptions.R @@ -7,22 +7,22 @@ test_that("check get_exception_data works correctly", { c("status_code", "title", "sprintf_text", "type", "input_type")) expect_equal(obs$sprintf_text, "There were errors loading manifest. Check error message below:\n```\n%s\n```") - + expect_true(all(dim(gDRimport::get_exception_data()) >= 5)) - + obs <- gDRimport::get_exception_data(10) expect_is(obs, "data.table") - + obs <- gDRimport::get_exception_data() expect_gt(NROW(obs), 1) expect_is(obs, "data.table") - + expect_error( gDRimport::get_exception_data("two"), "Assertion on 'status_code' failed: Must be of type 'number'", fixed = TRUE ) - + expect_error( gDRimport::get_exception_data(0), paste0( diff --git a/tests/testthat/test-gDRmae_to_pset.R b/tests/testthat/test-gDRmae_to_pset.R index 150bf746..4e1bfe31 100644 --- a/tests/testthat/test-gDRmae_to_pset.R +++ b/tests/testthat/test-gDRmae_to_pset.R @@ -3,18 +3,18 @@ test_that("convert_MAE_to_PSet works as expected", { n <- 10 rnames <- LETTERS[1:m] cnames <- letters[1:n] - + # Normal matrix. ref_gr_value <- matrix(runif(m * n), nrow = m, ncol = n, dimnames = list(rnames, cnames)) se <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value), rowData = S4Vectors::DataFrame(rnames), colData = S4Vectors::DataFrame(cnames)) - + mae <- MultiAssayExperiment::MultiAssayExperiment(experiments = list("single-agent" = se)) pset <- convert_MAE_to_PSet(mae, "my_pset") expect_equal(class(pset@treatmentResponse)[1], "TreatmentResponseExperiment") - expect_equal(sort(rownames(pset@treatmentResponse)), + expect_equal(sort(rownames(pset@treatmentResponse)), sort(unlist(lapply(MultiAssayExperiment::experiments(mae), rownames), use.names = FALSE))) expect_equal(pset@sample$sampleid, cnames) @@ -22,16 +22,16 @@ test_that("convert_MAE_to_PSet works as expected", { se1 <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value[1:10, ]), rowData = S4Vectors::DataFrame(rnames)[1:10, , drop = FALSE], colData = S4Vectors::DataFrame(cnames)) - + se2 <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value[11:20, ]), rowData = S4Vectors::DataFrame(rnames)[11:20, , drop = FALSE], colData = S4Vectors::DataFrame(cnames)) maeTwoExperiments <- MultiAssayExperiment::MultiAssayExperiment(experiments = list("single-agent" = se1, "matrix" = se2)) - + pset_mae2Exp <- convert_MAE_to_PSet(maeTwoExperiments, "my2exp") expect_equal(class(pset_mae2Exp@treatmentResponse)[1], "TreatmentResponseExperiment") - expect_equal(sort(rownames(pset_mae2Exp@treatmentResponse)), + expect_equal(sort(rownames(pset_mae2Exp@treatmentResponse)), sort(unlist(lapply(MultiAssayExperiment::experiments(maeTwoExperiments), rownames), use.names = FALSE))) expect_equal(pset_mae2Exp@sample$sampleid, cnames) expect_equal(pset_mae2Exp@treatment$treatmentid, diff --git a/tests/testthat/test-load_files.R b/tests/testthat/test-load_files.R index 3c58ad93..f8878b79 100644 --- a/tests/testthat/test-load_files.R +++ b/tests/testthat/test-load_files.R @@ -1,57 +1,57 @@ context("load_files") test_that("load_manifest works as expected", { - + td1 <- get_test_data() - + m_df <- load_manifest(manifest_path(td1)) expect_identical(td1@ref_m_df, m_df$data) - + td2 <- get_test_Tecan_data() - + m_df <- load_manifest(td2$m_file) - ref_m_df <- qs2::qs_read(td2$ref_m_df) + ref_m_df <- qs::qread(td2$ref_m_df) expect_equal(m_df, ref_m_df) - + err_msg1 <- "Assertion on 'manifest_file' failed: File does not exist: '/non/existent_file'." expect_error(load_manifest("/non/existent_file"), err_msg1) - + err_msg2 <- "'manifest_file' must be a character vector" expect_error(load_manifest(c(2, 3)), err_msg2) - + err_msg3 <- "Barcodes in Manifest must be unique!" expect_error(load_manifest(c(manifest_path(td1), manifest_path(td1))), err_msg3) - + }) test_that("load_results works as expected", { - + td1 <- get_test_data() - + headers <- gDRutils::get_env_identifiers() headers$barcode <- headers$barcode[[1]] - + res_tbl <- load_results(df_results_files = c(result_path(td1)), headers = headers) ref_tbl <- data.table::fread(td1@ref_r1_r2) expect_equal(res_tbl, ref_tbl) - + df_results <- data.table::data.table(datapath = result_path(td1), name = basename(result_path(td1))) res_df_tbl <- load_results(df_results, headers = headers) expect_equal(res_df_tbl, ref_tbl) - + res_tbl2 <- load_results(df_results_files = c(result_path(td1)[1]), headers = headers) ref_tbl2 <- data.table::fread(td1@ref_r1) expect_equal(res_tbl2, ref_tbl2) - + td2 <- get_test_Tecan_data() - + res_tbl3 <- load_results(df_results_files = c(td2$r_files), instrument = "Tecan", headers = headers) - ref_tbl3 <- qs2::qs_read(td2$ref_r_df) + ref_tbl3 <- qs::qread(td2$ref_r_df) expect_equal(res_tbl3, ref_tbl3) - + err_msg1 <- "Assertion on 'results_file' failed: File does not exist: '/non/existent_file'." expect_error(load_results(c(result_path(td1)[1], "/non/existent_file")), err_msg1) - + err_msg_2a <- "Assertion on 'instrument' failed: " err_msg2b <- "Must comply to pattern '^EnVision$|^long_tsv$|^Tecan$|^EnVision_new$|^Incucyte$'." err_msg2 <- paste0(err_msg_2a, err_msg2b) @@ -59,23 +59,23 @@ test_that("load_results works as expected", { }) test_that("load_templates works as expected", { - + td1 <- get_test_data() - + t_tbl <- load_templates(df_template_files = c(template_path(td1))) ref_tbl <- data.table::fread(td1@ref_t1_t2, colClasses = "character") expect_equal(t_tbl, ref_tbl) - + df_templates <- data.table::data.table(datapath = template_path(td1), name = basename(template_path(td1))) res_t_tbl <- load_templates(df_templates) expect_equal(res_t_tbl, ref_tbl) - + td2 <- get_test_Tecan_data() - + res_t_tbl3 <- load_templates(df_template_files = c(td2$t_files)) - ref_tbl3 <- .standardize_untreated_values(qs2::qs_read(td2$ref_t_df)) + ref_tbl3 <- .standardize_untreated_values(qs::qread(td2$ref_t_df)) expect_equal(res_t_tbl3, ref_tbl3) - + err_msg1 <- "Assertion on 'template_file' failed: File does not exist: '/non/existent_file'." expect_error(load_templates(c(template_path(td1)[1], "/non/existent_file")), err_msg1) }) @@ -84,65 +84,65 @@ test_that("load_templates with no untreated conditions works as expected", { err_msg <- "No untreated controls were found in the treatment. Please upload the appropriate treatment." expect_error(load_templates(system.file("extdata/data_for_unittests/Template_7daytreated.xlsx", package = "gDRimport")), err_msg) - + }) test_that("load_data works as expected", { - + td1 <- get_test_data() - + l_tbl <- load_data(manifest_path(td1), template_path(td1), result_path(td1)) expect_identical(td1@ref_m_df, l_tbl$manifest) ref_tbl <- .standardize_untreated_values(data.table::fread(td1@ref_t1_t2)) expect_equal(l_tbl$treatments, ref_tbl) ref_tbl <- data.table::fread(td1@ref_r1_r2) expect_equal(l_tbl$data, ref_tbl) - + td2 <- get_test_Tecan_data() - + l_tbl2 <- load_data(td2$m_file, td2$t_files, td2$r_files, instrument = "Tecan") - ref_m_df <- qs2::qs_read(td2$ref_m_df) + ref_m_df <- qs::qread(td2$ref_m_df) expect_equal(ref_m_df$data, l_tbl2$manifest) - ref_t_df <- .standardize_untreated_values(qs2::qs_read(td2$ref_t_df)) + ref_t_df <- .standardize_untreated_values(qs::qread(td2$ref_t_df)) expect_equal(ref_t_df, l_tbl2$treatments) - ref_r_df <- qs2::qs_read(td2$ref_r_df) + ref_r_df <- qs::qread(td2$ref_r_df) expect_equal(l_tbl2$data, ref_r_df) - + td4 <- get_test_EnVision_data() l_tbl4 <- load_data(td4$m_file, td4$t_files, td4$r_files, instrument = "EnVision") - ref_l <- qs2::qs_read(td4$ref_l_path) + ref_l <- qs::qread(td4$ref_l_path) expect_equal(ref_l$manifest, l_tbl4$manifest) expect_equal(ref_l$treatments, l_tbl4$treatments) expect_equal(ref_l$data, l_tbl4$data) - + err_msg1 <- "'manifest_file' must be a readable path" expect_error(load_data("/non/existent_file", template_path(td1), result_path(td1)), err_msg1) - + err_msg2 <- "'manifest_file' must be a character vector" expect_error(load_data(c(2, 3), template_path(td1), result_path(td1)), err_msg2) - + err_msg3 <- "Barcodes in Manifest must be unique!" expect_error(load_manifest(c(manifest_path(td1), manifest_path(td1))), err_msg3) - + err_msg4 <- "Following path(s) with no read permission found: '/non/existent_file'" - expect_error(load_data(manifest_path(td1), c(result_path(td1)[1], "/non/existent_file"), + expect_error(load_data(manifest_path(td1), c(result_path(td1)[1], "/non/existent_file"), result_path(td1)), err_msg4, fixed = TRUE) - + err_msg_5a <- "Assertion on 'instrument' failed: " err_msg5b <- "Must comply to pattern '^EnVision$|^long_tsv$|^Tecan$|^EnVision_new$|^Incucyte$'." err_msg5 <- paste0(err_msg_5a, err_msg5b) - expect_error(load_data(manifest_path(td1), template_path(td1), result_path(td1), "invalid_instrument"), + expect_error(load_data(manifest_path(td1), template_path(td1), result_path(td1), "invalid_instrument"), err_msg5, fixed = TRUE) - + err_msg6 <- "Assertion on 'results_file' failed: File does not exist: '/non/existent_file'." - expect_error(load_data(manifest_path(td1), template_path(td1), c(result_path(td1)[1], "/non/existent_file")), + expect_error(load_data(manifest_path(td1), template_path(td1), c(result_path(td1)[1], "/non/existent_file")), err_msg6) - + td5 <- get_test_tsv_data() l_tbl5 <- load_data(td5$m_file, td5$t_files, td5$r_files) - ref_l <- qs2::qs_read(td5$ref_l_path) + ref_l <- qs::qread(td5$ref_l_path) expect_equal(ref_l$manifest, l_tbl5$manifest) expect_equal(ref_l$treatments, l_tbl5$treatments) expect_equal(ref_l$data, l_tbl5$data) @@ -182,7 +182,7 @@ test_that(".check_file_structure works as expected", { gaps <- min(which(full_rows)[(diff(which(full_rows)) > 20)] + 1, dim(df)[1]) df <- - df[full_rows_index[full_rows_index <= gaps], ] + df[full_rows_index[full_rows_index <= gaps], ] if (ncol(df) < n_col) { df[, (ncol(df) + 1):n_col] <- NA } @@ -193,9 +193,9 @@ test_that(".check_file_structure works as expected", { ref_bckgrd <- 4 readout_offset <- 1 + ref_bckgrd barcode_col <- 3 - expect_null(.check_file_structure(df, basename(results_filename[[iF]]), + expect_null(.check_file_structure(df, basename(results_filename[[iF]]), iS, readout_offset, n_row, n_col, iB, barcode_col)) - + df2 <- read_excel_to_dt(system.file("extdata/data1/RawData_day7.xlsx", package = "gDRimport")) expect_error(.check_file_structure(df2, basename(results_filename[[iF]]), iS, readout_offset, n_row, n_col, iB, barcode_col)) @@ -233,7 +233,7 @@ test_that(".fill_empty_wells works as expected", { test_that(".standardize_untreated_values works as expected", { untreated_tags <- gDRutils::get_env_identifiers("untreated_tag") - + df_test <- data.table::data.table(a = c(untreated_tags[[1]], untreated_tags[[2]], toupper(untreated_tags[[1]]), tolower(untreated_tags[[2]]))) df_corrected <- .standardize_untreated_values(df_test) @@ -241,17 +241,17 @@ test_that(".standardize_untreated_values works as expected", { }) test_that("check_metadata_names works as expected", { - + td1 <- get_test_data() m_file <- manifest_path(td1) m_data <- read_excel_to_dt(m_file) - + result <- check_metadata_names(col_df = colnames(m_data)) expect_equal(result, colnames(m_data)) - + result <- check_metadata_names(col_df = colnames(m_data), m_file, df_type = "manifest") expect_equal(result, colnames(m_data)) - + t_file <- template_path(td1)[[1]] t_data <- correct_template_sheets(t_file) result <- check_metadata_names(col_df = t_data[[1]]) @@ -260,44 +260,44 @@ test_that("check_metadata_names works as expected", { expect_equal(result, t_data[[1]]) result <- check_metadata_names(col_df = t_data[[1]], df_type = "template_treatment") expect_equal(result, t_data[[1]]) - + expect_error(check_metadata_names(col_df = t_data[[1]][-1], df_type = "template_treatment")) expect_error(check_metadata_names(col_df = t_data[[1]][-3], df_type = "template_treatment"), "Treatment file") - + t_data_with_space <- t_data[[1]] t_data_with_space[1] <- "Gnumber " expect_no_error(check_metadata_names(col_df = t_data_with_space)) - + t_data_with_number <- t_data[[1]] t_data_with_number[5] <- "1" expect_error( - check_metadata_names(col_df = t_data_with_number), + check_metadata_names(col_df = t_data_with_number), "cannot contain special characters or start with a number" ) - + t_data_with_duplication <- t_data[[1]] t_data_with_duplication[5] <- "Gnumber" t_data_with_duplication[6] <- "GnumbeR" expect_no_error(check_metadata_names(col_df = t_data_with_duplication)) - + t_data_with_resticted_name <- t_data[[1]] t_data_with_resticted_name[5] <- "Tissue" expect_error(check_metadata_names(col_df = t_data_with_resticted_name), "Metadata field name") - + expect_error(check_metadata_names(col_df = NULL), "Assertion on 'col_df' failed") expect_error(check_metadata_names(col_df = colnames(m_data), df_name = NULL), "Assertion on 'df_name'") expect_error(check_metadata_names(col_df = colnames(m_data), df_type = 5), "Assertion on 'df_type'") - + }) test_that("load_results_Incucyte works as expected", { - + headers <- gDRutils::get_env_identifiers() bcode_name <- headers$barcode[1] d_name <- headers$duration well_rname <- headers$well_position[1] well_cname <- headers$well_position[2] - + file_csv_1_path <- tempfile(fileext = ".csv") file_csv_2_na_path <- tempfile(fileext = ".csv") file_tsv_3_path <- tempfile(fileext = ".tsv") @@ -306,7 +306,7 @@ test_that("load_results_Incucyte works as expected", { file_bad_barcode_path <- tempfile(fileext = ".csv") file_custom_header_path <- tempfile(fileext = ".csv") file_colon_header_path <- tempfile(fileext = ".csv") - + writeLines( c( "Vessel Name: Test Plate 1,,,", @@ -321,7 +321,7 @@ test_that("load_results_Incucyte works as expected", { ), file_csv_1_path ) - + writeLines( c( "Vessel Name: Test Plate 2,,,", @@ -335,7 +335,7 @@ test_that("load_results_Incucyte works as expected", { ), file_csv_2_na_path ) - + writeLines(c( paste("Vessel Name\tTest Plate 3\t\t"), paste("Barcode\tPLATE_003_TSV\t\t"), @@ -345,7 +345,7 @@ test_that("load_results_Incucyte works as expected", { paste("8/18/25 9:02", "6", "510", "610", sep = "\t") ), file_tsv_3_path) - + xlsx_df <- data.frame( V1 = c( "Vessel Name", @@ -361,7 +361,7 @@ test_that("load_results_Incucyte works as expected", { stringsAsFactors = FALSE ) openxlsx::write.xlsx(xlsx_df, file_xlsx_4_path, colNames = FALSE) - + writeLines( c( "Vessel Name: Bad Plate,,", @@ -371,7 +371,7 @@ test_that("load_results_Incucyte works as expected", { ), file_bad_header_path ) - + writeLines( c( "Vessel Name: Bad Barcode Plate,,,", @@ -381,7 +381,7 @@ test_that("load_results_Incucyte works as expected", { ), file_bad_barcode_path ) - + writeLines( c( "Vessel,PLATE_CUSTOM", @@ -392,17 +392,17 @@ test_that("load_results_Incucyte works as expected", { ), file_custom_header_path ) - + writeLines( c( - "Vessel Name: Colon Test,,", - sprintf("%s:,%s", bcode_name, "PLATE_COLON_TEST,"), - "Date Time,Elapsed,A1", + "Vessel Name: Colon Test,,", + sprintf("%s:,%s", bcode_name, "PLATE_COLON_TEST,"), + "Date Time,Elapsed,A1", "8/18/25 3:02,0,999" ), file_colon_header_path ) - + on.exit(unlink( c( file_csv_1_path, @@ -414,54 +414,54 @@ test_that("load_results_Incucyte works as expected", { file_custom_header_path ) )) - + dt_csv <- load_results_Incucyte(file_csv_1_path, headers) - + expect_s3_class(dt_csv, "data.table") expect_equal(nrow(dt_csv), 4) - + expect_true(all( c(bcode_name, d_name, well_rname, well_cname, "ReadoutValue") %in% names(dt_csv) )) expect_false(any(c("Well", "Elapsed") %in% names(dt_csv))) - + expect_equal(unique(dt_csv[[bcode_name]]), "PLATE_001_CSV") expect_equal(sort(unique(dt_csv[[d_name]])), c(0, 6)) expect_equal(sort(dt_csv$ReadoutValue), c(100, 110, 200, 210)) expect_equal(sort(unique(dt_csv[[well_rname]])), "A") expect_equal(sort(unique(dt_csv[[well_cname]])), c("1", "2")) - + dt_tsv <- load_results_Incucyte(file_tsv_3_path, headers) - + expect_s3_class(dt_tsv, "data.table") expect_equal(nrow(dt_tsv), 4) - + expect_equal(unique(dt_tsv[[bcode_name]]), "PLATE_003_TSV") expect_equal(sort(dt_tsv$ReadoutValue), c(500, 510, 600, 610)) expect_equal(sort(unique(dt_tsv[[well_rname]])), "C") expect_equal(sort(unique(dt_tsv[[well_cname]])), c("1", "2")) - + dt_xlsx <- load_results_Incucyte(file_xlsx_4_path, headers) - + expect_s3_class(dt_xlsx, "data.table") expect_equal(nrow(dt_xlsx), 4) - + expect_equal(unique(dt_xlsx[[bcode_name]]), "PLATE_004_XLSX") expect_equal(sort(dt_xlsx$ReadoutValue), c(700, 710, 800, 810)) expect_equal(sort(unique(dt_xlsx[[well_rname]])), "D") expect_equal(sort(unique(dt_xlsx[[well_cname]])), c("1", "2")) - + all_files <- c(file_csv_1_path, file_csv_2_na_path, file_tsv_3_path, file_xlsx_4_path) dt_all <- load_results_Incucyte(all_files, headers) - + expect_s3_class(dt_all, "data.table") - + expect_equal(nrow(dt_all), 17) - + expect_equal( sort(unique(dt_all[[bcode_name]])), c( @@ -471,44 +471,44 @@ test_that("load_results_Incucyte works as expected", { "PLATE_004_XLSX" ) ) - + expect_false(any(is.na(dt_all$ReadoutValue))) expect_false(any(is.na(dt_all[[d_name]]))) - + missing_file <- "/non/existent/file.csv" expect_error( load_results_Incucyte(missing_file, headers), "Error reading /non/existent/file.csv" ) - + expect_error( load_results_Incucyte(file_bad_header_path, headers), "Invalid header in the result file: (missing 'Date Time' column)", fixed = TRUE ) - + expect_error( load_results_Incucyte(file_bad_barcode_path, headers), "Invalid header in the result file: (missing 'Barcode' column)", fixed = TRUE ) - + dt_colon <- load_results_Incucyte(file_colon_header_path, headers) - + expect_s3_class(dt_colon, "data.table") expect_equal(unique(dt_colon[[bcode_name]]), "PLATE_COLON_TEST") expect_equal(dt_colon$ReadoutValue, 999) - + }) test_that("load_results_EnVision_new works as expected", { headers <- gDRutils::get_env_identifiers() bcode_name <- headers$barcode[1] - + file_envision_multi <- tempfile(fileext = ".csv") file_envision_error <- tempfile(fileext = ".csv") file_envision_xlsx <- tempfile(fileext = ".xlsx") - + writeLines( c( "Instrument Results from;;;;;;", @@ -526,7 +526,7 @@ test_that("load_results_EnVision_new works as expected", { "Measurement Information;;;;;;", "Measurement GUID;;747f1a39-507e-4049;;;;", "Result of Fluorescence Intensity Filter 1;;;;;;", - "Plate Barcode;Loop;Repeat;Point X;Point Y;Exc WL[nm];", + "Plate Barcode;Loop;Repeat;Point X;Point Y;Exc WL[nm];", "P02;;1;;;485 / 20;", ";;;;;;", ";1;2;3", @@ -542,7 +542,7 @@ test_that("load_results_EnVision_new works as expected", { ), file_envision_multi ) - + writeLines( c( "Instrument Results from;;;;;;", @@ -550,7 +550,7 @@ test_that("load_results_EnVision_new works as expected", { ), file_envision_error ) - + xlsx_df <- data.frame( V1 = c("Instrument Results from", "Protocol Name", NA, "Result of Fluorescence", "Plate Barcode", "P03", NA, NA, "A", "B", "C"), @@ -562,44 +562,44 @@ test_that("load_results_EnVision_new works as expected", { stringsAsFactors = FALSE ) openxlsx::write.xlsx(xlsx_df, file_envision_xlsx, colNames = FALSE) - + on.exit(unlink(c(file_envision_multi, file_envision_error, file_envision_xlsx))) - + dt_multi <- expect_warning( load_results_EnVision_new(file_envision_multi, headers), NA ) - + expect_s3_class(dt_multi, "data.table") expect_equal(nrow(dt_multi), 18) - + expect_equal(unique(dt_multi[[bcode_name]]), c("P01", "P02")) - + expect_true(all(c(bcode_name, "WellRow", "WellColumn", "ReadoutValue", "BackgroundValue") %in% names(dt_multi))) - + plate1 <- dt_multi[dt_multi[[bcode_name]] == "P01", ] expect_equal(plate1[WellRow == "A" & WellColumn == 1, ReadoutValue], 10) expect_true(is.na(plate1[WellRow == "B" & WellColumn == 2, ReadoutValue])) expect_true(is.na(plate1[WellRow == "C" & WellColumn == 2, ReadoutValue])) - + plate2 <- dt_multi[dt_multi[[bcode_name]] == "P02", ] expect_false(any(is.na(plate2$ReadoutValue))) expect_equal(plate2[WellRow == "A" & WellColumn == 1, ReadoutValue], 11) expect_equal(plate2[WellRow == "C" & WellColumn == 3, ReadoutValue], 91) - + dt_xlsx <- expect_warning( load_results_EnVision_new(file_envision_xlsx, headers), NA ) - + expect_s3_class(dt_xlsx, "data.table") expect_equal(nrow(dt_xlsx), 9) expect_equal(unique(dt_xlsx[[bcode_name]]), "P03") - + plate3 <- dt_xlsx[dt_xlsx[[bcode_name]] == "P03", ] expect_equal(plate3[WellRow == "A" & WellColumn == 1, ReadoutValue], 12) expect_equal(plate3[WellRow == "C" & WellColumn == 3, ReadoutValue], 92) - + expect_error( load_results_EnVision_new(file_envision_error, headers), "Could not find data matrix header" diff --git a/tests/testthat/test-prism_to_gdrDF.R b/tests/testthat/test-prism_to_gdrDF.R index 9c3d097a..0c97d8ea 100644 --- a/tests/testthat/test-prism_to_gdrDF.R +++ b/tests/testthat/test-prism_to_gdrDF.R @@ -16,17 +16,17 @@ test_that("prism level5 single-agent data can be processed into gDR input format expect_equal(df_prism$result$Duration, c(120, 240, 120, 240)) expect_true(all(gDRutils::get_env_identifiers(c("drug", "cellline"), simplify = FALSE) %in% names(df_prism$result))) - + # testing format of clid, CellLineName and Tissue column expect_equal(df_prism$result$clid, rep("some_clid", 4)) expect_equal( df_prism$result$Tissue, rep("Breast", 4)) - + df_prism_unknown <- purrr::quietly(convert_LEVEL5_prism_to_gDR_input)(prism_data_2, prism_meta) expect_is(df_prism_unknown$result, "data.table") expect_equal(dim(df_prism_unknown$result), c(4, 12)) - + # testing format of clid, CellLineName and Tissue column expect_equal(df_prism_unknown$result$clid, df_prism_unknown$result$CellLineName) expect_equal(df_prism_unknown$result$Tissue, rep("unknown", 4)) @@ -41,7 +41,7 @@ test_that("prism level5 combo data can be processed into gDR input format ", { "Gnumber", "Gnumber_2", "Concentration", "Concentration_2", "masked")) expect_true(all(gDRutils::get_env_identifiers(c("drug", "drug2", "cellline"), simplify = FALSE) %in% names(df_prism$result))) - + # testing format of clid, CellLineName and Tissue column expect_equal(df_prism$result$clid, rep("some_clid", 2)) expect_equal( @@ -56,8 +56,8 @@ test_that("prism level6 data can be processed into gDR format ", { prism_meta) expect_is(df_prism$result, "data.table") expect_equal(dim(df_prism$result), c(3, 13)) - expect_equal(names(df_prism$result), c("clid", "Gnumber", "DrugName", "drug_moa", "Duration", "Concentration", - "ReadoutValue", "masked", "CellLineName", "Tissue", "parental_identifier", + expect_equal(names(df_prism$result), c("clid", "Gnumber", "DrugName", "drug_moa", "Duration", "Concentration", + "ReadoutValue", "masked", "CellLineName", "Tissue", "parental_identifier", "subtype", "ReferenceDivisionTime")) expect_true(all(gDRutils::get_env_identifiers(c("drug", "cellline"), simplify = FALSE) %in% names(df_prism$result))) diff --git a/tests/testthat/test-pset_to_gdrDF.R b/tests/testthat/test-pset_to_gdrDF.R index 608db8fd..7c8bd8ac 100644 --- a/tests/testthat/test-pset_to_gdrDF.R +++ b/tests/testthat/test-pset_to_gdrDF.R @@ -41,7 +41,7 @@ test_that(".extractDoseResponse works as expected", { ) dt <- .extractDoseResponse(pset) expect_s3_class(dt, "data.table") - expect_equal(names(dt), c("rn", "ReadoutValue", "Concentration", "clid", "DrugName", + expect_equal(names(dt), c("rn", "ReadoutValue", "Concentration", "clid", "DrugName", "Duration")) expect_equal(dim(dt), c(34684, 6)) }) @@ -56,13 +56,13 @@ test_that(".extractDoseResponse, .removeNegatives, and .createPseudoData work as ) dt <- .extractDoseResponse(pset) expect_s3_class(dt, "data.table") - expect_equal(names(dt), c("rn", "ReadoutValue", "Concentration", "clid", "DrugName", + expect_equal(names(dt), c("rn", "ReadoutValue", "Concentration", "clid", "DrugName", "Duration")) expect_equal(dim(dt), c(34684, 6)) - + dt_positive <- .removeNegatives(dt) expect_equal(dim(dt_positive), c(34516, 6)) - + dt_with_pseudodata <- .createPseudoData(dt_positive) expect_equal(dim(dt_with_pseudodata), c(34516, 7)) }) @@ -79,6 +79,6 @@ test_that("convert_pset_to_df works as expected", { dt <- convert_pset_to_df(pset) checkmate::assert_data_table(dt) expect_equal(dim(dt), c(34516, 7)) - expect_equal(names(dt), c("Barcode", "ReadoutValue", "Concentration", "Clid", "DrugName", + expect_equal(names(dt), c("Barcode", "ReadoutValue", "Concentration", "Clid", "DrugName", "Duration", "ReferenceDivisionTime")) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 257ad8d1..d4bedabe 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -19,11 +19,11 @@ test_that("detect_file_format works as expected", { envision_path <- list.files(system.file(package = "gDRimport", "extdata", "data1"), "^RawData", full.names = TRUE) expect_equal(unique(unlist(lapply(envision_path, detect_file_format))), "EnVision") - + tecan_path <- list.files(system.file(package = "gDRimport", "extdata", "data2"), "^RawData", full.names = TRUE) expect_equal(unique(unlist(lapply(tecan_path, detect_file_format))), "Tecan") - + tsv_path <- list.files(system.file(package = "gDRimport", "extdata", "data1"), "ref_RawData", full.names = TRUE) expect_equal(unique(unlist(lapply(tsv_path, detect_file_format))), "long_tsv") diff --git a/vignettes/ConvertingMAEtoPharmacoSet.Rmd b/vignettes/ConvertingMAEtoPharmacoSet.Rmd index 9307170a..10a86ab3 100644 --- a/vignettes/ConvertingMAEtoPharmacoSet.Rmd +++ b/vignettes/ConvertingMAEtoPharmacoSet.Rmd @@ -1,5 +1,5 @@ --- -title: "Converting a gDR-generated MultiAssayExperiment object into a PharmacoSet" +title: "Converting a gDR-generated MultiAssayExperiment object into a PharmacoSet" author: - name: Jermiah Joseph email: jermiah.joseph@uhn.ca @@ -30,20 +30,20 @@ library(MultiAssayExperiment) ## Introduction -The `PharmacoGx` package is a popular tool from the Bioconductor project within the field of bioinformatics and computational biology. -Whereas the `gDR` package is a valuable tool that provides the framework for the import, processing, analysism and visualization of -high-dimensional drug response data, the `PharmacoGx` package provides functionality to containerize processed drug response and genomic data -to perform pharmacogenomic analyses. The data structure used by `PharmacoGx` is described as a `PharamcoSet` class, inherited from `CoreGx::CoreSet`. -Inspired by the `SummarizedExperiment` class, a new data structure class called the `TreatmentResponseExperiment` was developed to store drug response -data within the `PharmacoSet`. +The `PharmacoGx` package is a popular tool from the Bioconductor project within the field of bioinformatics and computational biology. +Whereas the `gDR` package is a valuable tool that provides the framework for the import, processing, analysism and visualization of +high-dimensional drug response data, the `PharmacoGx` package provides functionality to containerize processed drug response and genomic data +to perform pharmacogenomic analyses. The data structure used by `PharmacoGx` is described as a `PharamcoSet` class, inherited from `CoreGx::CoreSet`. +Inspired by the `SummarizedExperiment` class, a new data structure class called the `TreatmentResponseExperiment` was developed to store drug response +data within the `PharmacoSet`. -In collaboration with the BHKLab, we have developed functionality to convert between the `gDR`-generated `MultiAssayExperiment (MAE)` object and the `PharmacoSet` object +In collaboration with the BHKLab, we have developed functionality to convert between the `gDR`-generated `MultiAssayExperiment (MAE)` object and the `PharmacoSet` object (see the ConvertingPharmacoSetToGDR vignette to convert from a PharmacoSet to gDR object). # Loading the gDR-generated MAE object -This workflow assumes that you have already generated a `MAE` object using the `gDR` package. We will load in a `MAE` object generated using -the data from the [KW. Song et al, 2022](https://doi.org/10.1158/2159-8290.CD-21-0072) study. This `MAE` contains two drugs (`treatments`) and 7 cell lines (`samples`). +This workflow assumes that you have already generated a `MAE` object using the `gDR` package. We will load in a `MAE` object generated using +the data from the [KW. Song et al, 2022](https://doi.org/10.1158/2159-8290.CD-21-0072) study. This `MAE` contains two drugs (`treatments`) and 7 cell lines (`samples`). ``` {r load-mae-data} (mae <- qs2::qs_read( system.file("extdata", "kyung2022mae", "MAE_E2.qs2", package = "gDRimport") @@ -52,7 +52,7 @@ the data from the [KW. Song et al, 2022](https://doi.org/10.1158/2159-8290.CD-21 ``` # Converting the MAE object into a PharmacoSet object -First we will view the experiments and assays within the MultiAssayExperiment object. +First we will view the experiments and assays within the MultiAssayExperiment object. ``` {r inspect-mae-assays} gDRutils::MAEpply(mae, assays) ``` @@ -63,7 +63,7 @@ we will use the name of the study. pset <- convert_MAE_to_PSet(mae, pset_name="Kyung2022") ``` -We can now view the `PharmacoSet` object. +We can now view the `PharmacoSet` object. ``` {r display-pset-object} pset ``` @@ -80,7 +80,7 @@ head(sampleInfo(pset)) # TreatmentResponseExperiment Object The `TreatmentResponseExperiment` is a new class that was developed to store drug response data within the `PharmacoSet` object. -This vignette will cover some basics about this class, however for more information, please refer to the `PharmacoSet` vignette +This vignette will cover some basics about this class, however for more information, please refer to the `PharmacoSet` vignette [TreatmentResponseExperiment](https://bioconductor.org/packages/release/bioc/vignettes/CoreGx/inst/doc/TreatmentResponseExperiment.html)****. ## Row and Column Names @@ -124,7 +124,7 @@ tre[ ## assays The assays can be viewed through the `assays` function. Each assay has a column "data_type" which references which `SummarizedExperiment` from -the `MAE` the data corresponds with. +the `MAE` the data corresponds with. ``` {r view-tre-assays} lapply(assays(tre), head) @@ -141,7 +141,7 @@ head(assay(tre, "Metrics"),3) ``` # References -1. Song, K. W., Edgar, K. A., Hanan, E. J., Hafner, M., Oeh, J., Merchant, M., ... & Friedman, L. S. (2022). +1. Song, K. W., Edgar, K. A., Hanan, E. J., Hafner, M., Oeh, J., Merchant, M., ... & Friedman, L. S. (2022). RTK-dependent inducible degradation of mutant PI3K drives GDC-0077 (Inavolisib) efficacy. Cancer discovery, 12(1), 204-219. diff --git a/vignettes/ConvertingPharmacoSetToGDR.Rmd b/vignettes/ConvertingPharmacoSetToGDR.Rmd index 3ae2bbea..fa768eb5 100644 --- a/vignettes/ConvertingPharmacoSetToGDR.Rmd +++ b/vignettes/ConvertingPharmacoSetToGDR.Rmd @@ -1,5 +1,5 @@ --- -title: "Converting PharmacoSet Drug Response Data into gDR object" +title: "Converting PharmacoSet Drug Response Data into gDR object" author: - name: Jermiah Joseph email: jermiah.joseph@uhn.ca @@ -27,17 +27,17 @@ library(gDRimport) # Overview -The `gDRimport` package is a part of the gDR suite. It helps to prepare raw drug response data for downstream processing. -It mainly contains helper functions for importing/loading/validating dose response data provided from different scanner sources. -In collaboration with the BHKLab, `gDRimport` also provides functions that can convert a `PharmacoGx::PharamcoSet` object into a gDR object. -With this functionality, users familiar with the gDR suite of packages and methods can utilize the publically available, curated datasets from the PharmacoGx database. +The `gDRimport` package is a part of the gDR suite. It helps to prepare raw drug response data for downstream processing. +It mainly contains helper functions for importing/loading/validating dose response data provided from different scanner sources. +In collaboration with the BHKLab, `gDRimport` also provides functions that can convert a `PharmacoGx::PharamcoSet` object into a gDR object. +With this functionality, users familiar with the gDR suite of packages and methods can utilize the publically available, curated datasets from the PharmacoGx database. The main step in this process is to extract the drug dose-response data from the PharmacoSets and transform them into a `data.table` that can be used as input for the `gDRcore::runDrugResponseProcessingPipeline`. # Loading a PharmacoSet (PSet) -Whereas a user might already have a pharmacoset loaded in their R session, if they wish to obtain a different pharmacoset or use the same script in the future we provide a helper function to do so. +Whereas a user might already have a pharmacoset loaded in their R session, if they wish to obtain a different pharmacoset or use the same script in the future we provide a helper function to do so. It helps to have a user directory in which to store all pharmacosets, and by passing this directory into the function as a parameter, the function will also check to see if the PSet exists in the user-defined directory. -This is to ensure that the PSet is not being re-downloaded if it already has. +This is to ensure that the PSet is not being re-downloaded if it already has. ```{r get-pset-example, eval = FALSE} pset <- getPSet("Tavor_2020") pset @@ -60,19 +60,19 @@ pset ``` # Converting PharmacoSet to data.table for gDR pipeline -`PharamcoSets` hold data pertaining to the cell lines (@sample slot), drugs (@treatment slot), and dose response experiments (@treatmentResponse slot). -The dose response data is stored in a `treatmentResponseExperiment` object and the function `gDRimport::convert_pset_to_df` extracts this information to -build a `data.table` that can be used as input to the gDR pipeline. +`PharamcoSets` hold data pertaining to the cell lines (@sample slot), drugs (@treatment slot), and dose response experiments (@treatmentResponse slot). +The dose response data is stored in a `treatmentResponseExperiment` object and the function `gDRimport::convert_pset_to_df` extracts this information to +build a `data.table` that can be used as input to the gDR pipeline. ```{r convert-pset-to-dt} -# Store treatment response data in df_ +# Store treatment response data in df_ dt <- convert_pset_to_df(pharmacoset = pset) str(dt) ``` # Subsetting to extract relevant information Most canonical `PharmacoSets` have data pertaining to many cell lines and their response to many drugs (drug-combination data is available in some but its conversion to gDR is not currently supported). -As such, in the interest of time and resources, it may be useful to subset the data before providing it as input for the gDR pipeline. +As such, in the interest of time and resources, it may be useful to subset the data before providing it as input for the gDR pipeline. ```{r subset-by-cell-line} # example subset using only 1 cell line @@ -88,10 +88,10 @@ The subsetted data can now be used as input for the `gDRcore::runDrugResponsePro # RUN DRUG RESPONSE PROCESSING PIPELINE se <- gDRcore::runDrugResponseProcessingPipeline(x) se -``` +``` ```{r convert-se-to-dt, eval = FALSE} -# Convert Summarized Experiments to data.table +# Convert Summarized Experiments to data.table # Available SEs : "RawTreatred", "Controls", "Normalized", "Averaged", "Metrics" str(gDRutils::convert_se_assay_to_dt(se[[1]], "Averaged")) diff --git a/vignettes/gDRimport.Rmd b/vignettes/gDRimport.Rmd index 4409a780..ccc31e4c 100644 --- a/vignettes/gDRimport.Rmd +++ b/vignettes/gDRimport.Rmd @@ -15,12 +15,12 @@ log_level <- futile.logger::flog.threshold("ERROR") # Overview -The `gDRimport` package is a part of the gDR suite. It helps to prepare raw drug response data for downstream processing. It mainly contains helper functions for importing/loading/validating dose response data provided in different file formats. +The `gDRimport` package is a part of the gDR suite. It helps to prepare raw drug response data for downstream processing. It mainly contains helper functions for importing/loading/validating dose response data provided in different file formats. # Use Cases -## Test Data -There are currently four test datasets that can be used to see what's the expected input data for the gDRimport. +## Test Data +There are currently four test datasets that can be used to see what's the expected input data for the gDRimport. ```{r load-test-datasets} # primary test data @@ -99,7 +99,7 @@ Replace "prism_data_path", "cell_line_data_path", and "treatment_data_path" with ## Package installation -The function `installAllDeps` assists in installing package dependencies. +The function `installAllDeps` assists in installing package dependencies. # SessionInfo {-}