diff --git a/DESCRIPTION b/DESCRIPTION index d97a973..66156dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gDRtestData Title: gDRtestData - R data package with testing dose response data -Version: 1.11.1 +Version: 1.11.2 Date: 2026-04-29 Description: R package with internal dose-response test data. Package provides functions to generate input testing data that can be used as the input for gDR pipeline. It also contains qs2 files diff --git a/NEWS.md b/NEWS.md index 824916c..ab95c01 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## gDRtestData 1.11.2 - 2026-05-18 +* apply updated gDRstyle rules + ## gDRtestData 1.11.1 - 2026-04-29 * synchronize Bioconductor and GitHub versioning @@ -135,4 +138,4 @@ ## gDRtestData 0.99.0 - 2023-03-31 * preparing package for Bioc submission * fix examples - + \ No newline at end of file diff --git a/R/generate_data.R b/R/generate_data.R index c24f3dd..d6f9461 100644 --- a/R/generate_data.R +++ b/R/generate_data.R @@ -7,17 +7,17 @@ #' #' #' @examples -#' +#' #' cell_lines <- create_synthetic_cell_lines() #' add_data_replicates(cell_lines) -#' +#' #' @return data.table with replicates #' @keywords generate_data #' @export #' add_data_replicates <- function(df_layout) { df_layout_duplicates <- Reduce(rbind, list(df_layout)[rep(1, times = 3)]) - barcode <- do.call(c, lapply(paste0("plate_", seq_len(3)), function(x) rep(x, nrow(df_layout)))) + barcode <- do.call(c, lapply(paste0("plate_", seq_len(3)), function(x) rep(x, NROW(df_layout)))) cbind(Barcode = barcode, df_layout_duplicates) } @@ -30,10 +30,10 @@ add_data_replicates <- function(df_layout) { #' @keywords generate_data #' #' @examples -#' +#' #' cell_lines <- create_synthetic_cell_lines() #' add_concentration(cell_lines) -#' +#' #' @return data.table with concentrations #' @export #' @@ -55,41 +55,41 @@ add_concentration <- function(df_layout, concentrations = 10 ^ (seq(-3, 1, 0.5)) #' #' #' @examples -#' +#' #' cell_lines <- 
create_synthetic_cell_lines() #' drugs <- create_synthetic_drugs() #' df_layout <- prepareData(cell_lines[seq_len(2), ], drugs[seq_len(4), ]) #' generate_response_data(df_layout) -#' +#' #' #' @return data.table with response data #' @export #' generate_response_data <- function(df_layout, noise_level = 0.1) { - + drugs <- create_synthetic_drugs() cell_lines <- create_synthetic_cell_lines() hill_coef <- generate_hill_coef(drugs, cell_lines) ec50 <- generate_ec50(drugs, cell_lines) e_inf <- generate_e_inf(drugs, cell_lines) - + df_layout$ReadoutValue <- round(100 * pmax( getReadoutCoef(df_layout, e_inf, ec50, hill_coef) + - (noise_level * stats::runif(nrow(df_layout)) - (noise_level / 2)), # add some noise - 0.01 * stats::runif(nrow(df_layout)) + 0.005), # avoid hard 0 values + (noise_level * stats::runif(NROW(df_layout)) - (noise_level / 2)), # add some noise + 0.01 * stats::runif(NROW(df_layout)) + 0.005), # avoid hard 0 values 1) df_layout$BackgroundValue <- 0 df_layout$Duration <- 72 df_layout <- introduceVehicle(df_layout) - + if ("Gnumber_2" %in% colnames(df_layout)) { # combo data df_layout <- introduceGNum(df_layout, e_inf, ec50, hill_coef, "_2") } - + if ("Gnumber_3" %in% colnames(df_layout)) { # combo data df_layout <- introduceGNum(df_layout, e_inf, ec50, hill_coef, "_3") } - + df_layout } @@ -98,24 +98,24 @@ getReadoutCoef <- function(df, e_inf, ec50, hill_coef, suffix = "") { apply(df, 1, function(x) { clid <- x["clid"] gnum <- x[paste0("Gnumber", suffix)] - + e_inf_val <- e_inf[gnum, clid] ec50_val <- ec50[gnum, clid] hill_val <- hill_coef[gnum, clid] concentration <- as.numeric(x["Concentration"]) - - e_inf_val + (1 - e_inf_val) * (ec50_val ^ hill_val / (concentration ^ hill_val + ec50_val ^ hill_val)) + + e_inf_val + (1 - e_inf_val) * (ec50_val ^ hill_val / (concentration ^ hill_val + ec50_val ^ hill_val)) }) } #' @keywords internal introduceVehicle <- function(df, suffix = "") { zeroIdx <- df[[paste0("Concentration", suffix)]] == 0 - + df[zeroIdx, 
paste0("Gnumber", suffix)] <- "vehicle" df[zeroIdx, paste0("DrugName", suffix)] <- "vehicle" df[zeroIdx, paste0("drug_moa", suffix)] <- "vehicle" - + df } @@ -123,7 +123,7 @@ introduceVehicle <- function(df, suffix = "") { introduceGNum <- function(df, e_inf, ec50, hill_coef, suffix) { df$ReadoutValue <- df$ReadoutValue * getReadoutCoef(df, e_inf, ec50, hill_coef, suffix) df <- introduceVehicle(df, suffix) - + df } @@ -134,7 +134,7 @@ introduceGNum <- function(df, e_inf, ec50, hill_coef, suffix) { #' @keywords generate_data #' #' @examples -#' +#' #' cell_lines <- create_synthetic_cell_lines() #' drugs <- create_synthetic_drugs() #' df_merged <- prepareData(cell_lines[seq_len(2), ], drugs[seq_len(4), ]) @@ -152,14 +152,14 @@ add_day0_data <- function(df_merged, noise_level = 0.05) { TRUE } df_Day0 <- unique(df_merged[df_merged$Concentration == 0 & cond, ]) - + df_Day0$ReadoutValue <- df_Day0$ReadoutValue / 2 ^ (df_Day0$Duration / df_Day0$ReferenceDivisionTime) - coef <- (1 - noise_level / 2 + noise_level * stats::runif(nrow(df_Day0))) + coef <- (1 - noise_level / 2 + noise_level * stats::runif(NROW(df_Day0))) df_Day0$ReadoutValue <- round(df_Day0$ReadoutValue * coef, 1) - + df_Day0$Duration <- 0 df_Day0$Barcode <- "plate_0" - + df_merged <- rbind(df_merged, df_Day0) df_merged } diff --git a/R/get_test_datasets.R b/R/get_test_datasets.R index 8fa40df..ede0283 100644 --- a/R/get_test_datasets.R +++ b/R/get_test_datasets.R @@ -1,5 +1,5 @@ #' get_test_dataset_paths -#' +#' #' Returns named vector of absolute paths to test datasets. #' #' @param datasets_dir path to directory with datasets (default \code{NULL}). 
@@ -9,13 +9,13 @@ #' @keywords generate_test_data #' #' @return named vector of absolute paths -#' +#' #' @examples -#' +#' #' get_test_dataset_paths() #' path <- system.file("testdata", package = "gDRtestData", mustWork = TRUE) #' get_test_dataset_paths(path) -#' +#' #' @export #' #' @author Kamil Foltyński \email{kamil.foltynski@@contractors.roche.com} @@ -30,12 +30,10 @@ get_test_dataset_paths <- } checkmate::assert_string(datasets_dir, min.chars = 1) checkmate::assert_directory_exists(datasets_dir) - + checkmate::assert_string(pattern, min.chars = 1) - + epaths <- list.files(datasets_dir, pattern = paste0(pattern, ".*\\.qs2$"), full.names = TRUE) enames <- gsub(pattern, "", gsub("\\.qs2$", "", basename(epaths))) structure(epaths, names = enames) } - - diff --git a/R/helper_functions.R b/R/helper_functions.R index d380f81..11fd8f9 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -1,7 +1,7 @@ # Helper functions #' prepareData -#' +#' #' Create data.table with input data for testing purposes #' #' @param cell_lines data.table with cell line info @@ -10,11 +10,11 @@ #' @keywords generate_test_data #' #' @return data.table with input data for testing -#' +#' #' @examples -#' +#' #' prepareData(create_synthetic_cell_lines(), create_synthetic_drugs()) -#' +#' #' @export prepareData <- function(cell_lines, drugs, conc = 10 ^ (seq(-3, 1, 0.5))) { df_layout <- data.table::as.data.table(merge.data.frame(cell_lines, drugs, by = NULL)) @@ -23,8 +23,8 @@ prepareData <- function(cell_lines, drugs, conc = 10 ^ (seq(-3, 1, 0.5))) { } #' prepareMergedData -#' -#' Create data.table with input data containing noise for testing purposes +#' +#' Create data.table with input data containing noise for testing purposes #' #' @param cell_lines data.table with cell line info #' @param drugs data.table with drug info @@ -32,11 +32,11 @@ prepareData <- function(cell_lines, drugs, conc = 10 ^ (seq(-3, 1, 0.5))) { #' @keywords generate_test_data #' #' @return data.table with 
input data for testing -#' +#' #' @examples -#' +#' #' prepareMergedData(create_synthetic_cell_lines(), create_synthetic_drugs()) -#' +#' #' @export prepareMergedData <- function(cell_lines, drugs, noise = 0.1) { df <- prepareData(cell_lines, drugs) @@ -44,8 +44,8 @@ prepareMergedData <- function(cell_lines, drugs, noise = 0.1) { } #' prepareComboMergedData -#' -#' Create data.table with input combination data containing noise for testing purposes +#' +#' Create data.table with input combination data containing noise for testing purposes #' #' @param cell_lines data.table with cell line info #' @param drugs data.table with drug info @@ -58,35 +58,35 @@ prepareMergedData <- function(cell_lines, drugs, noise = 0.1) { #' @keywords generate_test_data #' #' @return data.table with input data for testing -#' +#' #' @examples -#' +#' #' prepareComboMergedData(create_synthetic_cell_lines(), create_synthetic_drugs()) -#' +#' #' @export -prepareComboMergedData <- function(cell_lines, - drugs, +prepareComboMergedData <- function(cell_lines, + drugs, drugsIdx1 = 2:4, - drugsIdx2 = c(26, 26, 26), - concentration = c(0, .2, 1), - noise = 0.1, + drugsIdx2 = c(26, 26, 26), + concentration = c(0, .2, 1), + noise = 0.1, modifyDf2 = FALSE) { df_layout <- prepareData(cell_lines, drugs[drugsIdx1, ]) - + df_2 <- cbind(drugs[drugsIdx2, ], Concentration = concentration) colnames(df_2) <- paste0(colnames(df_2), "_2") - + df_layout_2 <- data.table::as.data.table(merge.data.frame(df_layout, df_2, by = NULL)) if (modifyDf2) { df_layout_2 <- df_layout_2[!(df_layout_2$Concentration == 0 & df_layout_2$Concentration_2 > 0), ] } - + generate_response_data(df_layout_2, noise) } #' prepareCodilutionData -#' -#' Create data.table with input co-dilution data containing noise for testing purposes +#' +#' Create data.table with input co-dilution data containing noise for testing purposes #' #' @param cell_lines data.table with cell line info #' @param drugs data.table with drug info @@ -94,32 +94,32 @@ 
prepareComboMergedData <- function(cell_lines, #' @param conc vector of doses #' @param noise number indicating level of noise #' @keywords generate_test_data -#' +#' #' @return data.table with input data for testing -#' +#' #' @examples -#' +#' #' prepareCodilutionData(create_synthetic_cell_lines()[seq_len(2), ], #' create_synthetic_drugs()[seq_len(4), ]) -#' +#' #' @export prepareCodilutionData <- function(cell_lines, drugs, drugsIdx2 = 1, conc = 10 ^ (seq(-3, 1, 0.5)), noise = 0.1) { - + df_layout <- prepareData(cell_lines = cell_lines, drugs = drugs, conc = conc) - + df_2 <- cbind(drugs[drugsIdx2, , drop = FALSE], df_layout[, "Concentration", drop = FALSE]) colnames(df_2) <- paste0(colnames(df_2), "_2") - + df_layout_2 <- cbind(df_layout, df_2) df_layout_2 <- df_layout_2[df_layout_2$DrugName != df_layout_2$DrugName_2, ] rows <- which(df_layout_2$Concentration_2 > 0) cols <- c("Concentration", "Concentration_2") df_layout_2[rows, (cols) := lapply(.SD, function(x) x / 2), .SDcols = cols] - + generate_response_data(df_layout_2, noise) } diff --git a/R/package.R b/R/package.R index d5699c7..a3fbb19 100644 --- a/R/package.R +++ b/R/package.R @@ -6,7 +6,7 @@ #' @examples #' path <- system.file("annotation_data", "cell_lines.csv", package = "gDRtestData") #' data.table::fread(file = path) -#' +#' #' @return data.table NULL @@ -15,9 +15,9 @@ NULL #' @name drugs #' @docType data #' @keywords data internal -#' @examples +#' @examples #' path <- system.file("annotation_data", "drugs.csv", package = "gDRtestData") #' data.table::fread(file = path) -#' +#' #' @return data.table NULL diff --git a/R/random_data.R b/R/random_data.R index 13bbeb1..3bf90c1 100644 --- a/R/random_data.R +++ b/R/random_data.R @@ -5,7 +5,7 @@ #' #' @examples #' create_synthetic_cell_lines() -#' +#' #' @export create_synthetic_cell_lines <- function() { cell_lines <- data.table::data.table( @@ -15,7 +15,7 @@ create_synthetic_cell_lines <- function() { ReferenceDivisionTime = seq(22, 80, 4) ) 
cell_lines <- Reduce(rbind, list(cell_lines)[rep(1, times = 6)]) - cell_lines$clid <- paste0("CL000", 9 + (seq_len(nrow(cell_lines)))) + cell_lines$clid <- paste0("CL000", 9 + (seq_len(NROW(cell_lines)))) cell_lines$CellLineName <- paste0(cell_lines$CellLineName, sort(array(LETTERS[seq_len(15)], 90))) cell_lines$Tissue[16:40] <- "tissue_w" cell_lines$Tissue[41:50] <- "tissue_v" @@ -28,7 +28,7 @@ create_synthetic_cell_lines <- function() { #' @return data.table with synthetic drugs #' @examples #' create_synthetic_drugs() -#' +#' #' @export create_synthetic_drugs <- function() { drugs <- data.table::data.table( @@ -37,8 +37,8 @@ create_synthetic_drugs <- function() { drug_moa = sort(paste0("moa_", array(LETTERS[c(1, seq_len(6), 6)], 40))) ) drugs <- Reduce(rbind, list(drugs)[rep(1, times = 6)]) - drugs$Gnumber <- sprintf("G00%03i", seq_len(nrow(drugs))) - drugs$DrugName <- sprintf("drug_%03i", seq_len(nrow(drugs))) + drugs$Gnumber <- sprintf("G00%03i", seq_len(NROW(drugs))) + drugs$DrugName <- sprintf("drug_%03i", seq_len(NROW(drugs))) drugs$drug_moa[-seq_len(80)] <- sort(paste0("moa_", array(LETTERS[seq_len(24)], 160))) drugs } @@ -52,11 +52,11 @@ create_synthetic_drugs <- function() { #' #' @return matrix with random hill coefficient #' @examples -#' generate_hill_coef(create_synthetic_drugs(), create_synthetic_cell_lines()) -#' +#' generate_hill_coef(create_synthetic_drugs(), create_synthetic_cell_lines()) +#' #' @export generate_hill_coef <- function(drugs, cell_lines) { - hill_coef <- matrix(1.8 + stats::runif(nrow(drugs) * nrow(cell_lines)), nrow(drugs), nrow(cell_lines)) + hill_coef <- matrix(1.8 + stats::runif(NROW(drugs) * NROW(cell_lines)), NROW(drugs), NROW(cell_lines)) colnames(hill_coef) <- cell_lines$clid rownames(hill_coef) <- drugs$Gnumber hill_coef @@ -71,14 +71,14 @@ generate_hill_coef <- function(drugs, cell_lines) { #' @return matrix with random EC50 #' @examples #' generate_ec50(create_synthetic_drugs(), create_synthetic_cell_lines()) -#' +#' #' 
@export generate_ec50 <- function(drugs, cell_lines) { checkmate::assert_data_table(drugs) checkmate::assert_data_table(cell_lines) - - nDrugs <- nrow(drugs) - nCells <- nrow(cell_lines) + + nDrugs <- NROW(drugs) + nCells <- NROW(cell_lines) ec50 <- matrix(stats::runif(nDrugs * nCells) - 0.5, nDrugs, nCells) + matrix(sort(rep(seq(-1.2, 0, 0.3), 8)), nDrugs, nCells) + t(matrix(seq(-0.4, 0, 0.1), nCells, nDrugs)) @@ -110,15 +110,15 @@ generate_ec50 <- function(drugs, cell_lines) { #' @return matrix with random E inf #' @examples #' generate_e_inf(create_synthetic_drugs(), create_synthetic_cell_lines()) -#' +#' #' @export generate_e_inf <- function(drugs, cell_lines) { checkmate::assert_data_table(drugs) checkmate::assert_data_table(cell_lines) - - nDrugs <- nrow(drugs) - nCells <- nrow(cell_lines) - + + nDrugs <- NROW(drugs) + nCells <- NROW(cell_lines) + e_inf <- matrix(0.5 * stats::runif(nDrugs * nCells), nDrugs, nCells) + t(matrix(seq(0, 0.2, 0.05), nCells, nDrugs)) @@ -135,9 +135,9 @@ generate_e_inf <- function(drugs, cell_lines) { e_inf[moa_BE, tissue_x] <- e_inf[moa_BE, tissue_x] - 0.5 * stats::runif(sum(tissue_x)) e_inf[moa_F, ] <- e_inf[moa_F, ] + 0.3 + 0.2 * stats::runif(sum(moa_F)) e_inf <- matrix(pmin(0.89, pmax(0.01, e_inf)) + stats::runif(nDrugs * nCells) * 0.1, nDrugs, nCells) - + colnames(e_inf) <- cell_lines$clid rownames(e_inf) <- drugs$Gnumber - + e_inf } diff --git a/inst/scripts/generate_example_data.R b/inst/scripts/generate_example_data.R index 9946eae..25611ee 100644 --- a/inst/scripts/generate_example_data.R +++ b/inst/scripts/generate_example_data.R @@ -3,18 +3,18 @@ # The data have no day0 information. 
# dataset with * are to be imported as example for visualization # -# "small_no_noise" 10 drugs (3 different drug_moa) by 10 lines (3 tissues); +# "small_no_noise" 10 drugs (3 different drug_moa) by 10 lines (3 tissues); # single agent - no noise in the data # "small" * 10 drugs (3 different drug_moa) by 10 lines (3 tissues); single agent # "wLigand" * 3 drugs by 4 lines (3 tissues); "Ligand = 0.1" as reference; single agent # "medium" * 40 drugs (6 different drug_moa) by 15 lines (3 tissues); single agent # "many_lines" * 150 drugs (6 different drug_moa) by 10 lines (3 tissues); single agent # "many_drugs" * 150 drugs (6 different drug_moa) by 10 lines (3 tissues); single agent -# "combo_2dose_nonoise" * 3 drugs x 2 co-treatment (1 drug at 2 doses) by 3 cell lines; +# "combo_2dose_nonoise" * 3 drugs x 2 co-treatment (1 drug at 2 doses) by 3 cell lines; # co-treatment drug occurs also as a primary drug -# "combo_2dose_nonoise2" 3 drugs x 2 co-treatment (1 drug at 2 doses) by 3 cell lines; +# "combo_2dose_nonoise2" 3 drugs x 2 co-treatment (1 drug at 2 doses) by 3 cell lines; # co-treatment drug occurs also as a primary drug -# "combo_2dose_nonoise3" 3 drugs x 2 co-treatment (1 drug at 2 doses) by 3 cell lines; +# "combo_2dose_nonoise3" 3 drugs x 2 co-treatment (1 drug at 2 doses) by 3 cell lines; # co-treatment drug does NOT have single agent response # "combo_1dose_many_drugs" * 149 drugs x 1 drug (1 dose) by 3 lines; # "combo_matrix_small" 3 x 2 drugs (matrix) for 2 cell lines; no noise @@ -97,4 +97,3 @@ generateCodilutionSmall(cell_lines, drugs) # generate the data for the test set with combo (co-dilution) set.seed(2) generateCodilution(cell_lines, drugs) - diff --git a/vignettes/gDRtestData.Rmd b/vignettes/gDRtestData.Rmd index 42b8d5a..97f5d82 100644 --- a/vignettes/gDRtestData.Rmd +++ b/vignettes/gDRtestData.Rmd @@ -73,12 +73,12 @@ In a further step, the user may generate a set of synthetic results: 1. 
Hill coefficient ```{r} -hill <- generate_hill_coef(cell_lines, drugs) +hill <- generate_hill_coef(cell_lines, drugs) ``` 2. EC50 metric ```{r} -ec50_met <- generate_ec50(cell_lines, drugs) +ec50_met <- generate_ec50(cell_lines, drugs) ``` 3. E inf metric @@ -107,16 +107,16 @@ head(response_data) ## Synthetic object of gDR data model -The gDR data model is built on the MultiAssayExperiments (MAE) structure. A detailed description of the gDR data model can be found in `gDRcore` package vignette. +The gDR data model is built on the MultiAssayExperiment (MAE) structure. A detailed description of the gDR data model can be found in the `gDRcore` package vignette. -In `inst/testdata` the user may find a set of `qs2` files that are examples of gDR data model for different data types. In the file `synthetic_list.yml` one can find a list of these datasets. Currently available are: +In `inst/testdata` the user may find a set of `qs2` files that are examples of the gDR data model for different data types. In the file `synthetic_list.yml` one can find a list of these datasets. Currently available are: ```{r echo=FALSE} yml_path <- system.file(package = "gDRtestData", "testdata", "synthetic_list.yml") cat(paste0("* ", names(yaml::read_yaml(yml_path)), collapse = ", \n"), ".") ``` The script `generate_example_data.R` which shows how to generate and process above-mentioned datasets is in `inst/scripts` dir. -All key functions can be found in package `gDRcore` in script `generate_wrappers.R`. +All key functions can be found in the `gDRcore` package, in the script `generate_wrappers.R`. ## Annotation data