From 7b000e1ce6f712a9cbd672e7932663a317fb07c6 Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Thu, 14 May 2026 17:38:25 +0200 Subject: [PATCH 1/6] fix: replace nrow/ncol with NROW/NCOL to satisfy gDRstyle linting rules --- R/assay_names.R | 2 +- R/convert_mae_se_assay_to_dt.R | 2 +- R/experiment_validators.R | 4 ++-- R/split_SE_components.R | 2 +- R/utils.R | 8 ++++---- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/assay_names.R b/R/assay_names.R index fd7aea8c..8f335f8c 100644 --- a/R/assay_names.R +++ b/R/assay_names.R @@ -66,7 +66,7 @@ get_env_assay_names <- df <- df[df[[eval(filter)]] %in% v_filters[[filter]], ] } - if ((nrow(df)) == 0) { + if ((NROW(df)) == 0) { v_filters_str <- paste(names(v_filters), v_filters, diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index 80a5330c..2b9ef0b9 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -65,7 +65,7 @@ convert_se_assay_to_dt <- function(se, } } dt <- .convert_se_assay_to_dt(se, assay_name, retain_nested_rownames = retain_nested_rownames) - if (nrow(dt) == 0L) { + if (NROW(dt) == 0L) { return(dt) } if (drop_masked) { diff --git a/R/experiment_validators.R b/R/experiment_validators.R index 08193325..6402db0a 100644 --- a/R/experiment_validators.R +++ b/R/experiment_validators.R @@ -102,8 +102,8 @@ validate_SE <- function(se, as.character(which(names(rowdata) == get_env_identifiers("drug")) - 1), "}([^_]+).*") #nolint end - checkmate::assert_true(nrow(coldata) == nrow(unique(coldata))) - checkmate::assert_true(nrow(rowdata) == nrow(unique(rowdata))) + checkmate::assert_true(NROW(coldata) == NROW(unique(coldata))) + checkmate::assert_true(NROW(rowdata) == NROW(unique(rowdata))) # Validate non-empty values in rowData and colData checkmate::assert_false(any(stats::na.omit(unlist(coldata)) == "")) diff --git a/R/split_SE_components.R b/R/split_SE_components.R index 5f511984..a0457f0d 100755 --- a/R/split_SE_components.R +++ b/R/split_SE_components.R @@ -71,7 +71,7 @@ split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) { singletons <- vapply(remaining_cols, function(x) { - nrow(unique(md[, x, drop = FALSE])) == 1L + NROW(unique(md[, x, drop = FALSE])) == 1L }, logical(1)) # Get experiment columns. diff --git a/R/utils.R b/R/utils.R index b39cd8e7..daf9ed67 100644 --- a/R/utils.R +++ b/R/utils.R @@ -359,14 +359,14 @@ apply_bumpy_function <- function(se, checkmate::assert_class(asy, "BumpyDataFrameMatrix") df <- BumpyMatrix::unsplitAsDataFrame(asy, row.field = "row", column.field = "column") iterator <- unique(df[, c("column", "row")]) - out <- loop(seq_len(nrow(iterator)), FUN = function(elem) { + out <- loop(seq_len(NROW(iterator)), FUN = function(elem) { x <- iterator[elem, ] i <- x[["row"]] j <- x[["column"]] elem_df <- asy[i, j][[1]] store <- FUN(elem_df, ...) if (is(store, "data.table") || is(store, "DFrame")) { - if (nrow(store) != 0L) { + if (NROW(store) != 0L) { store$row <- i store$column <- j store @@ -457,8 +457,8 @@ is_exp_empty <- function(exp) { ) any( - nrow(SummarizedExperiment::assay(exp)) == 0, - nrow(dt) == 0 + NROW(SummarizedExperiment::assay(exp)) == 0, + NROW(dt) == 0 ) } From ca86d9dd95bad404acf05ba591c1c2c0e9e2e4a7 Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Mon, 18 May 2026 10:07:49 +0200 Subject: [PATCH 2/6] fix: resolve gDRstyle linting violations Address lint violations from updated gDRstyle rules: - Fix trailing whitespace and blank lines - Replace nrow/ncol with NROW/NCOL - Fix paste_linter (paste0+collapse, toString, strrep, file.path) - Fix seq_linter (seq_along, seq_len) - Refactor cyclocomp_linter violations (extract helper functions) - Fix test linters (yoda, expect_true_false) - Remove undesirable operators (|> pipes) --- .serena/.gitignore | 2 + .serena/memories/project_overview.md | 41 ++ .serena/memories/style_conventions.md | 29 + .serena/memories/suggested_commands.md | 50 ++ .serena/project.yml | 154 +++++ R/assay_names.R | 22 +- R/combo.R | 52 +- R/concatentate_SEs.R | 32 +- R/convert_mae_se_assay_to_dt.R | 70 +-- R/df_to_bm_assay.R | 22 +- R/duplicates.R | 75 ++- R/experiment_validators.R | 19 +- R/experiments.R | 14 +- R/fit_curves.R | 138 ++--- R/flatten.R | 8 +- R/global_cache.R | 10 +- R/headers.R | 6 +- R/headers_list.R | 48 +- R/helpers-tests.R | 42 +- R/identifiers.R | 54 +- R/identifiers_list.R | 12 +- R/json_const.R | 18 +- R/json_convert.R | 28 +- R/json_validate.R | 8 +- R/manage_additional_metadata.R | 20 +- R/merge_SE.R | 122 ++-- R/packages.R | 1 - R/prettify.R | 46 +- R/se_metadata.R | 60 +- R/split_SE_components.R | 12 +- R/standardize_MAE.R | 112 ++-- R/utils.R | 536 +++++++++--------- R/validate_identifiers.R | 14 +- tests/testthat/test-assay_names.R | 24 +- tests/testthat/test-combo.R | 24 +- .../test-convert_mae_se_assay_to_dt.R | 158 +++--- tests/testthat/test-duplicates.R | 24 +- tests/testthat/test-experiment_validators.R | 32 +- tests/testthat/test-fit_curves.R | 38 +- tests/testthat/test-flatten.R | 2 +- tests/testthat/test-headers.R | 11 +- tests/testthat/test-json_const.R | 12 +- tests/testthat/test-json_convert.R | 1 - tests/testthat/test-merge_SE.R | 22 +- tests/testthat/test-prettify.R | 37 +- tests/testthat/test-se_metadata.R | 25 +- tests/testthat/test-split_SE_Components.R | 23 +- tests/testthat/test-standardize_MAE.R | 54 +- tests/testthat/test-utils.R | 324 +++++------ tests/testthat/test-validate_identifiers.R | 8 +- vignettes/gDRutils.Rmd | 17 +- 51 files changed, 1489 insertions(+), 1224 deletions(-) create mode 100644 .serena/.gitignore create mode 100644 .serena/memories/project_overview.md create mode 100644 .serena/memories/style_conventions.md create mode 100644 .serena/memories/suggested_commands.md create mode 100644 .serena/project.yml diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 00000000..2e510aff --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1,2 @@ +/cache +/project.local.yml diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md new file mode 100644 index 00000000..a810516d --- /dev/null +++ b/.serena/memories/project_overview.md @@ -0,0 +1,41 @@ +--- +name: gDRutils project overview +description: High-level overview of the gDRutils R package purpose, structure, and tech stack +type: project +--- + +# gDRutils + +**Purpose**: R utility package for the gDR (drug response) platform. Provides helper functions for: +- Fitting dose-response curves +- Manipulating/converting data between long table and SummarizedExperiment/MAE structures +- Identifier get/set/validation +- Constants and defaults for the gDR platform + +**Version**: 1.9.7 (Bioconductor package) +**License**: Artistic-2.0 + +## Tech Stack +- Language: R (>= 4.2) +- Key dependencies: BumpyMatrix, MultiAssayExperiment, SummarizedExperiment, data.table, drc, checkmate, S4Vectors, qs, jsonlite + +## Code Structure +- `R/` — source files (27 files): + - `utils.R` — large general utilities (~51KB) + - `fit_curves.R` — dose-response curve fitting (~29KB) + - `convert_mae_se_assay_to_dt.R` — MAE/SE to data.table conversion (~21KB) + - `standardize_MAE.R` — MAE standardization (~17KB) + - `merge_SE.R` — SummarizedExperiment merging (~13KB) + - `identifiers.R`, `identifiers_list.R` — column identifier management + - `assay_names.R`, `headers.R`, `headers_list.R` — assay/header naming + - `combo.R` — combination drug data helpers + - `concatentate_SEs.R`, `split_SE_components.R` — SE manipulation + - `json_const.R`, `json_convert.R`, `json_validate.R` — JSON handling + - `prettify.R` — display formatting + - `se_metadata.R`, `manage_additional_metadata.R` — metadata management + - `experiment_validators.R`, `validate_identifiers.R` — validation + - `flatten.R`, `duplicates.R`, `global_cache.R` — misc utilities +- `tests/testthat/` — test files matching each R source file +- `man/` — roxygen2-generated documentation +- `vignettes/` — package vignettes +- `inst/` — installed files diff --git a/.serena/memories/style_conventions.md b/.serena/memories/style_conventions.md new file mode 100644 index 00000000..ef1a4664 --- /dev/null +++ b/.serena/memories/style_conventions.md @@ -0,0 +1,29 @@ +--- +name: gDRutils code style and conventions +description: R coding style, naming conventions, and documentation patterns used in gDRutils +type: project +--- + +# Code Style & Conventions + +## Naming +- Public functions: `snake_case` (e.g., `fit_curves`, `average_biological_replicates_dt`) +- Private/internal functions: prefixed with `.` (e.g., `.applyLogisticFit`, `.checkNonNaAvgNorm`) +- S4 classes and methods follow Bioconductor conventions + +## Documentation +- Roxygen2 with markdown enabled (`Roxygen: list(markdown = TRUE)`) +- All exported functions documented with `@param`, `@return`, `@examples` +- RoxygenNote: 7.3.3 + +## Style +- lintr used for linting +- ByteCompile: TRUE +- Data manipulation via `data.table` (not dplyr) +- Argument validation via `checkmate` +- Bioconductor-style package (biocViews: Software, Infrastructure) + +## Testing +- testthat framework +- One test file per source file (`test-.R`) +- Setup file at `tests/testthat/setup.R` diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md new file mode 100644 index 00000000..5ff2f565 --- /dev/null +++ b/.serena/memories/suggested_commands.md @@ -0,0 +1,50 @@ +--- +name: gDRutils suggested commands +description: Key commands for developing, testing, linting, and building the gDRutils R package +type: project +--- + +# Suggested Commands for gDRutils Development + +## Testing +```r +# Run all tests +testthat::test_package("gDRutils") +# or from shell: +Rscript -e 'testthat::test_package("gDRutils")' + +# Run a specific test file +Rscript -e 'testthat::test_file("tests/testthat/test-fit_curves.R")' +``` + +## Linting +```r +lintr::lint_package() +# or from shell: +Rscript -e 'lintr::lint_package()' +``` + +## Documentation (Roxygen2) +```r +roxygen2::roxygenise() +# or: +devtools::document() +``` + +## Build & Check +```bash +R CMD build . +R CMD check gDRutils_*.tar.gz +# or: +Rscript -e 'rcmdcheck::rcmdcheck()' +``` + +## Install locally +```r +devtools::install() +``` + +## Load for development +```r +devtools::load_all() +``` diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 00000000..2074e9d8 --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,154 @@ +# the name by which the project can be referenced within Serena +project_name: "gDRutils" + + +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp +# csharp_omnisharp dart elixir elm erlang +# fortran fsharp go groovy haskell +# haxe java julia kotlin lua +# markdown +# matlab nix pascal perl php +# php_phpactor powershell python python_jedi r +# rego ruby ruby_solargraph rust scala +# swift terraform toml typescript typescript_vts +# vue yaml zig +# (This list may be outdated. For the current list, see values of Language enum here: +# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py +# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# - For Free Pascal/Lazarus, use pascal +# Special requirements: +# Some languages require additional setup/installations. +# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: +- r + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# line ending convention to use when writing source files. +# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) +# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings. +line_ending: + +# The language backend to use for this project. +# If not set, the global setting from serena_config.yml is used. +# Valid values: LSP, JetBrains +# Note: the backend is fixed at startup. If a project with a different backend +# is activated post-init, an error will be returned. +language_backend: + +# whether to use project's .gitignore files to ignore files +ignore_all_files_in_gitignore: true + +# advanced configuration option allowing to configure language server-specific options. +# Maps the language key to the options. +# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. +# No documentation on options means no options are available. +ls_specific_settings: {} + +# list of additional paths to ignore in this project. +# Same syntax as gitignore, so you can use * and **. +# Note: global ignored_paths from serena_config.yml are also applied additively. +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. +# This extends the existing exclusions (e.g. from the global configuration) +# +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project based on the project name or path. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_memory`: Delete a memory file. Should only happen if a user asks for it explicitly, +# for example by saying that the information retrieved from a memory file is no longer correct +# or no longer relevant for the project. +# * `edit_memory`: Replaces content matching a regular expression in a memory. +# * `execute_shell_command`: Executes a shell command. +# * `find_file`: Finds files in the given relative paths +# * `find_referencing_symbols`: Finds symbols that reference the given symbol using the language server backend +# * `find_symbol`: Performs a global (or local) search using the language server backend. +# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. +# * `initial_instructions`: Provides instructions Serena usage (i.e. the 'Serena Instructions Manual') +# for clients that do not read the initial instructions when the MCP server is connected. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: List available memories. Any memory can be read using the `read_memory` tool. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Read the content of a memory file. This tool should only be used if the information +# is relevant to the current task. You can infer whether the information +# is relevant from the memory file name. +# You should not read the same memory file multiple times in the same conversation. +# * `rename_memory`: Renames or moves a memory. Moving between project and global scope is supported +# (e.g., renaming "global/foo" to "bar" moves it from global to project scope). +# * `rename_symbol`: Renames a symbol throughout the codebase using language server refactoring capabilities. +# For JB, we use a separate tool. +# * `replace_content`: Replaces content in a file (optionally using regular expressions). +# * `replace_symbol_body`: Replaces the full definition of a symbol using the language server backend. +# * `safe_delete_symbol`: +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `write_memory`: Write some information (utf-8-encoded) about this project that can be useful for future tasks to a memory in md format. +# The memory name should be meaningful. +excluded_tools: [] + +# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default). +# This extends the existing inclusions (e.g. from the global configuration). +included_optional_tools: [] + +# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. +# This cannot be combined with non-empty excluded_tools or included_optional_tools. +fixed_tools: [] + +# list of mode names to that are always to be included in the set of active modes +# The full set of modes to be activated is base_modes + default_modes. +# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this setting overrides the global configuration. +# Set this to [] to disable base modes for this project. +# Set this to a list of mode names to always include the respective modes for this project. +base_modes: + +# list of mode names that are to be activated by default. +# The full set of modes to be activated is base_modes + default_modes. +# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply. +# Otherwise, this overrides the setting from the global configuration (serena_config.yml). +# This setting can, in turn, be overridden by CLI parameters (--mode). +default_modes: + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +# time budget (seconds) per tool call for the retrieval of additional symbol information +# such as docstrings or parameter information. +# This overrides the corresponding setting in the global configuration; see the documentation there. +# If null or missing, use the setting from the global configuration. +symbol_info_budget: + +# list of regex patterns which, when matched, mark a memory entry as read‑only. +# Extends the list from the global configuration, merging the two lists. +read_only_memory_patterns: [] + +# list of regex patterns for memories to completely ignore. +# Matching memories will not appear in list_memories or activate_project output +# and cannot be accessed via read_memory or write_memory. +# To access ignored memory files, use the read_file tool on the raw file path. +# Extends the list from the global configuration, merging the two lists. +# Example: ["_archive/.*", "_episodes/.*"] +ignored_memory_patterns: [] diff --git a/R/assay_names.R b/R/assay_names.R index 8f335f8c..c09d3f82 100644 --- a/R/assay_names.R +++ b/R/assay_names.R @@ -22,10 +22,10 @@ ASSAY_INFO_TBL <- data.table::data.table( #' will return single string instead of named vector with single element #' useful when function is expected to return single element/assay only #' @keywords assay_names -#' +#' #' @return charvec -#' -#' @examples +#' +#' @examples #' get_env_assay_names() #' #' @author Arkadiusz Gładki \email{arkadiusz.gladki@@contractors.roche.com} @@ -93,9 +93,9 @@ get_env_assay_names <- #' #' @return charvec #' -#' @examples +#' @examples #' get_assay_names() -#' +#' #' @export get_assay_names <- function(se = NULL, ...) { if (!is.null(se) && @@ -116,9 +116,9 @@ get_assay_names <- function(se = NULL, ...) { #' @keywords assay_names #' @return charvec of combo assay names. #' @export -#' @examples +#' @examples #' get_combo_assay_names() -#' +#' #' @author Arkadiusz Gładki \email{arkadiusz.gladki@@contractors.roche.com} #' get_combo_assay_names <- function(se = NULL, ...) { @@ -134,7 +134,7 @@ get_combo_assay_names <- function(se = NULL, ...) { #' @return charvec #' @export #' -#' @examples +#' @examples #' get_combo_base_assay_names() #' @author Arkadiusz Gładki \email{arkadiusz.gladki@@contractors.roche.com} #' @@ -151,10 +151,10 @@ get_combo_base_assay_names <- function(se = NULL, ...) { #' @return charvec #' #' @export -#' -#' @examples +#' +#' @examples #' get_combo_score_assay_names() -#' +#' #' @author Arkadiusz Gładki \email{arkadiusz.gladki@@contractors.roche.com} #' get_combo_score_assay_names <- function(se = NULL, ...) { diff --git a/R/combo.R b/R/combo.R index 74183aac..a9714e8a 100644 --- a/R/combo.R +++ b/R/combo.R @@ -3,14 +3,14 @@ #' @param se \code{SummarizedExperiment} object with dose-response data #' @param c_assays charvec of combo assays to be used #' @param normalization_type charvec of normalization_types expected in the data -#' @param prettify boolean flag indicating whether or not to prettify the colnames of the returned data +#' @param prettify boolean flag indicating whether or not to prettify the colnames of the returned data #' @keywords combination_data -#' +#' #' @author Arkadiusz Gładki \email{arkadiusz.gladki@@contractors.roche.com} #' #' @return list of data.table(s) with combo data #' -#' @examples +#' @examples #' mae <- get_synthetic_data("finalMAE_combo_matrix_small.qs2") #' convert_combo_data_to_dt(mae[[1]]) #' @@ -38,7 +38,7 @@ convert_combo_data_to_dt <- } dt }) - + # TODO: discuss what should be returned: assay_name or maybe assay_type? names(my_l) <- as.character(c_assays) my_l @@ -60,10 +60,10 @@ DATA_COMBO_INFO_TBL <- data.table::data.table( #' @return charvec #' #' @export -#' -#' @examples +#' +#' @examples #' get_combo_score_assay_names() -#' +#' get_combo_score_field_names <- function() { dt <- DATA_COMBO_INFO_TBL[type == "scores", c("name", "pname"), with = FALSE] stats::setNames(dt$pname, dt$name) @@ -75,10 +75,10 @@ get_combo_score_field_names <- function() { #' @return charvec #' #' @export -#' -#' @examples +#' +#' @examples #' get_combo_excess_field_names() -#' +#' get_combo_excess_field_names <- function() { dt <- DATA_COMBO_INFO_TBL[type == "excess", c("name", "pname"), with = FALSE] stats::setNames(dt$pname, dt$name) @@ -93,10 +93,10 @@ get_combo_excess_field_names <- function() { #' @return charvec #' #' @export -#' -#' @examples +#' +#' @examples #' convert_combo_field_to_assay("hsa_score") -#' +#' convert_combo_field_to_assay <- function(field) { checkmate::assert_string(field) DATA_COMBO_INFO_TBL[name == field, ][["type"]] @@ -111,26 +111,26 @@ convert_combo_field_to_assay <- function(field) { #' @details #' \code{drug_1} is diluted along the rows as the y-axis and #' \code{drug_2} is diluted along the columns and will be the x-axis. -#' +#' #' @keywords combination_data #' @return list with axis grid positions -#' +#' #' @examples #' cl_name <- "cellline_BC" #' drug1_name <- "drug_001" #' drug2_name <- "drug_026" -#' +#' #' se <- get_synthetic_data("combo_matrix_small")[["combination"]] #' dt_average <- convert_se_assay_to_dt(se, "Averaged")[normalization_type == "GR"] -#' +#' #' ls_axes <- define_matrix_grid_positions( #' dt_average[["Concentration"]], dt_average[["Concentration_2"]]) -#' +#' #' @export define_matrix_grid_positions <- function(conc1, conc2) { checkmate::assert_numeric(conc1) checkmate::assert_numeric(conc2) - + .generate_gap_for_single_agent <- function(x) { if (NROW(x) == 1) { x @@ -139,8 +139,8 @@ define_matrix_grid_positions <- function(conc1, conc2) { } else { x[2] - 0.5 # diff(log10(c(0, 10^(seq(-3, 1, 0.5))))) # nolint } - } - + } + conc_1 <- sort(unique(round_concentration(conc1))) pos_y <- log10conc_1 <- log10(conc_1) pos_y[1] <- .generate_gap_for_single_agent(log10conc_1) @@ -149,7 +149,7 @@ define_matrix_grid_positions <- function(conc1, conc2) { pos_y = pos_y, marks_y = sprintf("%.2g", conc_1) ) - + conc_2 <- sort(unique(round_concentration(conc2))) pos_x <- log10conc_2 <- log10(conc_2) pos_x[1] <- .generate_gap_for_single_agent(log10conc_2) @@ -158,7 +158,7 @@ define_matrix_grid_positions <- function(conc1, conc2) { pos_x = pos_x, marks_x = sprintf("%.2g", conc_2) ) - + list(axis_1 = axis_1, axis_2 = axis_2) } @@ -166,8 +166,8 @@ define_matrix_grid_positions <- function(conc1, conc2) { #' #' @param x value to be rounded. #' @param ndigit number of significant digits (default = 4). -#' -#' @examples +#' +#' @examples #' round_concentration(x = c(0.00175,0.00324,0.0091), ndigit = 1) #' #' @return rounded x @@ -176,6 +176,6 @@ define_matrix_grid_positions <- function(conc1, conc2) { round_concentration <- function(x, ndigit = 3) { checkmate::assert_numeric(x) checkmate::assert_integerish(ndigit) - + round(10 ^ (round(log10(x), ndigit)), ndigit - 1 - floor(log10(x))) } diff --git a/R/concatentate_SEs.R b/R/concatentate_SEs.R index 0f115a5f..62400565 100644 --- a/R/concatentate_SEs.R +++ b/R/concatentate_SEs.R @@ -31,13 +31,13 @@ has_nested_field <- function(asy, nested_field) { #' #' @seealso promote_fields #' @details Revert this operation using \code{promote_fields}. -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' se <- promote_fields(se, "ReadoutValue", 2) #' demote_fields(se, "ReadoutValue") -#' +#' #' @export demote_fields <- function(se, fields) { checkmate::assertClass(se, "SummarizedExperiment") @@ -48,7 +48,7 @@ demote_fields <- function(se, fields) { if (any(are_nested_fields <- !fields %in% c(rowmd, colmd))) { stop(sprintf("field(s) '%s' are already demoted fields, perhapy you intended to call 'promote_fields'?", - paste0(fields[are_nested_fields], collapse = ", "))) + toString(fields[are_nested_fields]))) } rowmd <- setdiff(rowmd, fields) @@ -94,12 +94,12 @@ demote_fields <- function(se, fields) { #' @return A \code{SummarizedExperiment} object with new dimensions resulting from promoting given \code{fields}. #' @details Revert this operation using \code{demote_fields}. #' @seealso demote_fields -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' se <- promote_fields(se, "ReadoutValue", 2) -#' +#' #' @export promote_fields <- function(se, fields, MARGIN = c(1, 2)) { checkmate::assertClass(se, "SummarizedExperiment") @@ -107,13 +107,13 @@ promote_fields <- function(se, fields, MARGIN = c(1, 2)) { if (length(MARGIN) != 1L || !MARGIN %in% c(1, 2)) { stop("invalid 'MARGIN' argument, must be either '1' or '2'") } - rowmd <- colnames(rowData(se)) + rowmd <- colnames(rowData(se)) colmd <- colnames(colData(se)) rowmd <- colnames(rowData(se)) colmd <- colnames(colData(se)) if (any(rfields <- fields %in% rowmd) || any(cfields <- fields %in% colmd)) { stop(sprintf("fields '%s' are already promoted fields", - paste0(fields[rfields || cfields], collapse = ", "))) + toString(fields[rfields || cfields]))) } if (MARGIN == 1) { rowmd <- c(rowmd, fields) @@ -139,7 +139,7 @@ promote_fields <- function(se, fields, MARGIN = c(1, 2)) { } if (length(final_assays) == 0L) { stop(sprintf("unable to find nested fields: '%s' in any assays, perhaps you intended to call 'demote_fields'?", - paste0(fields, collapse = ", "))) + toString(fields))) } .validate_final_assays(final_assays) assay_list <- lapply(final_assays, function(x) { @@ -184,7 +184,7 @@ promote_fields <- function(se, fields, MARGIN = c(1, 2)) { missing <- setdiff(colnames(df), c(row_fields, column_fields, nested_fields)) if (length(missing) != 0L) { stop(sprintf("found columns in 'df' not specified as row, column, or nested fields", - paste0(missing, collapse = ", "))) + toString(missing))) } if (any(is_duplicated <- duplicated(c(row_fields, column_fields, nested_fields)))) { stop(sprintf("fields: '%s' are duplicated across arguments 'row_fields', 'column_fields', 'nested_fields'", @@ -216,13 +216,13 @@ promote_fields <- function(se, fields, MARGIN = c(1, 2)) { #' @keywords SE_operators #' #' @return A \code{BumpyMatrix} object aggregated by \code{FUN}. -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' assay <- SummarizedExperiment::assay(se) #' aggregate_assay(assay, FUN = mean, by = c("Barcode")) -#' +#' #' @export aggregate_assay <- function(asy, by, FUN) { checkmate::assert_class(asy, "BumpyMatrix") @@ -238,7 +238,7 @@ aggregate_assay <- function(asy, by, FUN) { row.field = row.field, column.field = column.field) if (!all(present <- by %in% setdiff(colnames(df), c(row.field, column.field)))) { - stop(sprintf("specified 'by' columns: '%s' are not present in 'asy'", paste0(by[!present], collapse = ", "))) + stop(sprintf("specified 'by' columns: '%s' are not present in 'asy'", toString(by[!present]))) } by <- c(row.field, column.field, by) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index 2b9ef0b9..c129aae5 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -83,9 +83,9 @@ convert_se_assay_to_dt <- function(se, } if (include_metadata) { dt <- .extract_and_merge_metadata(se, data.table::copy(dt)) - + if (merge_additional_variables) { - additional_vars <- get_additional_variables(list(dt)) + additional_vars <- get_additional_variables(list(dt)) if (!is.null(additional_vars) && length(additional_vars) > 0) { dt <- update_drug_name(dt, additional_vars) @@ -100,7 +100,7 @@ convert_se_assay_to_dt <- function(se, intersect(unlist(get_header()[c("excess", "scores", "response_metrics")]), names(dt)))) rest_cols <- setdiff(colnames(dt), c(normalization_cols, "normalization_type")) - dcast_formula <- paste0(paste0(rest_cols, collapse = " + "), " ~ normalization_type") + dcast_formula <- paste(paste(rest_cols, collapse = " + "), " ~ normalization_type") new_cols <- as.vector(outer(normalization_cols, unique(dt$normalization_type), paste, sep = "_")) new_cols_rename <- unlist(lapply(strsplit(new_cols, "_"), function(x) { @@ -133,12 +133,12 @@ convert_se_assay_to_dt <- function(se, .extract_and_merge_metadata <- function(se, dt) { checkmate::assert_class(se, "SummarizedExperiment") checkmate::assert_data_table(dt) - + rData <- data.table::as.data.table(rowData(se)) rData[, rId := rownames(se)] cData <- data.table::as.data.table(colData(se)) cData[, cId := colnames(se)] - + ids <- data.table::CJ(cData$cId, rData$rId) data.table::setnames(ids, c("cId", "rId")) ids[, names(ids) := lapply(.SD, as.character), .SDcols = names(ids)] @@ -153,16 +153,16 @@ convert_se_assay_to_dt <- function(se, #' @noRd #' .convert_se_assay_to_dt <- function(se, assay_name, retain_nested_rownames) { - + checkmate::assert_class(se, "SummarizedExperiment") checkmate::assert_string(assay_name) - + object <- assays(se)[[assay_name]] checkmate::assert_true(inherits(object, "BumpyDataFrameMatrix") || inherits(object, "matrix")) - + rowfield <- "rId" colfield <- "cId" - + if (methods::is(object, "BumpyDataFrameMatrix")) { as_df <- BumpyMatrix::unsplitAsDataFrame(object, row.field = rowfield, column.field = colfield) # Retain nested rownames. @@ -172,7 +172,7 @@ convert_se_assay_to_dt <- function(se, } } as_dt <- data.table::as.data.table(as_df) - + } else if (methods::is(object, "matrix")) { first <- object[1, 1][[1]] if (is.numeric(first)) { @@ -242,7 +242,7 @@ convert_mae_assay_to_dt <- function(mae, wide_structure = FALSE, drop_masked = TRUE, merge_additional_variables = FALSE) { - + # Assertions. checkmate::assert_class(mae, "MultiAssayExperiment") checkmate::assert_string(assay_name) @@ -251,11 +251,11 @@ convert_mae_assay_to_dt <- function(mae, checkmate::assert_flag(retain_nested_rownames) checkmate::assert_flag(wide_structure) checkmate::assert_flag(merge_additional_variables) - + if (is.null(experiment_name)) { experiment_name <- names(mae) } - + dtList <- lapply(experiment_name, function(x) { if (!assay_name %in% assayNames(mae[[x]])) { return() @@ -271,7 +271,7 @@ convert_mae_assay_to_dt <- function(mae, if (all(vapply(dtList, is.null, logical(1)))) { warning(sprintf("assay '%s' was not found in any of the following experiments: '%s'", assay_name, - paste(experiment_name, collapse = ", "))) + toString(experiment_name))) } data.table::rbindlist(dtList, fill = TRUE, use.names = TRUE) } @@ -321,7 +321,7 @@ convert_se_assay_to_custom_dt <- function(se, assay_name, output_table = NULL, cap_values = FALSE) { - + checkmate::assert_class(se, "SummarizedExperiment") checkmate::assert_string(assay_name) checkmate::assert_string(output_table, null.ok = TRUE) @@ -330,29 +330,29 @@ convert_se_assay_to_custom_dt <- function(se, c(get_assay_names(), "Metrics_initial", "Metrics_raw"), null.ok = TRUE) checkmate::assert_flag(cap_values) - + if (is.null(output_table)) { output_table <- assay_name } if (output_table %in% c("Metrics_initial", "Metrics_raw")) { stopifnot(assay_name == "Metrics") } - + wide_structure <- assay_name %in% c("Normalized", "Averaged") dt <- convert_se_assay_to_dt(se, assay_name, include_metadata = TRUE, wide_structure = wide_structure) - + if (output_table %in% c(get_combo_assay_names(), "Metrics_initial")) { return(dt) } - + if (output_table %in% c("Metrics", "Metrics_raw")) { # SE*.qs2 files contain 'c50' column instead of the 'ec50' in the metrics # this is a temporary fix that should be removed once data (qs2 files) is reprocessed data.table::setnames(dt, "c50", "ec50", skip_absent = TRUE) - + groups <- c("normalization_type", "fit_source") if (all(groups %in% names(dt))) { dt <- flatten( @@ -362,7 +362,7 @@ convert_se_assay_to_custom_dt <- function(se, ) } } - + if (output_table %in% c("Metrics_raw", "Metrics", "Normalized", "Averaged")) { # TODO GDR-2513 # nolint start # pidfs <- get_SE_identifiers(se) @@ -374,15 +374,15 @@ convert_se_assay_to_custom_dt <- function(se, # dt[vars] <- NULL # } # nolint end - + # add identifiers specific for given SE colnames(dt) <- prettify_flat_metrics(colnames(dt), human_readable = TRUE) } - + if (output_table == "Metrics" && cap_values) { dt <- capVals(dt) } - + dt } @@ -402,13 +402,13 @@ convert_se_assay_to_custom_dt <- function(se, #' } #' #' @param x \code{data.table} containing growth metrics extracted from a \code{SummarizedExperiment} -#' +#' #' @examples #' dt <- data.table::data.table( #' `E Max` = c(-0.1, 0, 0.5, 1.2), #' `GR Max` = c(-1.1, -1, 0.5, 1.2), #' `RV AOC within set range` = c(-0.2, -0.1, 0, 3), -#' `GR AOC within set range` = c(-0.2, -0.1, 0, 3), +#' `GR AOC within set range` = c(-0.2, -0.1, 0, 3), #' `GR50` = c(0, 1e-7, 10, 34), #' `IC50` = c(0, 1e-7, 10, 34), #' `EC50` = c(0, 1e-7, 10, 34), @@ -417,7 +417,7 @@ convert_se_assay_to_custom_dt <- function(se, #' dt #' dt1 <- capVals(dt) #' dt1 -#' +#' #' @return A data table with capped values. #' @keywords internal #' @@ -425,9 +425,9 @@ convert_se_assay_to_custom_dt <- function(se, #' #' @export capVals <- function(x) { - + checkmate::assert_data_table(x) - + json_path <- system.file(package = "gDRutils", "settings.json") s <- get_settings_from_json("capVals", json_path) # fifty_lower_limit numeric value of the lower limit to cap all x50 metrics @@ -438,10 +438,10 @@ capVals <- function(x) { checkmate::assert_number(s$max_upper_limit) # range_lower_limit numeric value of the lower limit to cap all xrange metrics checkmate::assert_number(s$range_lower_limit) - + s_col <- get_settings_from_json("CAP_VALS_COLS", json_path) if (!NROW(intersect(s_col, names(x)))) return(x) # no columns to capped - + X <- data.table::copy(x) if ("E Max" %in% names(X)) { X[, `E Max` := scales::oob_squish_any(`E Max`, range = c(0, s$max_upper_limit))] @@ -506,17 +506,17 @@ capVals <- function(x) { update_drug_name <- function(dt, additional_vars) { checkmate::assert_data_table(dt) checkmate::assert_character(additional_vars) - + dt <- data.table::copy(dt) - + cols_to_merge <- unlist(get_env_identifiers(c("drug", "drug_name"), simplify = FALSE)) - + for (var in additional_vars) { if (!var %in% names(dt)) { warning(sprintf("Additional variable '%s' not found in data.table. Skipping merge for this variable.", var)) next } - + for (col in cols_to_merge) { if (!col %in% names(dt)) { warning(sprintf("Drug identifier column '%s' not found in data.table. Skipping update for this column.", col)) diff --git a/R/df_to_bm_assay.R b/R/df_to_bm_assay.R index 2ed9c808..4dce83d5 100644 --- a/R/df_to_bm_assay.R +++ b/R/df_to_bm_assay.R @@ -23,40 +23,40 @@ df_to_bm_assay <- discard_keys = NULL) { stopifnot(any(inherits(data, "data.table"), checkmate::test_character(data))) checkmate::assert_character(discard_keys, null.ok = TRUE) - + allMetadata <- split_SE_components(data, nested_keys = discard_keys) - + seColData <- data.table::as.data.table(allMetadata$condition_md) seRowData <- data.table::as.data.table(allMetadata$treatment_md) - + # Create row_id and col_id directly in data.table for speed seColData[, col_id := .I] seRowData[, row_id := .I] - + cl_entries <- setdiff(colnames(seColData), c("col_id", "name_")) cond_entries <- setdiff(colnames(seRowData), c("row_id", "name_")) - + # Use data.table's cross-join for speed complete <- data.table::CJ(col_id = seColData$col_id, row_id = seRowData$row_id) complete[, factor_id := .I] - + # Use data.table merge for speed completeMerged <- merge(complete, seColData, by = "col_id", all.x = TRUE) completeMerged <- merge(completeMerged, seRowData, by = "row_id", all.x = TRUE) - + # Merge with original data using data.table merge data_assigned <- merge(data, completeMerged, by = c(cond_entries, cl_entries), all.x = TRUE) - + # Order by factor_id using data.table's setorder data.table::setorder(data_assigned, factor_id) - + bm <- BumpyMatrix::splitAsBumpyMatrix(data_assigned[, allMetadata$data_fields, with = FALSE], row = data_assigned$row_id, column = data_assigned$col_id) - + # Check if the order of rows/cols are correct stopifnot(!is.unsorted(as.numeric(rownames(bm)))) stopifnot(!is.unsorted(as.numeric(colnames(bm)))) - + bm } diff --git a/R/duplicates.R b/R/duplicates.R index d16a3771..2c13ee2a 100644 --- a/R/duplicates.R +++ b/R/duplicates.R @@ -1,7 +1,7 @@ #' check if data.table contains duplicated data -#' +#' #' An auxiliary function that checks for duplicates in the data.table (or its subset) -#' +#' #' @param dt data.table #' @param col_names charvec with columns to be used for subsetting #' @examples @@ -11,24 +11,24 @@ #' @keywords duplicates #' #' @export -#' +#' has_dt_duplicated_rows <- function(dt, col_names = NULL) { checkmate::assert_data_table(dt) checkmate::assert_character(col_names, null.ok = TRUE) - + if (is.null(col_names)) { anyDuplicated(dt) != 0 } else { checkmate::assert_subset(col_names, colnames(dt)) anyDuplicated(dt, by = col_names) != 0 } - + } #' get columns in the assay data required to have unique data -#' +#' #' get columns in the assay data required to have unique (non-duplicated) data -#' +#' #' @param dt data.table with assay data #' @examples #' sdata <- get_synthetic_data("finalMAE_small.qs2") @@ -37,21 +37,21 @@ has_dt_duplicated_rows <- function(dt, col_names = NULL) { #' @return charvec with columns required to have unique data #' @keywords duplicates #' @export -#' +#' get_assay_req_uniq_cols <- function(dt) { - + checkmate::assert_data_table(dt) col_ids <- get_settings_from_json( "assay_dt_req_uniq_col_ids", system.file(package = "gDRutils", "settings.json") ) - + # check with both pretiffied and unprettified version of ids col_names_p <- unlist(get_prettified_identifiers(col_ids, simplify = FALSE)) col_names_up <- as.character(get_env_identifiers(col_ids, simplify = FALSE)) - + # add additional variables - ## there are columns that should not be considered additional variables + ## there are columns that should not be considered additional variables skip_col_ids <- get_settings_from_json( "assay_dt_skip_uniq_col_ids", system.file(package = "gDRutils", "settings.json") @@ -65,18 +65,18 @@ get_assay_req_uniq_cols <- function(dt) { add_v_p <- gDRutils::get_additional_variables(dt) add_v_up <- gDRutils::get_additional_variables(dt, prettified = FALSE) } - + col_names <- unique(c(col_names_p, col_names_up, add_v_p, add_v_up)) - + intersect(col_names, names(dt)) } #' check if assay data contains duplicated data -#' +#' #' An auxiliary function that checks for duplicates in the assay data -#' +#' #' @param dt data.table with assay data -#' +#' #' @return logical flag indicating if a dt contains duplicated rows or not #' @keywords duplicates #' @examples @@ -84,11 +84,11 @@ get_assay_req_uniq_cols <- function(dt) { #' smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") #' has_assay_dt_duplicated_rows(smetrics_data) #' @export -#' -has_assay_dt_duplicated_rows <- function(dt) { +#' +has_assay_dt_duplicated_rows <- function(dt) { checkmate::assert_data_table(dt) - + col_names <- get_assay_req_uniq_cols(dt) has_dt_duplicated_rows(dt, col_names) @@ -99,7 +99,7 @@ has_assay_dt_duplicated_rows <- function(dt) { #' #' @param x DataFrame or data.table #' @param col_names character vector, columns in which duplication are searched for -#' @param output string with the output format to be returned - +#' @param output string with the output format to be returned - #' one of "index" (index of duplicates) or "data" (subset of input data with duplicates) #' @examples #' dt <- data.table::data.table(a = c(1, 2, 3), b = c(3, 2, 2)) @@ -108,22 +108,22 @@ has_assay_dt_duplicated_rows <- function(dt) { #' @return integer vector or data.table with duplicated rows #' @keywords duplicates #' @export -get_duplicated_rows <- function(x, - col_names = NULL, +get_duplicated_rows <- function(x, + col_names = NULL, output = "index") { - + checkmate::assertMultiClass(x, c("data.table", "DataFrame")) checkmate::assert_true(all(col_names %in% colnames(x))) checkmate::assert_choice(output, c("index", "data")) - - + + if (!is.null(col_names)) { sub_x <- subset(x, select = col_names) } else { sub_x <- x } idx <- which(duplicated(sub_x) | duplicated(sub_x, fromLast = TRUE)) - + out <- if (output == "index") { idx } else { @@ -149,21 +149,21 @@ get_duplicated_rows <- function(x, #' @keywords duplicates #' @export get_assay_dt_duplicated_rows <- function(dt, output = "index") { - + checkmate::assert_data_table(dt) - + col_names <- get_assay_req_uniq_cols(dt) - + get_duplicated_rows(dt, col_names, output = output) } #' throw message if assay data.table contains duplicated rows -#' -#' An auxiliary function that checks for duplicated rows in assay data.table, -#' In case of duplicates it throws a message. The messsage function is by default `stop()` +#' +#' An auxiliary function that checks for duplicated rows in assay data.table, +#' In case of duplicates it throws a message. The messsage function is by default `stop()` #' The message function can be customized with `msg_f` parameter -#' +#' #' @param dt data.table with assay data #' @param assay_name string with the name of the assay #' @param msg_f function to be used to throw the message @@ -176,7 +176,7 @@ get_assay_dt_duplicated_rows <- function(dt, output = "index") { #' @keywords duplicates #' #' @export -#' +#' throw_msg_if_duplicates <- function(dt, assay_name = "unknown", msg_f = stop, preview_max_numb = 4) { checkmate::assert_data_table(dt) @@ -184,11 +184,11 @@ throw_msg_if_duplicates <- function(dt, assay_name = "unknown", msg_f = stop, pr checkmate::assert_function(msg_f) checkmate::assert_number(preview_max_numb) - if (has_assay_dt_duplicated_rows(dt)) { + if (has_assay_dt_duplicated_rows(dt)) { dup_dt <- get_assay_dt_duplicated_rows(dt, output = "data") preview_numb <- min(c(preview_max_numb, NROW(dup_dt))) - + msg <- sprintf( "The %i ouf of %i rows are duplicated in the assay '%s'", NROW(dup_dt), @@ -202,4 +202,3 @@ throw_msg_if_duplicates <- function(dt, assay_name = "unknown", msg_f = stop, pr msg_f(paste0(msg, msg2, msg3)) } } - diff --git a/R/experiment_validators.R b/R/experiment_validators.R index 6402db0a..39d6e415 100644 --- a/R/experiment_validators.R +++ b/R/experiment_validators.R @@ -36,16 +36,16 @@ validate_dimnames <- function(obj, obj2, skip_empty = TRUE) { #' Throws an error if the assay is not valid. #' #' @export -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' validate_se_assay_name(se, "RawTreated") #' validate_se_assay_name <- function(se, name) { if (!name %in% assayNames(se)) { stop(sprintf("'%s' is not on of the available assays: '%s'", - name, paste0(assayNames(se), collapse = ", "))) + name, toString(assayNames(se)))) } invisible(NULL) } @@ -69,8 +69,8 @@ validate_se_assay_name <- function(se, name) { #' @return \code{NULL} invisibly if the SummarizedExperiment is valid. #' Throws an error if the SummarizedExperiment is not valid. #' @export -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' validate_SE(se) @@ -141,10 +141,10 @@ validate_SE <- function(se, #' Throws an error if the MultiAssayExperiment is not valid. #' @export #' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' validate_MAE(mae) -#' +#' #' @author Bartosz Czech validate_MAE <- function(mae) { # Validate the SE structure, assays and metadata, as well as dimnames of assays @@ -161,4 +161,3 @@ validate_MAE <- function(mae) { } invisible(NULL) } - diff --git a/R/experiments.R b/R/experiments.R index f0fdc5ad..244716ac 100644 --- a/R/experiments.R +++ b/R/experiments.R @@ -23,9 +23,9 @@ EXPERIMENT_GROUPS <- #' @export #' @author Arkadiusz Gladki get_supported_experiments <- function(type = NULL) { - + checkmate::assert_choice(type, names(SUPPORTED_EXPERIMENTS), null.ok = TRUE) - + if (is.null(type)) { as.character(SUPPORTED_EXPERIMENTS) } else { @@ -42,19 +42,19 @@ get_supported_experiments <- function(type = NULL) { #' #' @return list with experiment groups or string (if type not NULL) #' -#' @examples +#' @examples #' get_experiment_groups() -#' +#' #' @export #' @author Arkadiusz Gladki get_experiment_groups <- function(type = NULL) { - + checkmate::assert_choice(type, names(EXPERIMENT_GROUPS), null.ok = TRUE) - + if (!is.null(type)) { EXPERIMENT_GROUPS[[type]] } else { EXPERIMENT_GROUPS } - + } diff --git a/R/fit_curves.R b/R/fit_curves.R index 5624bb97..963961d0 100644 --- a/R/fit_curves.R +++ b/R/fit_curves.R @@ -36,12 +36,12 @@ #' The purpose of this statistic is to enable comparison across different experiments with slightly #' different concentration ranges. #' -#' @examples -#' df_ <- data.table::data.table(Concentration = c(0.001, 0.00316227766016838, +#' @examples +#' df_ <- data.table::data.table(Concentration = c(0.001, 0.00316227766016838, #' 0.01, 0.0316227766016838), #' x_std = c(0.1, 0.1, 0.1, 0.1), normalization_types = c("RV", "RV", "RV", "RV"), #' x = c(0.9999964000144, 0.999964001439942, 0.999640143942423, 0.996414342629482)) -#' +#' #' fit_curves(df_, "Concentration", normalization_type = "RV") #' #' @export @@ -62,49 +62,49 @@ fit_curves <- function(df_, stopifnot(any(inherits(df_, "data.table"), inherits(df_, "DFrame"))) if (any(bad_normalization_type <- ! normalization_type %in% c("GR", "RV"))) { stop(sprintf("unknown curve type: '%s'", - paste0(normalization_type[bad_normalization_type], collapse = ", "))) + toString(normalization_type[bad_normalization_type]))) } - + req_fields <- series_identifiers opt_fields <- NULL - + req_fields <- c(req_fields, "x") opt_fields <- "x_std" - + if (!all(req_fields %in% colnames(df_))) { - stop(sprintf("missing one of the required fields: '%s'", paste(req_fields, collapse = ", "))) + stop(sprintf("missing one of the required fields: '%s'", toString(req_fields))) } - + if (length(setdiff(opt_fields, colnames(df_))) > 0L) { df_[, setdiff(opt_fields, colnames(df_))] <- NA } - df_metrics <- .applyLogisticFit(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit, + df_metrics <- .applyLogisticFit(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit, pcutoff, cap, n_point_cutoff) - - is_unique_normalization_type_and_fit_source <- + + is_unique_normalization_type_and_fit_source <- NROW(unique(df_metrics[, c("normalization_type", "fit_source")])) == NROW(df_metrics) if (!is_unique_normalization_type_and_fit_source) { - stop("'normalization_type' and 'fit_source' columns do not create unique combinations") + stop("'normalization_type' and 'fit_source' columns do not create unique combinations") } rownames(df_metrics) <- paste0(df_metrics$normalization_type, "_", df_metrics$fit_source) - + concsNA <- all(is.na(unique(df_[[series_identifiers]]))) if (concsNA) df_metrics[] <- NA df_metrics } #' @keywords internal -.applyLogisticFit <- function(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit, +.applyLogisticFit <- function(df_, normalization_type, series_identifiers, e_0, GR_0, range_conc, force_fit, pcutoff, cap, n_point_cutoff) { - + df_metrics <- NULL concs <- unique(df_[[series_identifiers]]) med_concs <- stats::median(concs) min_concs <- min(concs) - + concsNA <- all(is.na(concs)) if (concsNA) concs[] <- 0 - + if ("RV" %in% normalization_type) { df_metrics <- logisticFit( concs, @@ -121,7 +121,7 @@ fit_curves <- function(df_, ) df_metrics$normalization_type <- "RV" } - + if ("GR" %in% normalization_type) { df_gr <- logisticFit( concs, @@ -139,7 +139,7 @@ fit_curves <- function(df_, df_gr$normalization_type <- "GR" df_metrics <- data.table::rbindlist(list(df_metrics, df_gr), fill = TRUE) } - + df_metrics$fit_source <- "gDR" df_metrics } @@ -228,11 +228,11 @@ logisticFit <- if (length(concs) != length(norm_values)) { stop("unequal vector lengths for 'conc' and 'norm_values'") } - # Check that values have not been logged yet. + # Check that values have not been logged yet. if (any(concs < 0)) { stop("logisticFit accepts only unlogged concentrations, negative concentrations are detected") } - + out <- .setup_metric_output() out$maxlog10Concentration <- log10(max(concs)) out$N_conc <- length(unique(concs)) @@ -243,14 +243,14 @@ logisticFit <- } else { x_0 + cap } - + norm_values <- pmin(norm_values, limit) df_ <- data.table::data.table(concs = concs, norm_values = norm_values) if (has_dups(df_$concs)) { warning("duplicates were found, averaging values") df_ <- average_dups(df_, "concs") } - + mean_norm_value <- mean(df_$norm_values, na.rm = TRUE) out$x_mean <- mean_norm_value out$x_AOC <- .calculate_complement(mean_norm_value) @@ -259,13 +259,13 @@ logisticFit <- ## Fit type is determined based on number of free variables available. fit_param <- c("h", "x_inf", "x_0", "ec50") controls <- drc::drmc(relTol = 1e-06, errorm = FALSE, noMessage = TRUE, rmNA = TRUE) - + out <- .setLogisticFit(out = out, df_ = df_, n_point_cutoff = n_point_cutoff, fit_param = fit_param, - priors = priors, lower = lower, force_fit = force_fit, x_0 = x_0, cap = cap, - concs = concs, controls = controls, range_conc = range_conc, pcutoff = pcutoff, + priors = priors, lower = lower, force_fit = force_fit, x_0 = x_0, cap = cap, + concs = concs, controls = controls, range_conc = range_conc, pcutoff = pcutoff, capping_fold = capping_fold, mean_norm_value = mean_norm_value) - + data.table::setDT(out) out } @@ -298,7 +298,7 @@ logisticFit <- df1 <- nparam - 1 # (N of parameters in the growth curve) - (F-test for the models) df2 <- length(stats::na.omit(df_$norm_values)) - nparam + 1 out$p_value <- f_pval <- .calculate_f_pval(df1, df2, RSS1, RSS2) - if (all((!force_fit), + if (all((!force_fit), any(all(exists("f_pval"), !is.na(f_pval), f_pval >= pcutoff), is.na(out$ec50)))) { stop(fitting_handler( "constant_fit", @@ -347,12 +347,12 @@ logisticFit <- if (is.na(x$xc50)) { x$xc50 <- .estimate_xc50(x$x_inf) } else { - # set the xc50 to Inf if the value is extrapolated beyond to 5-fold above/below the + # set the xc50 to Inf if the value is extrapolated beyond to 5-fold above/below the # max/min tested concentrations (default) x$xc50 <- cap_xc50( - x$xc50, - max_conc = 10 ^ x$maxlog10Concentration, - min_conc = min(concs[concs > 0]), + x$xc50, + max_conc = 10 ^ x$maxlog10Concentration, + min_conc = min(concs[concs > 0]), capping_fold = capping_fold ) } @@ -366,19 +366,19 @@ logisticFit <- fit_param <- fit_param[-3] priors <- priors[-3] lower <- lower[-3] - + fct <- drc::LL.3u(upper = x_0, names = fit_param) upperl <- c(5, min(x_0 + cap, 1), max(concs) * 10) - + x$fit_type <- "DRC3pHillFitModelFixS0" x$x_0 <- x_0 } else { fct <- drc::LL.4(names = fit_param) upperl <- c(5, 1, 1 + cap, max(concs) * 10) - + x$fit_type <- "DRC4pHillFitModel" } - + drc::drm( norm_values ~ concs, data = df_, @@ -410,10 +410,10 @@ logisticFit <- #' #' @details The inverse of this function is \code{predict_conc_from_efficacy}. #' @seealso predict_conc_from_efficacy -#' -#' @examples +#' +#' @examples #' predict_efficacy_from_conc(c = 1, x_inf = 0.1, x_0 = 1, ec50 = 0.5, h = 2) -#' +#' #' @export predict_efficacy_from_conc <- function(c, x_inf, x_0, ec50, h) { checkmate::assert_numeric(c) @@ -442,7 +442,7 @@ predict_efficacy_from_conc <- function(c, x_inf, x_0, ec50, h) { #' Expects columns: 'dilution_drug', 'cotrt_value', 'ratio', 'ec50', 'h', 'x_inf', 'x_0'. #' #' @return A single numeric value for the predicted 'smooth' response. -#' +#' #' @examples #' mae <- get_synthetic_data("combo_matrix") #' se <- mae[[gDRutils::get_supported_experiments("combo")]] @@ -460,16 +460,16 @@ predict_smooth_from_combo <- function(conc_1, conc_2, metrics_merged) { colnames(metrics_merged), must.include = c("dilution_drug", "cotrt_value", "ratio", "ec50", "h", "x_inf", "x_0") ) - + available_cotrt_1 <- unique(metrics_merged[dilution_drug == "drug_2"]$cotrt_value) available_cotrt_2 <- unique(metrics_merged[dilution_drug == "drug_1"]$cotrt_value) - + snapped_conc_1 <- .snap_conc_to_model(conc_1, available_cotrt_1) snapped_conc_2 <- .snap_conc_to_model(conc_2, available_cotrt_2) - + message(sprintf("Requested: (%.2f, %.2f) ==> Using models for nearest concentrations: (%.2f, %.2f)", conc_1, conc_2, snapped_conc_1, snapped_conc_2)) - + col_params <- metrics_merged[dilution_drug == "drug_1" & cotrt_value == snapped_conc_2, ] col_value <- if (NROW(col_params) == 1) { @@ -481,7 +481,7 @@ predict_smooth_from_combo <- function(conc_1, conc_2, metrics_merged) { } else { NA } - + row_params <- metrics_merged[dilution_drug == "drug_2" & cotrt_value == snapped_conc_1, ] row_value <- if (NROW(row_params) == 1) { predict_efficacy_from_conc(conc_2, @@ -492,7 +492,7 @@ predict_smooth_from_combo <- function(conc_1, conc_2, metrics_merged) { } else { NA } - + codil_value <- NA if (!is.na(snapped_conc_1) && snapped_conc_1 != 0) { ratio <- snapped_conc_2 / snapped_conc_1 @@ -507,10 +507,10 @@ predict_smooth_from_combo <- function(conc_1, conc_2, metrics_merged) { NA } } - + predicted_values <- c(col_value, row_value, codil_value) final_prediction <- mean(predicted_values, na.rm = TRUE) - + if (is.nan(final_prediction)) { NA_real_ } else { @@ -524,7 +524,7 @@ predict_smooth_from_combo <- function(conc_1, conc_2, metrics_merged) { #' Predict a concentration for a given efficacy with fit parameters. #' #' @details The inverse of this function is \code{predict_efficacy_from_conc}. -#' +#' #' @param efficacy Numeric vector representing efficacies to predict concentrations for. #' @param x_inf Numeric vector representing the asymptotic value of the sigmoidal fit to the dose-response #' data as concentration goes to infinity. @@ -536,9 +536,9 @@ predict_smooth_from_combo <- function(conc_1, conc_2, metrics_merged) { #' #' @return Numeric vector representing predicted concentrations from given efficacies and fit parameters. #' -#' @examples +#' @examples #' predict_conc_from_efficacy(efficacy = c(1, 1.5), x_inf = 0.1, x_0 = 1, ec50 = 0.5, h = 2) -#' +#' #' @seealso predict_efficacy_from_conc .calculate_x50 #' @export predict_conc_from_efficacy <- function(efficacy, x_inf, x_0, ec50, h) { @@ -595,7 +595,7 @@ logistic_metrics <- function(c, x_metrics) { .snap_conc_to_model <- function(user_conc, available_concs) { checkmate::assert_number(user_conc, lower = 0) checkmate::assert_numeric(available_concs) - + if (length(available_concs) == 0 || is.na(user_conc)) { return(NA_real_) } @@ -609,10 +609,10 @@ logistic_metrics <- function(c, x_metrics) { .setup_metric_output <- function() { resp_metric_all_cols <- get_header("response_metrics") # remove cols ending with "_sd" - # they are not present in the primary assays + # they are not present in the primary assays # but only with the assays followed by averaging of biological replicates resp_metric_cols <- resp_metric_all_cols[!endsWith(resp_metric_all_cols, "_sd")] - + out <- as.list(rep(NA, length(resp_metric_cols))) names(out) <- resp_metric_cols out @@ -693,11 +693,11 @@ average_dups <- function(dt, col) { #' that can be calculated from the mean. #' @keywords fit_curves #' @return Modified named list of fit parameters. -#' -#' @examples +#' +#' @examples #' na <- list(x_0 = NA) #' set_constant_fit_params(na, mean_norm_value = 0.6) -#' +#' #' @export set_constant_fit_params <- function(out, mean_norm_value) { out$fit_type <- "DRCConstantFitResult" @@ -715,10 +715,10 @@ set_constant_fit_params <- function(out, mean_norm_value) { #' @param norm_values Numeric vector used to estimate an \code{xc50} value. #' @keywords fit_curves #' @return Modified named list of fit parameters. -#' -#' @examples +#' +#' @examples #' .set_invalid_fit_params(list(), norm_values = rep(0.3, 6)) -#' +#' #' @export .set_invalid_fit_params <- function(out, norm_values) { out$fit_type <- "DRCInvalidFitResult" @@ -783,24 +783,24 @@ set_constant_fit_params <- function(out, mean_norm_value) { } #' Cap XC50 value. -#' +#' #' Set IC50/GR50 value to \code{Inf} or \code{-Inf} based on upper and lower limits. #' -#' @details +#' @details #' Note: \code{xc50} and \code{max_conc} should share the same units. #' Ideally, the \code{lower_cap} should be based on the lowest tested concentration. #' However, since we don't record that, it is set 5 orders of magnitude below the highest dose. -#' -#' @param xc50 Numeric value of the IC50/GR50 to cap. +#' +#' @param xc50 Numeric value of the IC50/GR50 to cap. #' @param max_conc Numeric value of the highest concentration in a dose series used to calculate the \code{xc50}. -#' @param min_conc Numeric value of the lowest concentration in a dose series used to calculate the \code{xc50}. +#' @param min_conc Numeric value of the lowest concentration in a dose series used to calculate the \code{xc50}. #' If \code{NA} (default), using \code{max_conc/1e5} instead. #' @param capping_fold Integer value of the fold number to use for capping. Defaults to \code{5}. #' @keywords fit_curves #' #' @return Capped IC50/GR50 value. #' -#' @examples +#' @examples #' cap_xc50(xc50 = 1, max_conc = 2) #' cap_xc50(xc50 = 2, max_conc = 5, min_conc = 1) #' cap_xc50(xc50 = 26, max_conc = 5, capping_fold = 5) @@ -811,7 +811,7 @@ cap_xc50 <- function(xc50, max_conc, min_conc = NA, capping_fold = 5) { checkmate::assert_number(xc50) checkmate::assert_number(max_conc) checkmate::assert_number(min_conc, na.ok = TRUE) - + upper_cap <- max_conc * capping_fold lower_cap <- if (!is.na(min_conc)) { min_conc / capping_fold @@ -824,7 +824,7 @@ cap_xc50 <- function(xc50, max_conc, min_conc = NA, capping_fold = 5) { xc50 <- -Inf } xc50 -} +} ################# # Error handling diff --git a/R/flatten.R b/R/flatten.R index d593ec50..f2d12ff9 100644 --- a/R/flatten.R +++ b/R/flatten.R @@ -40,7 +40,7 @@ flatten <- function(tbl, groups, wide_cols, sep = "_") { checkmate::assert_character(wide_cols) checkmate::assert_string(sep) checkmate::assert_class(tbl, "data.table") - + if ("fit_source" %in% names(tbl)) { tbl <- tbl[fit_source == "gDR", ] groups <- setdiff(groups, "fit_source") @@ -48,7 +48,7 @@ flatten <- function(tbl, groups, wide_cols, sep = "_") { if (!all(groups %in% colnames(tbl))) { stop(sprintf("missing expected uniquifying groups: '%s'", - paste0(setdiff(groups, colnames(tbl)), collapse = ", "))) + toString(setdiff(groups, colnames(tbl))))) } idx <- which(colnames(tbl) %in% groups) @@ -56,11 +56,11 @@ flatten <- function(tbl, groups, wide_cols, sep = "_") { uniquifying <- unique(uniquifying) out <- split(subset(tbl, select = -idx), subset(tbl, select = idx), sep = sep) - + # in original assays there are no columns with SD-related data (with names ending with "_sd") missing <- setdiff(wide_cols[!grepl("_sd$", wide_cols)], colnames(tbl)) if (length(missing) != 0L) { - warning(sprintf("missing listed wide_cols columns: '%s'", paste0(missing, collapse = ", "))) + warning(sprintf("missing listed wide_cols columns: '%s'", toString(missing))) } rename <- colnames(out[[1]]) %in% wide_cols diff --git a/R/global_cache.R b/R/global_cache.R index bb17e112..a413d761 100644 --- a/R/global_cache.R +++ b/R/global_cache.R @@ -1,8 +1,8 @@ -## This global cache maintains a cache of identifiers, -## headers, and their respective values. +## This global cache maintains a cache of identifiers, +## headers, and their respective values. global_cache <- new.env(parent = emptyenv()) -global_cache$identifiers_list <- list() +global_cache$identifiers_list <- list() ############# # Identifiers @@ -23,11 +23,11 @@ global_cache$identifiers_list <- list() if (length(global_cache$identifiers_list) == 0L) { global_cache$identifiers_list <- IDENTIFIERS_LIST } - + if (!is.null(k)) { checkmate::assert_string(k, null.ok = TRUE) checkmate::assert_choice(k, choices = names(IDENTIFIERS_LIST)) - + return(global_cache$identifiers_list[[k]]) } else { return(global_cache$identifiers_list) diff --git a/R/headers.R b/R/headers.R index 586fb200..18747c17 100644 --- a/R/headers.R +++ b/R/headers.R @@ -2,7 +2,7 @@ #' #' @description Get the expected header(s) for one field or reset all header fields #' -#' @return +#' @return #' For \code{get_header} a character vector of headers for field \code{k}. #' #' @examples @@ -12,14 +12,14 @@ NULL #' @param k string of field (data type) to return headers for #' @keywords identifiers -#' +#' #' @details #' If \code{get_header} is called with no values, the entire available header list is returned. #' @examples #' get_header("manifest") #' @rdname headers #' @export -#' +#' get_header <- function(k = NULL) { checkmate::assert_string(k, null.ok = TRUE) .get_header(k) diff --git a/R/headers_list.R b/R/headers_list.R index 33974cdb..959be898 100644 --- a/R/headers_list.R +++ b/R/headers_list.R @@ -1,5 +1,5 @@ ## The following function utilizes the get_env_identifiers() function which can be -## changed at run time, which is why it needs to be wrapped in a function. +## changed at run time, which is why it needs to be wrapped in a function. #' @keywords internal .getHeadersList <- function() { @@ -23,7 +23,7 @@ "maxlog10Concentration", "maxlog10Concentration_sd", "N_conc", - "N_conc_sd", + "N_conc_sd", "cotrt_value", "cotrt_value_sd", "ratio", @@ -55,41 +55,41 @@ HEADERS_LIST[["metrics_results"]], get_env_identifiers("well_position", simplify = TRUE) ) - + HEADERS_LIST[["ordered_1"]] <- .orderHeaderList(HEADERS_LIST, 1) HEADERS_LIST[["ordered_2"]] <- .orderHeaderList(HEADERS_LIST, 2) - + HEADERS_LIST[["id"]] <- c("rId", "cId") - - + + HEADERS_LIST[["iso_position"]] <- c("iso_level", "pos_x", "pos_y", "pos_x_ref", "pos_y_ref") - + HEADERS_LIST[["excess"]] <- names(get_combo_excess_field_names()) HEADERS_LIST[["excess_results"]] <- c(names(get_combo_excess_field_names()), paste0(names(get_combo_excess_field_names()), "_sd")) - - + + HEADERS_LIST[["scores"]] <- names(get_combo_score_field_names()) HEADERS_LIST[["scores_results"]] <- c(names(get_combo_score_field_names()), paste0(names(get_combo_score_field_names()), "_sd")) - + HEADERS_LIST[["isobolograms"]] <- c("normalization_type", HEADERS_LIST[["iso_position"]], "log2_CI", "log10_ratio_conc") -HEADERS_LIST[["isobolograms_results"]] <- c(HEADERS_LIST[["isobolograms"]], +HEADERS_LIST[["isobolograms_results"]] <- c(HEADERS_LIST[["isobolograms"]], paste0(HEADERS_LIST[["isobolograms"]], "_sd")) - + HEADERS_LIST[["fit_source"]] <- "fit_source" - + HEADERS_LIST[["obsolete"]] <- c("RV", "GR", "Excess") - + HEADERS_LIST } @@ -117,7 +117,7 @@ HEADERS_LIST[["isobolograms_results"]] <- c(HEADERS_LIST[["isobolograms"]], "RefRelativeViability" ) } - + #' @keywords internal .getAveragedResultsList <- function() { c( @@ -233,15 +233,15 @@ HEADERS_LIST[["isobolograms_results"]] <- c(HEADERS_LIST[["isobolograms"]], .getMetricAverageFields <- function() { list( mean = c( - "x_mean", - "x_AOC", - "x_AOC_range", - "x_max", - "x_inf", + "x_mean", + "x_AOC", + "x_AOC_range", + "x_max", + "x_inf", "x_0" ), geometric_mean = c( - "xc50", + "xc50", "ec50", "GR50", "GEC50", @@ -260,8 +260,8 @@ HEADERS_LIST[["isobolograms_results"]] <- c(HEADERS_LIST[["isobolograms"]], "RV_fit_type", "GR_fit_type" ), - # due to the fact that there is some freedom in what values are in individual fields, - # in order to avoid duplicates in the application we have to exclude some fields from + # due to the fact that there is some freedom in what values are in individual fields, + # in order to avoid duplicates in the application we have to exclude some fields from # recognizing duplicates in averaging blacklisted = c( # tissue @@ -289,7 +289,7 @@ HEADERS_LIST[["isobolograms_results"]] <- c(HEADERS_LIST[["isobolograms"]], get_env_identifiers("duration", simplify = TRUE), get_env_identifiers("drug_name", simplify = TRUE), "Concentration", - paste0(c(paste0(get_env_identifiers("drug_name", simplify = TRUE), "_"), "Concentration_"), + paste0(c(paste0(get_env_identifiers("drug_name", simplify = TRUE), "_"), "Concentration_"), rep(2:10, each = 2)) ) } else { diff --git a/R/helpers-tests.R b/R/helpers-tests.R index d091c2eb..8eed5c5b 100644 --- a/R/helpers-tests.R +++ b/R/helpers-tests.R @@ -2,7 +2,7 @@ #' #' Function to obtain data from gDRtestData and prepare for unit tests #' -#' @examples +#' @examples #' get_testdata() #' #' @keywords test_helpers @@ -10,26 +10,26 @@ #' #' @export get_testdata <- function() { - + mae <- get_synthetic_data("finalMAE_small.qs2") raw_data <- convert_mae_assay_to_dt(mae, "Metrics") drug_names <- unique(raw_data$DrugName) cell_line_names <- unique(raw_data$CellLineName) - + # getting first occurrence of drug_names for each cell_line to avoid aggregation dt <- raw_data[, .SD[1], by = c("DrugName", "CellLineName")] data.table::setnames(dt, - c("CellLineName", "DrugName", "drug_moa", "x_inf", - "x_0", "xc50", "h", "r2", "x_sd_avg", "x_mean", "x_AOC_range", + c("CellLineName", "DrugName", "drug_moa", "x_inf", + "x_0", "xc50", "h", "r2", "x_sd_avg", "x_mean", "x_AOC_range", "x_max", "maxlog10Concentration"), - c("Cell Line Name", "Drug Name", "Drug MOA", - "GR Inf", "GR 0", "GEC50", "h GR", "E Inf", "E0", "EC50", "h RV", + c("Cell Line Name", "Drug Name", "Drug MOA", + "GR Inf", "GR 0", "GEC50", "h GR", "E Inf", "E0", "EC50", "h RV", "GR Max", "Concentration")) dt$GR50 <- dt$EC50 dt$IC50 <- dt$EC50 dt$`E Max` <- dt$EC50 dt$`GR value` <- dt$EC50 - + list( drug_names = drug_names, cell_line_names = cell_line_names, @@ -42,7 +42,7 @@ get_testdata <- function() { #' #' Function to obtain data from gDRtestData and prepare for unit tests #' -#' @examples +#' @examples #' get_testdata_combo() #' #' @keywords test_helpers @@ -50,26 +50,26 @@ get_testdata <- function() { #' #' @export get_testdata_combo <- function() { - + mae <- get_synthetic_data("finalMAE_combo_matrix.qs2") raw_data <- convert_mae_assay_to_dt(mae, "Metrics") drug_names <- unique(raw_data$DrugName) cell_line_names <- unique(raw_data$CellLineName) - + # getting first occurrence of drug_names for each cell_line to avoid aggregation dt <- raw_data[, .SD[1], by = c("DrugName", "CellLineName")] data.table::setnames(dt, c("CellLineName", "DrugName", "drug_moa", "DrugName_2", "drug_moa_2", - "x_inf", "x_0", "xc50", "h", "r2", "x_sd_avg", "x_mean", "x_AOC_range", + "x_inf", "x_0", "xc50", "h", "r2", "x_sd_avg", "x_mean", "x_AOC_range", "x_max", "maxlog10Concentration"), c("Cell Line Name", "Drug Name", "Drug MOA", "Drug Name 2", "Drug MOA2", - "GR Inf", "GR 0", "GEC50", "h GR", "E Inf", "E0", "EC50", "h RV", + "GR Inf", "GR 0", "GEC50", "h GR", "E Inf", "E0", "EC50", "h RV", "GR Max", "Concentration")) dt$GR50 <- dt$EC50 dt$IC50 <- dt$EC50 dt$`E Max` <- dt$EC50 dt$`GR value` <- dt$EC50 - + list( drug_names = drug_names, cell_line_names = cell_line_names, @@ -82,7 +82,7 @@ get_testdata_combo <- function() { #' #' Function to obtain data from gDRtestData and prepare for unit tests #' -#' @examples +#' @examples #' get_testdata_codilution() #' #' @keywords test_helpers @@ -90,26 +90,26 @@ get_testdata_combo <- function() { #' #' @export get_testdata_codilution <- function() { - + mae <- get_synthetic_data("finalMAE_combo_codilution_small.qs2") raw_data <- convert_mae_assay_to_dt(mae, "Metrics") drug_names <- unique(raw_data$DrugName) cell_line_names <- unique(raw_data$CellLineName) - + # getting first occurrence of drug_names for each cell_line to avoid aggregation dt <- raw_data[, .SD[1], by = c("DrugName", "CellLineName")] data.table::setnames(dt, c("CellLineName", "DrugName", "drug_moa", "DrugName_2", "drug_moa_2", - "x_inf", "x_0", "xc50", "h", "r2", "x_sd_avg", "x_mean", "x_AOC_range", + "x_inf", "x_0", "xc50", "h", "r2", "x_sd_avg", "x_mean", "x_AOC_range", "x_max", "maxlog10Concentration", "Concentration_2"), c("Cell Line Name", "Drug Name", "Drug MOA", "Drug Name 2", "Drug MOA2", - "GR Inf", "GR 0", "GEC50", "h GR", "E Inf", "E0", "EC50", "h RV", + "GR Inf", "GR 0", "GEC50", "h GR", "E Inf", "E0", "EC50", "h RV", "GR Max", "Concentration", "Concentration 2")) dt$GR50 <- dt$EC50 dt$IC50 <- dt$EC50 dt$`E Max` <- dt$EC50 dt$`GR value` <- dt$EC50 - + list( drug_names = drug_names, cell_line_names = cell_line_names, @@ -126,7 +126,7 @@ get_testdata_codilution <- function() { #' @param n number of records #' @keywords test_helpers #' -#' @examples +#' @examples #' gen_synthetic_data() #' #' @return list with drugs, cell_lines, raw_data and assay_data diff --git a/R/identifiers.R b/R/identifiers.R index dc8246bd..3858e775 100644 --- a/R/identifiers.R +++ b/R/identifiers.R @@ -44,10 +44,10 @@ NULL #' @rdname identifiers -#' +#' #' @keywords identifiers #' @return list or charvec depends on unify param -#' +#' #' @export get_env_identifiers <- function(k = NULL, simplify = TRUE) { if (simplify) { @@ -64,12 +64,12 @@ get_env_identifiers <- function(k = NULL, simplify = TRUE) { #' @rdname identifiers -#' +#' #' @keywords identifiers #' @return list or charvec depends on unify param -#' +#' #' @export -#' +#' get_prettified_identifiers <- function(k = NULL, simplify = TRUE) { idfs <- get_env_identifiers(k, simplify = simplify) pidfs <- prettify_flat_metrics(idfs, human_readable = TRUE) @@ -87,10 +87,10 @@ get_prettified_identifiers <- function(k = NULL, simplify = TRUE) { #' Get identifiers required for downstream analysis. #' @keywords identifiers #' @return charvec -#' -#' @examples +#' +#' @examples #' get_required_identifiers() -#' +#' #' @export get_required_identifiers <- function() { REQ_COL_IDENTIFIERS @@ -99,10 +99,10 @@ get_required_identifiers <- function() { #' Get gDR default identifiers required for downstream analysis. #' @keywords identifiers #' @return charvec -#' -#' @examples +#' +#' @examples #' get_default_identifiers() -#' +#' #' @export get_default_identifiers <- function() { IDENTIFIERS_LIST @@ -114,10 +114,10 @@ get_default_identifiers <- function() { #' #' @keywords identifiers #' @return charvec -#' -#' @examples +#' +#' @examples #' get_idfs_synonyms() -#' +#' #' @export get_idfs_synonyms <- function() { SYNONYMS_LIST @@ -132,12 +132,12 @@ get_idfs_synonyms <- function() { #' @keywords identifiers #' #' @return list -#' -#' @examples +#' +#' @examples #' mdict <- list(duration = "time") #' iv <- c("Time", "Duration", "time") #' update_idfs_synonyms(iv, dict = mdict) -#' +#' #' @export update_idfs_synonyms <- function(data, dict = get_idfs_synonyms()) { @@ -165,10 +165,10 @@ update_idfs_synonyms <- function(data, dict = get_idfs_synonyms()) { #' Get identifiers that expect only one value for each identifier. #' @keywords identifiers #' @export -#' -#' @examples +#' +#' @examples #' get_expect_one_identifiers() -#' +#' #' @return charvec get_expect_one_identifiers <- function() { EXPECT_ONE_IDENTIFIERS @@ -180,7 +180,7 @@ get_expect_one_identifiers <- function() { #' @export #' #' @return \code{NULL} -#' +#' set_env_identifier <- function(k, v) { .set_id(k, v) } @@ -191,7 +191,7 @@ set_env_identifier <- function(k, v) { #' @export #' #' @return \code{NULL} -#' +#' reset_env_identifiers <- function() { .reset_ids() } @@ -202,12 +202,12 @@ reset_env_identifiers <- function() { #' @param get_description return descriptions only, boolean #' @param get_example return examples only, boolean #' @keywords identifiers -#' -#' @examples +#' +#' @examples #' get_identifiers_dt() -#' +#' #' @return named list -#' +#' #' @export get_identifiers_dt <- function(k = NULL, get_description = FALSE, get_example = FALSE) { checkmate::assert_string(k, null.ok = TRUE) @@ -259,7 +259,7 @@ get_identifiers_dt <- function(k = NULL, get_description = FALSE, get_example = #' update_env_idfs_from_mae(list(get_env_identifiers())) #' #' @return \code{NULL} -#' +#' #' @export update_env_idfs_from_mae <- function(mae_idfs) { checkmate::assert_list(mae_idfs) diff --git a/R/identifiers_list.R b/R/identifiers_list.R index 0ba580b9..f74f3278 100644 --- a/R/identifiers_list.R +++ b/R/identifiers_list.R @@ -26,22 +26,22 @@ IDENTIFIERS_LIST <- list( concentration = "Concentration", template = c("Template", "Treatment"), barcode = c("Barcode", "Plate"), - - # ids for the 2nd drug + + # ids for the 2nd drug drug2 = "Gnumber_2", drug_name2 = "DrugName_2", drug_moa2 = "drug_moa_2", concentration2 = "Concentration_2", - - # ids for the 3rd drug + + # ids for the 3rd drug drug3 = "Gnumber_3", drug_name3 = "DrugName_3", drug_moa3 = "drug_moa_3", concentration3 = "Concentration_3", - + # data source data_source = "data_source", - + # replicate replicate = "Replicate", diff --git a/R/json_const.R b/R/json_const.R index 469debea..e12d8654 100644 --- a/R/json_const.R +++ b/R/json_const.R @@ -1,30 +1,30 @@ #' Get settings from JSON file -#' In most common scenario the settings are stored in JSON file +#' In most common scenario the settings are stored in JSON file #' to avoid hardcoding #' #' @param s charvec with setting entry/entries #' @param json_path string with the path to the JSON file #' #' @return value/values for entry/entries from JSON file -#' +#' #' @examples #' if (!nchar(system.file(package="gDRutils"))) { #' get_settings_from_json() #' } -#' +#' #' @keywords json_const #' @export #' get_settings_from_json <- function(s = NULL, json_path = system.file(package = "gDRutils", "settings.json")) { - + checkmate::assert_character(s, null.ok = TRUE) checkmate::assert_file_exists(json_path) - + cache_l <- jsonlite::fromJSON(json_path) - + if (!is.null(s)) { checkmate::assert_subset(s, names(cache_l)) cache_l[[s]] @@ -38,7 +38,7 @@ get_settings_from_json <- #' #' @param k key #' @param prettify change to upper case and add underscore, iso_level --> Iso_Level -#' +#' #' @examples #' get_isobologram_columns() #' get_isobologram_columns("iso_level", prettify = TRUE) @@ -49,7 +49,7 @@ get_settings_from_json <- get_isobologram_columns <- function(k = NULL, prettify = TRUE) { checkmate::assert_character(k, null.ok = TRUE) checkmate::assert_flag(prettify) - + ic <- get_settings_from_json("ISOBOLOGRAM_COLUMNS", system.file(package = "gDRutils", "settings.json")) if (!is.null(k)) { @@ -57,6 +57,6 @@ get_isobologram_columns <- function(k = NULL, prettify = TRUE) { } else { out <- ic } - + gsub(" ", "_", prettify_flat_metrics(out, human_readable = prettify)) } \ No newline at end of file diff --git a/R/json_convert.R b/R/json_convert.R index a4b4a11f..24d7dd8a 100644 --- a/R/json_convert.R +++ b/R/json_convert.R @@ -8,11 +8,11 @@ #' #' @return String representation of a JSON document. #' -#' @examples +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' convert_mae_to_json(mae) #' convert_mae_to_json(mae, with_experiments = FALSE) -#' +#' #' @export convert_mae_to_json <- function(mae, with_experiments = TRUE) { @@ -62,9 +62,9 @@ convert_mae_to_json <- function(mae, with_experiments = TRUE) { #' description = "description of experiment", #' source = list(name = "GeneData_Screener", id = "QCS-12345")) #' rdata <- data.table::data.table( -#' mydrug = letters, -#' mydrugname = letters, -#' mydrugmoa = letters, +#' mydrug = letters, +#' mydrugname = letters, +#' mydrugmoa = letters, #' Duration = 1) #' cdata <- data.table::data.table(mycellline = letters, mycelllinename = letters, #' mycelllinetissue = letters, cellline_ref_div_time = letters) @@ -84,7 +84,7 @@ convert_mae_to_json <- function(mae, with_experiments = TRUE) { #' #' @export convert_se_to_json <- function(se) { - + ml <- list( mjson = convert_metadata_to_json(se), rjson = convert_rowData_to_json(rowData(se), get_SE_identifiers(se)), @@ -145,9 +145,9 @@ convert_metadata_to_json <- function(se) { #' #' @examples #' rdata <- data.table::data.table( -#' mydrug = letters, -#' mydrugname = letters, -#' mydrugmoa = letters, +#' mydrug = letters, +#' mydrugname = letters, +#' mydrugmoa = letters, #' Duration = 1) #' identifiers <- list(drug = "mydrug", drug_name = "mydrugname", drug_moa = "mydrugmoa", #' duration = "Duration") @@ -156,7 +156,7 @@ convert_metadata_to_json <- function(se) { #' @details Standardizes the \code{rdata} to common schema fields #' and tidies formatting to be condusive to joining #' with other JSON responses. -#' +#' #' @keywords json_convert #' @export convert_rowData_to_json <- @@ -181,8 +181,8 @@ convert_rowData_to_json <- #' #' @examples #' cdata <- data.table::data.table( -#' mycellline = letters, -#' mycelllinename = letters, +#' mycellline = letters, +#' mycelllinename = letters, #' mycelllinetissue = letters, #' cellline_ref_div_time = "cellline_ref_div_time") #' identifiers <- list(cellline = "mycellline", @@ -194,7 +194,7 @@ convert_rowData_to_json <- #' @details Standardizes the \code{cdata} to common schema fields #' and tidies formatting to be condusive to joining #' with other JSON responses. -#' +#' #' @keywords json_convert #' @export convert_colData_to_json <- @@ -227,7 +227,7 @@ convert_colData_to_json <- stopifnot(all(req_cols %in% names(mdata))) mdata <- data.table::as.data.table(as.list(mdata)) - + main_mdata <- mdata[, req_cols, with = FALSE] mjson <- jsonlite::toJSON(main_mdata, "columns") mjson <- strip_first_and_last_char(mjson) diff --git a/R/json_validate.R b/R/json_validate.R index c61fb728..9e4af211 100644 --- a/R/json_validate.R +++ b/R/json_validate.R @@ -4,7 +4,7 @@ #' #' @details This is most often used to validate JSON #' before passing it in as a document to an ElasticSearch index. -#' +#' #' @param json String of JSON in memory. #' @param schema_path String of the schema to validate against. #' @keywords json_validate @@ -68,10 +68,10 @@ validate_json <- function(json, schema_path) { #' #' @return Boolean of whether or not mae is valid #' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' validate_mae_with_schema(mae) -#' +#' #' @export validate_mae_with_schema <- function(mae, diff --git a/R/manage_additional_metadata.R b/R/manage_additional_metadata.R index 5de3503d..dd3ea71e 100644 --- a/R/manage_additional_metadata.R +++ b/R/manage_additional_metadata.R @@ -11,8 +11,8 @@ #' @keywords metadata_management #' #' @return The same object with an added S3 class. -#' -#' @examples +#' +#' @examples #' addClass(data.table::data.table(), "someClass") #' #' @export @@ -77,14 +77,14 @@ modifyData.drug_name2 <- function(x, option, keep, ...) { checkmate::assert_string(option) checkmate::assert_choice(option, c("average", "toDrug", "toCellLine")) checkmate::assert_string(keep, null.ok = TRUE) - + pidfs <- get_prettified_identifiers(simplify = TRUE) drug_name <- pidfs[["drug_name"]] drug_name2 <- pidfs[["drug_name2"]] conc2 <- pidfs[["concentration2"]] drug2 <- pidfs[["drug2"]] cell_name <- pidfs[["cellline_name"]] - + if (option == "average") { # drop data and keep only the requested value x <- average_biological_replicates_dt(x, drug_name2, prettified = TRUE) @@ -108,7 +108,7 @@ modifyData.drug_name2 <- function(x, option, keep, ...) { sub(" \\(.*? at 0\\.?0* μM\\)", "", x[[cell_name]]) } } - + # drop the additional columns x[c(drug_name2, conc2, drug2)] <- NULL # remove special class @@ -124,13 +124,13 @@ modifyData.data_source <- function(x, option, keep, ...) { checkmate::assert_string(option) checkmate::assert_choice(option, c("average", "toDrug", "toCellLine")) checkmate::assert_string(keep, null.ok = TRUE) - + pidfs <- get_prettified_identifiers(simplify = TRUE) dt_src <- pidfs[["data_source"]] drug <- pidfs[["drug_name"]] clid <- pidfs[["cellline"]] cl_name <- pidfs[["cellline_name"]] - + if (option == "average") { # drop data and keep only the requested value x <- average_biological_replicates_dt(x, dt_src, prettified = TRUE) @@ -142,7 +142,7 @@ modifyData.data_source <- function(x, option, keep, ...) { drug_to_replace <- x[drug_idx, drug] x[drug_idx, drug] <- vapply( - seq_len(length(drug_to_replace)), + seq_along(drug_to_replace), function(y) sprintf("%s (%s)", drug_to_replace[y], x[, dt_src][y]), "string") } else if (option == "toCellLine") { cell_lines_to_combine <- unique(x[duplicated_rows, cl_name]) @@ -173,10 +173,10 @@ modifyData.default <- function(x, option, keep, ...) { pidfs[[additional_var_names]] } else { additional_var_names - } + } cell_name <- pidfs[["cellline_name"]] drug_name <- pidfs[["drug_name"]] - + if (option == "average") { # drop data and keep only the requested value x <- average_biological_replicates_dt(x, additional_var, prettified = TRUE) diff --git a/R/merge_SE.R b/R/merge_SE.R index 01b03093..43bf4aa7 100644 --- a/R/merge_SE.R +++ b/R/merge_SE.R @@ -34,15 +34,15 @@ merge_MAE <- function(MAElist, description = NULL, source_name = NULL, source_id = NULL) { - + checkmate::assert_list(MAElist, types = "MultiAssayExperiment") checkmate::assert_string(title, null.ok = TRUE) checkmate::assert_string(description, null.ok = TRUE) checkmate::assert_string(source_name, null.ok = TRUE) checkmate::assert_string(source_id, null.ok = TRUE) - + experiments <- unique(unlist(lapply(MAElist, names))) - + merged_SE_assays <- lapply(experiments, function(exp_name) { exp_list <- lapply(MAElist, function(mae) { if (exp_name %in% names(mae)) mae[[exp_name]] else NULL @@ -51,15 +51,15 @@ merge_MAE <- function(MAElist, merge_SE(exp_list) }) names(merged_SE_assays) <- experiments - + mae_names <- names(MAElist) if (is.null(mae_names) || all(trimws(mae_names) == "")) { mae_names <- paste0("Dataset_", seq_along(MAElist)) } - + all_sources <- list() original_titles <- c() - + for (mae in MAElist) { for (exp in names(mae)) { meta <- as.list(S4Vectors::metadata(mae[[exp]])$experiment_metadata) @@ -69,26 +69,26 @@ merge_MAE <- function(MAElist, } } } - + if (is.null(title)) { title <- sprintf("Merged MAE: %s", paste(mae_names, collapse = " + ")) } - + if (is.null(description)) { - description <- sprintf("Synthetically merged dataset originating from: %s.", paste(mae_names, collapse = ", ")) + description <- sprintf("Synthetically merged dataset originating from: %s.", toString(mae_names)) unique_titles <- unique(original_titles) if (length(unique_titles) > 0) { - description <- paste0(description, " Original Titles: [", paste(unique_titles, collapse = " | "), "]") + description <- paste(description, " Original Titles: [", paste(unique_titles, collapse = " | "), "]") } } - + if (is.null(source_name)) { if (length(all_sources) > 0) { unique_names <- unique(vapply(all_sources, function(s) { if (!is.null(s$name)) s$name else "unknown" }, character(1))) - + source_name <- if (length(unique_names) == 1 && unique_names[1] != "unknown") { unique_names[1] } else { @@ -98,40 +98,40 @@ merge_MAE <- function(MAElist, source_name <- "merged_analysis" } } - + if (is.null(source_id)) { source_id <- "merged_dataset" } - + synthetic_experiment_metadata <- list( title = title, description = description, experimentalist = Sys.info()[["user"]], sources = list(list(name = source_name, id = source_id)) ) - + for (i in seq_along(merged_SE_assays)) { meta_list <- as.list(S4Vectors::metadata(merged_SE_assays[[i]])) meta_list$experiment_metadata <- synthetic_experiment_metadata S4Vectors::metadata(merged_SE_assays[[i]]) <- meta_list } - + base_metadata <- as.list(S4Vectors::metadata(MAElist[[1]])) if (length(base_metadata) == 0) base_metadata <- list() - + if (!is.null(base_metadata$.internal$DataSetDB$dataset)) { ds_meta <- as.list(base_metadata$.internal$DataSetDB$dataset) ds_meta$title <- synthetic_experiment_metadata$title ds_meta$description <- synthetic_experiment_metadata$description ds_meta$sources <- synthetic_experiment_metadata$sources - + internal_meta <- as.list(base_metadata$.internal) internal_meta$DataSetDB <- as.list(internal_meta$DataSetDB) internal_meta$DataSetDB$dataset <- ds_meta - + base_metadata$.internal <- internal_meta } - + MultiAssayExperiment::MultiAssayExperiment( experiments = MultiAssayExperiment::ExperimentList(merged_SE_assays), metadata = base_metadata @@ -169,12 +169,12 @@ merge_SE <- function(SElist, checkmate::assert_list(SElist, types = "SummarizedExperiment") checkmate::assert_string(additional_col_name, null.ok = TRUE) checkmate::assert_character(discard_keys, null.ok = TRUE) - + SE_identifiers <- unique(lapply(SElist, get_SE_identifiers))[[1]] lapply(names(SE_identifiers), function(x) { set_env_identifier(x, SE_identifiers[[x]]) }) - + discard_keys <- c(discard_keys, unique(unlist( lapply(SElist, get_SE_identifiers, c("barcode", @@ -189,9 +189,9 @@ merge_SE <- function(SElist, additional_col_name = additional_col_name, discard_keys = discard_keys) }) - + names(merged_assays) <- se_assays - + if (!is.null(additional_col_name)) { data.table::set(merged_assays$Averaged$DT, , intersect(names(merged_assays$Averaged$DT), @@ -207,10 +207,10 @@ merge_SE <- function(SElist, metadataNames <- setdiff(metadataNames, identifiersNames) identifiers <- S4Vectors::metadata(SElist[[1]])[identifiersNames] } - + metadata <- merge_metadata(SElist, metadataNames) metadata <- c(metadata, identifiers) - + assays <- lapply( merged_assays, FUN = function(x) { @@ -220,7 +220,7 @@ merge_SE <- function(SElist, bm_assay } ) - + p_list <- list( assays = assays, @@ -246,28 +246,28 @@ merge_SE <- function(SElist, #' @keywords SE_operators #' #' @return BumpyMatrix or list with data.table + BumpyMatrix -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_combo_2dose_nonoise.qs2") -#' +#' #' listSE <- list( -#' combo1 = mae[[1]], +#' combo1 = mae[[1]], #' sa = mae[[2]] #' ) #' merge_assay(listSE, "Normalized") -#' +#' #' @export #' merge_assay <- function(SElist, assay_name, additional_col_name = "data_source", discard_keys = NULL) { - + checkmate::assert_list(SElist, types = "SummarizedExperiment") checkmate::assert_string(assay_name) checkmate::assert_string(additional_col_name, null.ok = TRUE) checkmate::assert_character(discard_keys, null.ok = TRUE) - + SElist <- lapply(SElist, function(x) { if (assay_name %in% SummarizedExperiment::assayNames(x)) { x @@ -278,17 +278,17 @@ merge_assay <- function(SElist, x } }) - + DT <- data.table::rbindlist(lapply(stats::setNames(names(SElist), names(SElist)), function(y) { convert_se_assay_to_dt(SElist[[y]], assay_name) }), fill = TRUE, idcol = additional_col_name) - + drug_cols <- unlist(get_env_identifiers(c("drug", "drug2", "drug3"), simplify = FALSE)) existing_drug_cols <- intersect(drug_cols, names(DT)) DT[, (existing_drug_cols) := lapply(.SD, remove_drug_batch), .SDcols = existing_drug_cols] - + DT$rId <- DT$cId <- NULL discard_keys <- intersect(names(DT), c(discard_keys, additional_col_name)) BM <- df_to_bm_assay(DT, discard_keys = discard_keys) @@ -301,20 +301,20 @@ merge_assay <- function(SElist, #' @keywords SE_operators #' #' @return character vector of unique names of metadata -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' SElist <- list( -#' se, +#' se, #' se #' ) #' identify_unique_se_metadata_fields(SElist) -#' +#' #' @export identify_unique_se_metadata_fields <- function(SElist) { checkmate::assert_list(SElist, types = "SummarizedExperiment") - + unique(unlist(lapply(SElist, function(x) { names(S4Vectors::metadata(x)) }))) @@ -327,72 +327,72 @@ identify_unique_se_metadata_fields <- function(SElist) { #' @keywords SE_operators #' #' @return list of merged metadata -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' listSE <- list( -#' se, +#' se, #' se #' ) #' metadata_fields <- identify_unique_se_metadata_fields(listSE) #' merge_metadata(listSE, metadata_fields) -#' +#' #' @export #' merge_metadata <- function(SElist, metadata_fields) { - + checkmate::assert_list(SElist, types = "SummarizedExperiment") checkmate::assert_character(metadata_fields) - + all_metadata <- lapply(metadata_fields, function(x) { - + if (x %in% c("experiment_metadata", ".internal")) { - + valid_metas <- lapply(SElist, function(se) S4Vectors::metadata(se)[[x]]) valid_metas <- valid_metas[!vapply(valid_metas, is.null, FUN.VALUE = logical(1))] - + if (length(valid_metas) == 0) return(list()) - + if (x == "experiment_metadata") { - synth <- as.list(valid_metas[[1]]) - + synth <- as.list(valid_metas[[1]]) + all_sources <- list() for (vm in valid_metas) { vm_list <- as.list(vm) if (is.list(vm_list$sources)) all_sources <- c(all_sources, vm_list$sources) } - + if (length(all_sources) > 0) { unique_names <- unique(vapply(all_sources, function(s) { if (!is.null(s$name)) s$name else "unknown" }, character(1))) - + std_name <- if (length(unique_names) == 1 && unique_names[1] != "unknown") { unique_names[1] } else { "merged_analysis" } - + synth$sources <- list(list(name = std_name, id = "merged_dataset")) } else { synth$sources <- list() } - + return(synth) } - + return(as.list(valid_metas[[1]])) } - + do.call(c, lapply(names(SElist), function(SE) { meta <- list(S4Vectors::metadata(SElist[[SE]])[[x]]) names(meta) <- SE meta })) }) - + names(all_metadata) <- metadata_fields all_metadata } diff --git a/R/packages.R b/R/packages.R index 8a4ca067..01edc886 100644 --- a/R/packages.R +++ b/R/packages.R @@ -53,4 +53,3 @@ if (getRversion() >= "2.15.1") { ), utils::packageName()) } - diff --git a/R/prettify.R b/R/prettify.R index a989451b..e4c5d4f4 100644 --- a/R/prettify.R +++ b/R/prettify.R @@ -18,7 +18,7 @@ #' of front-end applications, whereas \code{"human_readable" = FALSE} is often used for #' prettification in the context of the command line. #' -#' @examples +#' @examples #' x <- c("CellLineName", "Tissue", "Primary Tissue", "GR_gDR_x_mean", "GR_gDR_xc50", "RV_GDS_x_mean") #' prettify_flat_metrics(x, human_readable = FALSE) #' @@ -27,9 +27,9 @@ prettify_flat_metrics <- function(x, human_readable = FALSE, normalization_type = c("GR", "RV")) { - + new_names <- .convert_norm_specific_metrics(x, normalization_type = normalization_type) - + if (human_readable) { new_names <- .prettify_GDS_columns(new_names) new_names <- .prettify_metadata_columns(new_names) @@ -37,7 +37,7 @@ prettify_flat_metrics <- function(x, new_names <- .prettify_cotreatment_columns(new_names) new_names <- gsub("_", " ", new_names) } - + # gDR is the default name. new_names <- gsub("gDR", "", new_names) new_names <- gsub("^_+", "", new_names) @@ -51,32 +51,32 @@ prettify_flat_metrics <- function(x, #################### #' This function change raw names of metric from long format table into more descriptive -#' names in the wide format table. +#' names in the wide format table. #' It works for metrics: \code{colnames(get_header("metrics_names"))} -#' +#' #' @return object with more descriptive names -#' +#' #' @keywords internal .convert_norm_specific_metrics <- function(x, normalization_type) { - + # to do not crush app for unsupported norm type - normalization_type <- + normalization_type <- normalization_type[normalization_type %in% rownames(get_header("metrics_names"))] - + for (norm in normalization_type) { metrics_names <- get_header("metrics_names")[norm, ] - + is_norm <- grepl(norm, x) - + if (!sum(is_norm)) next # to skip loop below (nothing to change) - + # to do not touch combo metrics - combo_pattern <- paste(c(names(get_combo_score_field_names()), + combo_pattern <- paste(c(names(get_combo_score_field_names()), names(get_combo_excess_field_names())), collapse = "|") is_combo <- grepl(combo_pattern, x) - + if (!sum(is_norm & !is_combo)) next # to skip loop below (nothing to change) - + for (name in names(metrics_names)) { replace <- is_norm & grepl(name, x) & !is_combo x[replace] <- gsub(name, metrics_names[[name]], gsub(norm, "", x[replace])) @@ -96,13 +96,13 @@ prettify_flat_metrics <- function(x, #' @keywords internal .prettify_cotreatment_columns <- function(cols) { - + # Replace underscore by space for the Drug/Concentration for co-treatment. pattern <- "[0-9]+" conc_cotrt <- paste0("^Concentration_", pattern, "$") drug_cotrt <- paste0("^", get_env_identifiers("drug", simplify = TRUE), "_", pattern, "$|^drug_.*", pattern, "$|^DrugName_", pattern, "$") - + replace <- grepl(paste0(conc_cotrt, "|", drug_cotrt), cols) cols[replace] <- gsub("_", " ", cols[replace]) cols[replace] <- gsub("Name", "", cols[replace]) @@ -118,20 +118,20 @@ prettify_flat_metrics <- function(x, "GRvalue" = "GR value", "RelativeViability" = "Relative Viability", "mean" = "Mean Viability") - + for (i in names(metric_patterns)) { cols <- gsub(i, metric_patterns[i], cols) } - + cols } #' @keywords internal .prettify_metadata_columns <- function(cols) { - + # prettifying formatting - + prettified_cols <- gsub("gDR", "", cols) prettified_cols <- gsub("_", " ", prettified_cols) prettified_cols <- tools::toTitleCase(prettified_cols) @@ -139,6 +139,6 @@ prettify_flat_metrics <- function(x, prettified_cols <- gsub("Hsa ", "HSA ", prettified_cols) # adding space between words like “ReferenceDivisionTime” prettified_cols <- gsub("([a-z])([A-Z])", "\\1 \\2", prettified_cols) - + trimws(prettified_cols) } diff --git a/R/se_metadata.R b/R/se_metadata.R index 5a16d733..074f4589 100644 --- a/R/se_metadata.R +++ b/R/se_metadata.R @@ -7,7 +7,7 @@ #' @param key_type string of a specific key type (i.e. 'nested_keys', etc.). #' @param id_type string of a specific id type (i.e. 'duration', 'cellline_name', etc.). #' @param simplify Boolean indicating whether output should be simplified. -#' @param append Boolean indicating whether to append the new metadata value to the existing entry. +#' @param append Boolean indicating whether to append the new metadata value to the existing entry. #' #' @details #' For \code{*et_SE_processing_metadata}, get/set metadata for the processing info that defines @@ -74,12 +74,12 @@ set_SE_experiment_raw_data <- function(se, value) { # Getters ############ #' @rdname SE_metadata -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' get_SE_fit_parameters(se) -#' +#' #' @keywords SE_operators #' @export get_SE_fit_parameters <- function(se) { @@ -88,12 +88,12 @@ get_SE_fit_parameters <- function(se) { #' @rdname SE_metadata -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' meta <- get_SE_processing_metadata(se) -#' +#' #' @keywords SE_operators #' @export get_SE_processing_metadata <- function(se) { @@ -101,12 +101,12 @@ get_SE_processing_metadata <- function(se) { } #' @rdname SE_metadata -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' get_SE_experiment_raw_data(se) -#' +#' #' @keywords SE_operators #' @export get_SE_experiment_raw_data <- function(se) { @@ -114,12 +114,12 @@ get_SE_experiment_raw_data <- function(se) { } #' @rdname SE_metadata -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' get_SE_experiment_metadata(se) -#' +#' #' @keywords SE_operators #' @export get_SE_experiment_metadata <- function(se) { @@ -140,12 +140,12 @@ get_SE_keys <- function(se, key_type = NULL) { ############## #' @rdname SE_metadata -#' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' get_SE_identifiers(se) -#' +#' #' @keywords SE_operators #' @export get_SE_identifiers <- function(se, id_type = NULL, simplify = TRUE) { @@ -163,7 +163,7 @@ get_SE_identifiers <- function(se, id_type = NULL, simplify = TRUE) { SIMPLIFY = FALSE) out <- id_vector(id_type) } - + out } @@ -173,16 +173,16 @@ get_SE_identifiers <- function(se, id_type = NULL, simplify = TRUE) { #' @param mae MultiAssayExperiment #' #' @return named list with identifiers for each SE -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' get_MAE_identifiers(mae) -#' +#' #' @keywords SE_operators #' @export get_MAE_identifiers <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + MAEpply(mae, get_SE_identifiers) } @@ -223,7 +223,7 @@ set_SE_identifiers <- function(se, value) { v <- S4Vectors::metadata(se)[[name]] if (!is.null(subname)) { if (!subname %in% names(v) && strict) { - stop(sprintf("'%s' is not one of valid subname(s): '%s'", subname, paste0(names(v), collapse = ", "))) + stop(sprintf("'%s' is not one of valid subname(s): '%s'", subname, toString(names(v)))) } v <- v[[subname]] } @@ -235,11 +235,11 @@ set_SE_identifiers <- function(se, value) { #' @noRd .set_SE_metadata <- function(se, name, value, append = FALSE) { current_metadata <- .get_SE_metadata(se, name) - + if (is.null(current_metadata) || S4Vectors::isEmpty(current_metadata)) { current_metadata <- NULL } - + if (!is.null(current_metadata)) { if (append) { S4Vectors::metadata(se)[[name]] <- c(current_metadata, value) @@ -250,6 +250,6 @@ set_SE_identifiers <- function(se, value) { } else { S4Vectors::metadata(se)[[name]] <- value } - + se } diff --git a/R/split_SE_components.R b/R/split_SE_components.R index a0457f0d..5e7f6dba 100755 --- a/R/split_SE_components.R +++ b/R/split_SE_components.R @@ -42,7 +42,7 @@ #' Next, any cell line metadata will be heuristically extracted. #' Finally, all remaining columns will be combined on either the rows or columns as specified by #' \code{combine_on}. -#' +#' #' @export #' split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) { @@ -62,8 +62,8 @@ split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) { identifiers_md$well_position, identifiers_md$template, nested_keys, get_header("scores_results"), get_header("excess_results"), get_header("isobolograms_results"))) data_cols <- data_fields[data_fields %in% all_cols] - md_cols <- setdiff(all_cols, data_cols) - md <- unique(df_[, md_cols]) + md_cols <- setdiff(all_cols, data_cols) + md <- unique(df_[, md_cols]) colnames_list <- .extract_colnames(identifiers_md, md_cols) remaining_cols <- colnames_list$remaining_cols cell_cols <- colnames_list$cell_cols @@ -102,7 +102,7 @@ split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) { cell_id <- identifiers_md$cellline cell_fields <- c(cell_id, get_header("add_clid"), identifiers_md["replicate"]) cell_cols <- cell_fields[cell_fields %in% md_cols] - + remaining_cols <- setdiff(md_cols, c(drug_cols, cell_cols)) list( remaining_cols = remaining_cols, @@ -117,7 +117,7 @@ split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) { cell_cols <- unique(c(cell_cols, cl_entries)) if (combine_on == 1L) { # sort remaining columns to assure that rId across the assays will be the same - # example "Gnumber_bn" "Media" "fix5.aza" + # example "Gnumber_bn" "Media" "fix5.aza" trt_cols <- c(trt_cols, sort(remaining_cols)) } else if (combine_on == 2L) { # sort remaining columns to assure that cId across the assays will be the same @@ -126,7 +126,7 @@ split_SE_components <- function(df_, nested_keys = NULL, combine_on = 1L) { stop(sprintf("combine_on input: '%s' of class: '%s' is not supported", combine_on, class(combine_on))) } - + list( condition_md = add_rownames_to_metadata(md, cell_cols), treatment_md = add_rownames_to_metadata(md, trt_cols) diff --git a/R/standardize_MAE.R b/R/standardize_MAE.R index 818dbf7d..6b1221ce 100644 --- a/R/standardize_MAE.R +++ b/R/standardize_MAE.R @@ -4,14 +4,14 @@ #' @param use_default boolean indicating whether or not to use default #' identifiers for standardization #' @keywords standardize_MAE -#' +#' #' @return mae a MultiAssayExperiment with default gDR identifiers -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' S4Vectors::metadata(mae[[1]])$identifiers$drug <- "druug" #' standardize_mae(mae) -#' +#' #' @export standardize_mae <- function(mae, use_default = TRUE) { checkmate::assert_class(mae, "MultiAssayExperiment") @@ -30,21 +30,21 @@ standardize_mae <- function(mae, use_default = TRUE) { #' @keywords standardize_MAE #' #' @return se a SummarizedExperiment with default gDR identifiers -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' S4Vectors::metadata(se)$identifiers$drug <- "druug" #' standardize_se(se) -#' +#' #' @export standardize_se <- function(se, use_default = TRUE) { checkmate::assert_class(se, "SummarizedExperiment") - + reset_env_identifiers() idfs <- get_default_identifiers() idfs_se <- get_SE_identifiers(se) - + if (use_default) { from_idfs <- idfs_se to_idfs <- idfs @@ -59,7 +59,7 @@ standardize_se <- function(se, use_default = TRUE) { diff_identifiers <- .extract_diff_identifiers(matching_idfs$default, matching_idfs$se) diff_names <- unique(unlist(lapply(diff_identifiers, names))) - + if ("untreated_tag" %in% diff_names) { rowData(se) <- .replace_untreated_tag(rowData(se), to_idfs, @@ -80,7 +80,7 @@ standardize_se <- function(se, use_default = TRUE) { # Replace rowData, colData and assays rowData(se) <- rename_DFrame(rowData(se), mapping_vector) colData(se) <- rename_DFrame(colData(se), mapping_vector) - + assayList <- lapply(assays(se), function(x) { rename_bumpy(x, mapping_vector) }) @@ -106,7 +106,7 @@ standardize_se <- function(se, use_default = TRUE) { #' @keywords internal .extract_diff_identifiers <- function(default, se_identifiers) { - + diff_names <- names(which(vapply(names(se_identifiers), function(x) !identical(se_identifiers[[x]], default[[x]]), FUN.VALUE = logical(1)))) @@ -140,11 +140,11 @@ standardize_se <- function(se, use_default = TRUE) { #' @keywords standardize_MAE #' #' @return a renamed DFrame object -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' rename_DFrame(SummarizedExperiment::rowData(mae[[1]]), c("Gnumber" = "Gnumber1")) -#' +#' #' @export #' rename_DFrame <- function(df, mapping_vector) { @@ -163,13 +163,13 @@ rename_DFrame <- function(df, mapping_vector) { #' @keywords standardize_MAE #' #' @return a renamed BumpyMatrix object -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' assay <- SummarizedExperiment::assay(se) #' rename_bumpy(assay, c("Concentration" = "conc")) -#' +#' #' @export rename_bumpy <- function(bumpy, mapping_vector) { checkmate::assert_class(bumpy, "BumpyMatrix") @@ -190,7 +190,7 @@ rename_bumpy <- function(bumpy, mapping_vector) { get_optional_coldata_fields <- function(se) { checkmate::assert_class(se, "SummarizedExperiment") idfs <- get_SE_identifiers(se) - + as.character(idfs["cellline_tissue"]) } @@ -205,16 +205,16 @@ get_optional_rowdata_fields <- function(se) { checkmate::assert_class(se, "SummarizedExperiment") idfs <- get_SE_identifiers(se) rowdata <- SummarizedExperiment::rowData(se) - + out <- c(idfs["drug_moa"]) - + if (!is.null(rowdata[[idfs[["drug2"]]]])) { out <- c(out, idfs["drug_moa2"]) } if (!is.null(rowdata[[idfs[["drug3"]]]])) { out <- c(out, idfs["drug_moa3"]) } - + as.character(out) } @@ -229,21 +229,21 @@ get_optional_rowdata_fields <- function(se) { #' @keywords standardize_MAE #' #' @return refined colData -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' refine_coldata(SummarizedExperiment::colData(mae[[1]]), mae[[1]]) -#' +#' #' @export #' refine_coldata <- function(cd, se, default_v = "Undefined") { - + checkmate::assert_class(se, "SummarizedExperiment") checkmate::assert_class(cd, "DataFrame") checkmate::assert_string(default_v) - + undef_fields <- setdiff(get_optional_coldata_fields(se), colnames(cd)) - + if (length(undef_fields)) { cd[, undef_fields] <- default_v } @@ -261,21 +261,21 @@ refine_coldata <- function(cd, se, default_v = "Undefined") { #' @keywords standardize_MAE #' #' @return refined rowData -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' refine_rowdata(SummarizedExperiment::colData(mae[[1]]), mae[[1]]) -#' +#' #' @export #' refine_rowdata <- function(rd, se, default_v = "Undefined") { - + checkmate::assert_class(se, "SummarizedExperiment") checkmate::assert_class(rd, "DataFrame") checkmate::assert_string(default_v) - + undef_fields <- setdiff(get_optional_rowdata_fields(se), colnames(rd)) - + if (length(undef_fields)) { rd[, undef_fields] <- default_v } @@ -284,7 +284,7 @@ refine_rowdata <- function(rd, se, default_v = "Undefined") { #' Set unique primary identifiers in the data.frame-like objects #' -#' This function sets the primary field in the data.frame-like objects to be unique +#' This function sets the primary field in the data.frame-like objects to be unique #' by appending the secondary field in parentheses for duplicates. #' #' @param dt data.table, data.frame or DFrame with data @@ -297,18 +297,18 @@ refine_rowdata <- function(rd, se, default_v = "Undefined") { #' col_data <- set_unique_names_dt(col_data, primary_name = "CellLineName", secondary_name = "clid") #' @keywords standardize_MAE #' @export -#' +#' set_unique_names_dt <- function(dt, primary_name, secondary_name, sep = " ") { - + checkmate::assert( checkmate::check_class(dt, "data.table"), checkmate::check_class(dt, "DFrame"), checkmate::check_class(dt, "data.frame") ) - + checkmate::assert_choice(primary_name, names(dt)) checkmate::assert_choice(secondary_name, names(dt)) - + if (!is.null(dt[[primary_name]])) { unique_sets <- if (inherits(dt, "data.table")) { unique(dt[, c(primary_name, secondary_name), with = FALSE]) @@ -325,7 +325,7 @@ set_unique_names_dt <- function(dt, primary_name, secondary_name, sep = " ") { #' Set Unique Parental Identifiers #' -#' This function sets the `CellLineName` field in +#' This function sets the `CellLineName` field in #' `colData` to be unique by appending the `clid` in parentheses for duplicates. #' #' @param se A SummarizedExperiment object. @@ -340,17 +340,17 @@ set_unique_names_dt <- function(dt, primary_name, secondary_name, sep = " ") { #' @keywords standardize_MAE set_unique_cl_names <- function(se) { checkmate::assert_class(se, "SummarizedExperiment") - + col_data <- SummarizedExperiment::colData(se) col_data_new <- set_unique_cl_names_dt(col_data) SummarizedExperiment::colData(se) <- col_data_new - + se } #' Set unique primary cell line identifiers in the table #' -#' This function sets the primary cell line field in data.frame-like object to be unique +#' This function sets the primary cell line field in data.frame-like object to be unique #' by appending the secondary cell line field in parentheses for duplicates. #' #' @param dt data.table, data.frame or DFrame with the data @@ -363,7 +363,7 @@ set_unique_cl_names <- function(se) { #' col_data <- set_unique_cl_names_dt(col_data) #' @export #' @keywords standardize_MAE -#' +#' set_unique_cl_names_dt <- function(dt, primary_name = get_env_identifiers("cellline_name"), secondary_name = get_env_identifiers("cellline"), @@ -399,19 +399,19 @@ set_unique_cl_names_dt <- function(dt, #' @keywords standardize_MAE set_unique_drug_names <- function(se) { checkmate::assert_class(se, "SummarizedExperiment") - + row_data <- SummarizedExperiment::rowData(se) row_data_new <- set_unique_drug_names_dt(row_data) - + SummarizedExperiment::rowData(se) <- row_data_new se } #' Set unique primary drug identifiers in the table #' -#' This function sets the primary drug field(s) in data.frame-like object to be unique +#' This function sets the primary drug field(s) in data.frame-like object to be unique #' by appending the secondary drug field(s) in parentheses for duplicates. -#' By default `DrugName`, `DrugName_2`, and `DrugName_3` are primary drug fields, +#' By default `DrugName`, `DrugName_2`, and `DrugName_3` are primary drug fields, #' while `Gnumber`, `Gnumber_2`, and `Gnumber_3` are their respective secondary drug fields. #' #' @param dt data.table, data.frame or DFrame with the data @@ -433,15 +433,15 @@ set_unique_drug_names_dt <- function(dt, primary_names = unlist(get_env_identifiers()[(c("drug_name", "drug_name2", "drug_name3"))]), # nolint secondary_names = unlist(get_env_identifiers()[(c("drug", "drug2", "drug3"))]), sep = " ") { - - checkmate::assert_character(primary_names) - checkmate::assert_character(secondary_names) - + + checkmate::assert_character(primary_names) + checkmate::assert_character(secondary_names) + primary_names <- intersect(primary_names, names(dt)) secondary_names <- intersect(secondary_names, names(dt)) - + checkmate::assert_true(NROW(primary_names) == NROW(secondary_names)) - + if (NROW(primary_names)) { for (i in seq_along(primary_names)) { dt <- set_unique_names_dt(dt, primary_names[i], secondary_names[i], sep = sep) @@ -482,13 +482,13 @@ set_unique_drug_names_dt <- function(dt, #' @keywords standardize_MAE set_unique_identifiers <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + for (name in names(MultiAssayExperiment::experiments(mae))) { se <- MultiAssayExperiment::experiments(mae)[[name]] se <- set_unique_cl_names(se) se <- set_unique_drug_names(se) MultiAssayExperiment::experiments(mae)[[name]] <- se } - + return(mae) } diff --git a/R/utils.R b/R/utils.R index daf9ed67..6bf1921c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -4,7 +4,7 @@ dropped <- setdiff(keys, cols) if (length(dropped) != 0L) { warning(sprintf("ignoring input keys: '%s' which are not present in data.table", - paste0(dropped, collapse = ", "))) + toString(dropped))) } intersect(keys, cols) } @@ -18,12 +18,12 @@ assert_equal_input_len <- function(outlier, ...) { if (!h) { stop("unequal length objects provided as input") } - + contains_length_one <- length(first) == 1L || length(outlier) == 1L if (length(first) != length(outlier) && !contains_length_one) { stop("unequal lengths detected, either the fit parameters must be length one, or the tested value") } - + invisible(NULL) } @@ -32,10 +32,10 @@ assert_equal_input_len <- function(outlier, ...) { #' @param x string with normalization type #' #' @return shortened string representing the normalization type -#' -#' @examples +#' +#' @examples #' shorten_normalization_type_name("GRvalue") -#' +#' #' @keywords package_utils #' @export shorten_normalization_type_name <- function(x) { @@ -47,12 +47,12 @@ shorten_normalization_type_name <- function(x) { #' extend abbreviated normalization type #' #' @param x string with normalization type -#' +#' #' @return string -#' -#' @examples +#' +#' @examples #' extend_normalization_type_name("GR") -#' +#' #' @keywords package_utils #' @export extend_normalization_type_name <- function(x) { @@ -66,19 +66,19 @@ extend_normalization_type_name <- function(x) { #' @param x charvec expected subset #' @param choices charvec reference set #' @param ... Additional arguments to pass to \code{checkmate::test_choice} -#' +#' #' @return \code{NULL} -#' -#' @examples +#' +#' @examples #' assert_choices("x", c("x","y")) -#' +#' #' @keywords package_utils #' @export assert_choices <- function(x, choices, ...) { out <- vapply(x, function(y) { checkmate::test_choice(y, choices, ...) }, FUN.VALUE = logical(1)) - + if (!all(out)) { msg <- sprintf( @@ -101,13 +101,13 @@ assert_choices <- function(x, choices, ...) { #' @export #' #' @author Bartosz Czech -#' +#' #' @return list or vector depends on unify param -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' MAEpply(mae, SummarizedExperiment::assayNames) -#' +#' #' @keywords package_utils #' @export MAEpply <- function(mae, FUN, unify = FALSE, ...) { @@ -122,7 +122,7 @@ MAEpply <- function(mae, FUN, unify = FALSE, ...) { } else { data.table::rbindlist(lapply(out, data.table::as.data.table), fill = TRUE) } - + } else { out } @@ -169,39 +169,39 @@ loop <- function(x, temp_dir = Sys.getenv("GDR_TEMP_DIR", tempdir()), batch_size = as.numeric(Sys.getenv("GDR_BATCH_SIZE", 100)), ...) { - + checkmate::assert_vector(x, null.ok = FALSE) checkmate::assert_function(FUN) checkmate::assert_flag(parallelize) checkmate::assert_flag(use_batch) checkmate::assert_string(temp_dir) checkmate::assert_count(batch_size, positive = TRUE) - + parent_call <- sys.call(-1) parent_name <- if (!is.null(parent_call)) { deparse(parent_call[[1]]) } else { "unknown_parent_fun" } - + if (use_batch) { if (!dir.exists(temp_dir)) { dir.create(temp_dir, recursive = TRUE) } - + fun_name <- deparse(substitute(FUN)) if (any(grepl("function", fun_name))) { fun_name <- parent_name } - + user_id <- Sys.info()["user"] unique_id <- paste0(digest::digest(x, algo = "sha256"), "_", user_id) - + total_iterations <- length(x) batch_size <- min(batch_size, total_iterations) - + indices <- seq(batch_size, total_iterations, by = batch_size) - + completed_batches <- vapply(indices, function(start_index) { file_path <- file.path(temp_dir, paste0(fun_name, "_", @@ -210,13 +210,13 @@ loop <- function(x, total_iterations, "_batch.qs2")) file.exists(file_path) }, logical(1)) - + start_index <- indices[!completed_batches][1] if (is.na(start_index)) { message("All batches are already completed.") start_index <- indices[length(indices)] + batch_size } - + if (parallelize) { BiocParallel::bplapply(indices[indices >= start_index], function(start_index) { end_index <- min(start_index, total_iterations) @@ -230,7 +230,7 @@ loop <- function(x, start_index, fun_name, unique_id, total_iterations, temp_dir, FUN, ...) }) } - + final_results <- list() for (start_index in indices) { file_path <- file.path(temp_dir, @@ -243,7 +243,7 @@ loop <- function(x, final_results <- c(final_results, batch_results) } } - + for (start_index in indices) { file_path <- file.path(temp_dir, paste0(fun_name, "_", @@ -254,7 +254,7 @@ loop <- function(x, file.remove(file_path) } } - + return(final_results) } else { if (parallelize) { @@ -287,12 +287,12 @@ loop <- function(x, #' #' @keywords package_utils #' @export -process_batch <- function(batch, - start_index, - fun_name, - unique_id, - total_iterations, - temp_dir, +process_batch <- function(batch, + start_index, + fun_name, + unique_id, + total_iterations, + temp_dir, FUN, ...) { checkmate::assert_vector(batch, null.ok = FALSE) checkmate::assert_count(start_index, positive = TRUE) @@ -301,7 +301,7 @@ process_batch <- function(batch, checkmate::assert_count(total_iterations, positive = TRUE) checkmate::assert_string(temp_dir) checkmate::assert_function(FUN) - + results <- stats::setNames(vector("list", length(batch)), names(batch)) for (i in seq_along(batch)) { results[[i]] <- FUN(batch[[i]], ...) @@ -327,20 +327,20 @@ process_batch <- function(batch, #' @param parallelize Logical indicating whether or not to parallelize the computation. #' @param ... Additional args to be passed to teh \code{FUN}. #' @return The original \code{se} object with a new assay, \code{out_assay_name}. -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' FUN <- function(x) { #' data.table::data.table(Concentration = x$Concentration, CorrectedReadout = x$CorrectedReadout) -#' } +#' } #' apply_bumpy_function( -#' se, -#' FUN = FUN, -#' req_assay_name = "RawTreated", +#' se, +#' FUN = FUN, +#' req_assay_name = "RawTreated", #' out_assay_name = "CorrectedReadout" #' ) -#' +#' #' @keywords package_utils #' @export apply_bumpy_function <- function(se, @@ -354,7 +354,7 @@ apply_bumpy_function <- function(se, checkmate::assert_string(req_assay_name) checkmate::assert_string(out_assay_name) validate_se_assay_name(se, req_assay_name) - + asy <- SummarizedExperiment::assay(se, req_assay_name) checkmate::assert_class(asy, "BumpyDataFrameMatrix") df <- BumpyMatrix::unsplitAsDataFrame(asy, row.field = "row", column.field = "column") @@ -377,9 +377,9 @@ apply_bumpy_function <- function(se, stop("only data.table objects supported as return values from FUN for now") } }, parallelize = parallelize) - + out <- S4Vectors::DataFrame(do.call(rbind, out)) - + out_assay <- BumpyMatrix::splitAsBumpyMatrix(out[!colnames(out) %in% c("row", "column")], row = out$row, col = out$column) @@ -392,20 +392,20 @@ apply_bumpy_function <- function(se, #' #' check if all mae experiments are empty #' @param mae MultiAssayExperiment object -#' +#' #' @author Arkadiusz Gladki -#' +#' #' @return logical -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' is_mae_empty(mae) -#' +#' #' @keywords package_utils #' @export is_mae_empty <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + all(MAEpply(mae, is_exp_empty, unify = TRUE)) } @@ -413,20 +413,20 @@ is_mae_empty <- function(mae) { #' #' check if any experiment is empty #' @param mae MultiAssayExperiment object -#' +#' #' @author Arkadiusz Gladki -#' +#' #' @return logical -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' is_any_exp_empty(mae) -#' +#' #' @keywords package_utils #' @export is_any_exp_empty <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + any(MAEpply(mae, is_exp_empty, unify = TRUE)) } @@ -434,28 +434,28 @@ is_any_exp_empty <- function(mae) { #' #' check if experiment (SE) is empty #' @param exp \linkS4class{SummarizedExperiment} object. -#' +#' #' @author Arkadiusz Gladki -#' +#' #' @return logical -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' se <- mae[[1]] #' is_exp_empty(se) -#' +#' #' @keywords package_utils #' @export is_exp_empty <- function(exp) { checkmate::assert_class(exp, "SummarizedExperiment") - + names <- SummarizedExperiment::assayNames(exp) dt <- `if`( is.null(names), data.table::data.table(), convert_se_assay_to_dt(exp, names[[1]]) ) - + any( NROW(SummarizedExperiment::assay(exp)) == 0, NROW(dt) == 0 @@ -466,20 +466,20 @@ is_exp_empty <- function(exp) { #' #' get non empty assays #' @param mae MultiAssayExperiment object -#' +#' #' @author Arkadiusz Gladki -#' +#' #' @return charvec with non-empty experiments -#' -#' @examples +#' +#' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' get_non_empty_assays(mae) -#' +#' #' @keywords package_utils #' @export get_non_empty_assays <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + ne_info <- MAEpply(mae, is_exp_empty) == FALSE names(ne_info[ne_info == TRUE]) } @@ -488,9 +488,9 @@ get_non_empty_assays <- function(mae) { #' #' get colData of all experiments #' @param mae MultiAssayExperiment object -#' +#' #' @author Arkadiusz Gladki -#' +#' #' @examples #' mae <- get_synthetic_data("finalMAE_small.qs2") #' mcolData(mae) @@ -501,7 +501,7 @@ get_non_empty_assays <- function(mae) { #' @export mcolData <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + MAEpply(mae, SummarizedExperiment::colData, unify = TRUE) } @@ -514,29 +514,29 @@ mcolData <- function(mae) { #' #' @return data.table with all-experiments rowData #' -#' @examples -#' mae <- get_synthetic_data("finalMAE_small.qs2") +#' @examples +#' mae <- get_synthetic_data("finalMAE_small.qs2") #' mrowData(mae) #' #' @author Arkadiusz Gladki mrowData <- function(mae) { checkmate::assert_class(mae, "MultiAssayExperiment") - + MAEpply(mae, SummarizedExperiment::rowData, unify = TRUE) } #' Get synthetic data from gDRtestData package #' #' @param qs dataset name or qs2 filename (e.g. \code{"small"} or \code{"finalMAE_small.qs2"}) -#' +#' #' @keywords package_utils #' @export -#' +#' #' @examples #' get_synthetic_data("finalMAE_small.qs2") #' #' @return loaded data -#' +#' get_synthetic_data <- function(qs) { # check if prefix exist, if not add one if (!grepl("finalMAE", qs)) { @@ -553,27 +553,27 @@ get_synthetic_data <- function(qs) { #' Geometric mean -#' +#' #' Auxiliary function for calculating geometric mean with possibility to handle -Inf -#' +#' #' @param x numeric vector -#' @param fixed flag should be add fix for -Inf +#' @param fixed flag should be add fix for -Inf #' @param maxlog10Concentration numeric value needed to calculate minimal value -#' +#' #' @return numeric vector -#' -#' @examples +#' +#' @examples #' geometric_mean(c(2, 8)) -#' +#' #' @keywords package_utils #' @export -#' +#' #' @keywords internal geometric_mean <- function(x, fixed = TRUE, maxlog10Concentration = 1) { checkmate::assert_numeric(x) checkmate::assert_flag(fixed) checkmate::assert_numeric(maxlog10Concentration) - + if (fixed) { x <- pmax( 10 ^ maxlog10Concentration / 1e6, @@ -583,25 +583,25 @@ geometric_mean <- function(x, fixed = TRUE, maxlog10Concentration = 1) { exp(mean(log(x))) } -#' Average biological replicates on the data table side. +#' Average biological replicates on the data table side. #' #' @param dt data.table with Metric data #' @param var String representing additional metadata of replicates #' @param prettified Flag indicating if the provided identifiers in the dt are prettified #' @param fixed Flag indicating whether to add a fix for -Inf in the geometric mean. -#' @param geometric_average_fields Character vector of column names in \code{dt} +#' @param geometric_average_fields Character vector of column names in \code{dt} #' to take the geometric average of. -#' @param fit_type_average_fields Character vector of column names in \code{dt} +#' @param fit_type_average_fields Character vector of column names in \code{dt} #' that should be treated as a column with fit type data -#' @param blacklisted_fields Character vector of column names in \code{dt} +#' @param blacklisted_fields Character vector of column names in \code{dt} #' that should be skipped in averaging #' @param add_sd Flag indicating whether to add standard deviation and count columns. -#' +#' #' @examples #' dt <- data.table::data.table(a = c(seq_len(10), 1), #' b = c(rep("drugA", 10), rep("drugB", 1))) #' average_biological_replicates_dt(dt, var = "a") -#' +#' #' @return data.table without replicates #' @keywords package_utils #' @export @@ -614,16 +614,16 @@ average_biological_replicates_dt <- function( fit_type_average_fields = get_header("metric_average_fields")$fit_type, blacklisted_fields = get_header("metric_average_fields")$blacklisted, add_sd = FALSE) { - + checkmate::assert_data_table(dt) checkmate::assert_character(var, null.ok = FALSE, any.missing = FALSE) checkmate::assert_flag(prettified) checkmate::assert_character(geometric_average_fields) checkmate::assert_character(fit_type_average_fields) checkmate::assert_flag(add_sd) - + data <- data.table::copy(dt) - + if (prettified) { pidfs <- get_prettified_identifiers() iso_cols <- prettify_flat_metrics(get_header("iso_position"), human_readable = TRUE) @@ -633,23 +633,23 @@ average_biological_replicates_dt <- function( iso_cols <- get_header("iso_position") id_cols <- prettify_flat_metrics(get_header("id")) } - + max_fields <- c("maxlog10Concentration", "N_conc") regex_max_fields <- paste0(c(max_fields, prettify_flat_metrics(max_fields, human_readable = TRUE)), collapse = "|") max_fields <- grep(regex_max_fields, names(data), value = TRUE) - + p_val_col <- "p_value" regex_p_val_col <- paste0(c(p_val_col, prettify_flat_metrics(p_val_col, human_readable = TRUE)), collapse = "|") p_val_col <- grep(regex_p_val_col, names(data), value = TRUE) - + r2_col <- "r2" regex_r2_col <- paste0(c(r2_col, prettify_flat_metrics(r2_col, human_readable = TRUE)), collapse = "|") r2_col <- grep(regex_r2_col, names(data), value = TRUE) - + average_fields <- setdiff(names(Filter(is.numeric, data)), c(unlist(pidfs), var, iso_cols, max_fields, p_val_col)) # don't average across _sd$ fields (to avoid adding unexpected columns, i.e. x_sd_sd_sd_sd) @@ -657,46 +657,46 @@ average_biological_replicates_dt <- function( geometric_average_fields <- intersect(geometric_average_fields, names(dt)) blacklisted_fields <- intersect(blacklisted_fields, names(dt)) group_by <- setdiff(names(data), c(average_fields, var, id_cols, blacklisted_fields, max_fields, p_val_col)) - - + + replicate_iden_vars <- intersect(c(group_by, var), names(data)) - + if (add_sd) { # Calculate standard deviation for both average_fields and geometric_average_fields sd_fields <- paste0(average_fields, "_sd") geom_sd_fields <- paste0(geometric_average_fields, "_sd") - + data <- data[, (sd_fields) := lapply(.SD, calc_sd), .SDcols = average_fields, by = group_by] data <- data[, (geom_sd_fields) := lapply(.SD, calc_sd), .SDcols = geometric_average_fields, by = group_by] - + # Calculate count and add as a single column data <- data[, count := .N, by = group_by] } - + # 1. Remove the specified variable column data[, (var) := NULL] - + # 2. For max_fields - take the maximum value data[, (max_fields) := lapply(.SD, max, na.rm = TRUE), .SDcols = max_fields, by = group_by] - + # 3. For p_val_col - average using Fisher's method data[, (p_val_col) := lapply(.SD, average_pvalues), .SDcols = p_val_col, by = group_by] - + # 4. For standard numeric fields - use arithmetic mean data[, (average_fields) := lapply(.SD, mean, na.rm = TRUE), .SDcols = average_fields, by = group_by] - + # 5. For specified fields - use geometric mean data[, (geometric_average_fields) := lapply(.SD, FUN = function(x) { geometric_mean(x, fixed = fixed) }), .SDcols = geometric_average_fields, by = group_by] - - # 6. Choose better model + + # 6. Choose better model if (NROW(r2_col)) { data <- data.table::rbindlist(lapply(r2_col, function(col) { data[data[, .I[which.max(get(col))], by = setdiff(group_by, fit_type_average_fields)]$V1] @@ -709,7 +709,7 @@ average_biological_replicates_dt <- function( #' Checks if \code{se} is combo dataset. #' #' @param se SummarizedExperiment -#' +#' #' @examples #' se <- get_synthetic_data("finalMAE_combo_matrix.qs2")[[1]] #' is_combo_data(se) @@ -720,11 +720,11 @@ average_biological_replicates_dt <- function( #' #' @return logical #' @keywords combination_data -#' +#' #' @export is_combo_data <- function(se) { checkmate::assert_class(se, "SummarizedExperiment") - + all(get_combo_assay_names() %in% SummarizedExperiment::assayNames(se)) } @@ -733,30 +733,30 @@ is_combo_data <- function(se) { #' @param cols character vector with the columns of the input data #' @param prettify_identifiers logical flag specifying if identifiers are expected to be prettified #' @param codrug_identifiers character vector with identifiers for the codrug columns -#' +#' #' @examples #' has_single_codrug_data("Drug Name") #' has_single_codrug_data(c("Drug Name", "Cell Lines")) #' has_single_codrug_data(c("Drug Name 2", "Concentration 2")) #' has_single_codrug_data( #' get_prettified_identifiers( -#' c("concentration2", "drug_name2"), +#' c("concentration2", "drug_name2"), #' simplify = FALSE #' ) #' ) #' #' @keywords combination_data #' @return logical flag -#' +#' #' @export has_single_codrug_data <- function(cols, prettify_identifiers = TRUE, codrug_identifiers = c("drug_name2", "concentration2")) { - + checkmate::assert_true(all(codrug_identifiers %in% names(get_env_identifiers(simplify = TRUE)))) checkmate::assert_flag(prettify_identifiers) - + codrug_colnames <- if (prettify_identifiers) { get_prettified_identifiers(codrug_identifiers, simplify = FALSE) } else { @@ -764,7 +764,7 @@ has_single_codrug_data <- } checkmate::assert_character(cols, any.missing = FALSE) checkmate::assert_character(codrug_colnames, any.missing = FALSE) - + all(codrug_colnames %in% cols) } @@ -775,7 +775,7 @@ has_single_codrug_data <- #' @param prettify_identifiers logical flag specifying if identifiers are expected to be prettified #' @param codrug_name_identifier string with the identifiers for the codrug drug_name column #' @param codrug_conc_identifier string with the identifiers for the codrug concentration column -#' +#' #' @examples #' dt <- #' data.table::data.table( @@ -785,13 +785,13 @@ has_single_codrug_data <- #' "Concentration 2" = 4:6 #' ) #' has_valid_codrug_data(dt) -#' +#' #' dt$`Concentration 2` <- NULL #' has_valid_codrug_data(dt) #' #' @keywords combination_data #' @return logical flag -#' +#' #' @export has_valid_codrug_data <- function(data, @@ -803,22 +803,22 @@ has_valid_codrug_data <- checkmate::assert_flag(prettify_identifiers) checkmate::assert_string(codrug_name_identifier) checkmate::assert_string(codrug_conc_identifier) - + idfs <- if (prettify_identifiers) { get_prettified_identifiers(simplify = TRUE) } else { get_env_identifiers() } - + codrug_v <- c(codrug_name_identifier, codrug_conc_identifier) - + status <- # codrug data not present for drug_name and/or concentration data if (!has_single_codrug_data(dcols, prettify_identifiers, codrug_v)) { FALSE } else { codrug_cols <- as.character(idfs[codrug_v]) - + # codrug data not valid (for drug names and/or concentration data) if (all(data[[codrug_cols[1]]] %in% idfs[["untreated_tag"]]) || all(is.na(data[[codrug_cols[2]]]))) { @@ -835,9 +835,9 @@ has_valid_codrug_data <- #' @param data data.table with input data #' @param prettify_identifiers logical flag specifying if identifiers are expected to be prettified #' @param codrug_identifiers character vector with identifiers for the codrug columns -#' +#' #' @examples -#' +#' #' dt <- #' data.table::data.table( #' "Drug Name" = letters[seq_len(3)], @@ -850,26 +850,26 @@ has_valid_codrug_data <- #' #' @keywords combination_data #' @return data.table without combination columns -#' +#' #' @export remove_codrug_data <- function(data, prettify_identifiers = TRUE, codrug_identifiers = c("drug_name2", "concentration2")) { - + checkmate::assert_true(all(codrug_identifiers %in% names(get_env_identifiers()))) checkmate::assert_data_table(data) checkmate::assert_flag(prettify_identifiers) - + codrug_colnames <- if (prettify_identifiers) { vapply(codrug_identifiers, function(x) get_prettified_identifiers(x), character(1)) } else { vapply(codrug_identifiers, function(x) get_env_identifiers(x), character(1)) } checkmate::assert_character(codrug_colnames, any.missing = FALSE) - + idx <- which(!colnames(data) %in% codrug_colnames) - + # support both: data.table and data.frame subset(data, select = idx) } @@ -877,33 +877,33 @@ remove_codrug_data <- #' Identify and return additional variables in list of dt #' #' @param dt_list list of data.table or data.table containing additional variables -#' @param unique logical flag indicating if all variables should be returned +#' @param unique logical flag indicating if all variables should be returned #' or only those containing more than one unique value #' @param prettified Flag indicating if the provided identifiers in the dt are prettified -#' +#' #' @examples #' dt <- data.table::data.table( -#' Gnumber = seq_len(10), -#' Concentration = runif(10), +#' Gnumber = seq_len(10), +#' Concentration = runif(10), #' Ligand = c(rep(0.5, 5), rep(0, 5)) #' ) #' get_additional_variables(dt) #' #' @return vector of variable names with additional variables -#' +#' #' @keywords combination_data #' @export get_additional_variables <- function(dt_list, unique = FALSE, prettified = FALSE) { - - + + if (data.table::is.data.table(dt_list)) { dt_list <- list(dt_list) } checkmate::assert_flag(unique) checkmate::assert_flag(prettified) - + if (prettified) { headers <- prettify_flat_metrics(unlist(get_header()), human_readable = TRUE) pidfs <- get_prettified_identifiers() @@ -916,11 +916,11 @@ get_additional_variables <- function(dt_list, system.file(package = "gDRutils", "settings.json") )] idfs <- setdiff(unique(c(headers, pidfs)), idf2keep) - + additional_perturbations <- unique(unlist(lapply(dt_list, function(x) { setdiff(sub(" \\(.*\\)$", "", names(x)), idfs) }))) - + if (unique) { additional_perturbations } else { @@ -961,19 +961,19 @@ calc_sd <- function(x) { #' safe wrapper of Sys.getenv() -#' -#' So far the helper is needed to handle env vars containing `:` +#' +#' So far the helper is needed to handle env vars containing `:` #' for which the backslash is automatically added in some contexts #' and R could not get the original value for these env vars. -#' +#' #' @param x string with the name of the environmental variable #' @param ... additional params for Sys.getenev #' @keywords package_utils -#' -#' @examples +#' +#' @examples #' get_env_var("HOME") # -#' @export +#' @export #' @return sanitized value of the env variable get_env_var <- function(x, ...) { gsub("\\\\", "", Sys.getenv(x, ...)) @@ -983,7 +983,7 @@ get_env_var <- function(x, ...) { #' #' Gnumber, i.e. "G12345678" is currently the default format of drug_id. It's also used as a drug name in some cases. #' -#' By default, Gnumber(s) followed by any character (except for underscore and any digit) +#' By default, Gnumber(s) followed by any character (except for underscore and any digit) #' and any batch substring are cleaned: #' * G00060245.18 => G00060245 #' * G00060245.1-8 => G00060245 @@ -998,13 +998,13 @@ get_env_var <- function(x, ...) { #' By default, Gnumber(s) followed by the "_" or digit (regardless the batch substring) are not cleaned: #' * Gnumber with suffix added to prevent duplicated ids #' * G00060245_(G00060245.1-8) -#' * too long Gnumber +#' * too long Gnumber #' * G123456789.1-12 -#' +#' #' @param drug_vec atomic vector (e.g., character or integer) with drug id(s) #' @param drug_p string with regex pattern for drug id. Set to Gnumber format by default: "G\[0-9\]\{8\}". #' @param sep_p string with regex pattern for separator. Set to any character except for digit and space -#' @param batch_p string with regex pattern for batch substring. +#' @param batch_p string with regex pattern for batch substring. #' By default set to any character(s): ".+" #' #' @examples @@ -1017,7 +1017,7 @@ get_env_var <- function(x, ...) { #' remove_drug_batch("G03256376.1-2;G00376771.1-19;G02557755") #' remove_drug_batch("G00060245_(G00060245.1-8)") #' remove_drug_batch(c("G00060245.18", "G00060245.1-8", "G00060245.1-1.DMA")) -#' +#' #' remove_drug_batch("DRUG_01.123", drug_p = "DRUG_[0-9]+") #' remove_drug_batch("G00001234:22-1", sep_p = ":") #' remove_drug_batch("G00001234.28", batch_p = "[0-9]+") @@ -1033,9 +1033,9 @@ remove_drug_batch <- function(drug_vec, checkmate::assert_string(drug_p) checkmate::assert_string(sep_p) checkmate::assert_string(batch_p) - + drug_vec <- as.character(drug_vec) - + p <- paste0("(", drug_p, ")", sep_p, batch_p, "$") r <- "\\1" sub(p, r, drug_vec) @@ -1050,12 +1050,12 @@ remove_drug_batch <- function(drug_vec, #' @param col string with column name to be capped in assay_dt ("xc50" by default) #' @param capping_fold number for min and max concentration values #' final formulas are min / capping_fold and max * capping_fold -#' @param additional_group_cols character vector of column names used to identify unique observations +#' @param additional_group_cols character vector of column names used to identify unique observations #' - for single-agent experiment additional to the combination of \code{DrugName} and \code{CellLineName} -#' - for combination experiment additional to the combination of \code{DrugName}, \code{DrugName_2} +#' - for combination experiment additional to the combination of \code{DrugName}, \code{DrugName_2} #' and \code{CellLineName} -#' -#' +#' +#' #' @examples #' # single-agent data #' sdata <- get_synthetic_data("finalMAE_small.qs2") @@ -1064,7 +1064,7 @@ remove_drug_batch <- function(drug_vec, #' smetrics_data_capped <- cap_assay_infinities(saveraged_data, #' smetrics_data, #' experiment_name = "single-agent") -#' +#' #' # combination data #' cdata <- get_synthetic_data("finalMAE_combo_matrix_small.qs2") #' scaveraged_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], "Averaged") @@ -1094,18 +1094,18 @@ cap_assay_infinities <- function(conc_assay_dt, checkmate::assert_subset(additional_group_cols, choices = names(conc_assay_dt)) checkmate::assert_subset(additional_group_cols, choices = names(assay_dt)) } - + if (!experiment_name %in% c(get_supported_experiments("sa"), get_supported_experiments("combo"))) { # this function does not support "co-dilution" yet stop(sprintf("unsupported experiment:'%s'", experiment_name)) } - + conc <- get_env_identifiers("concentration") conc_2 <- get_env_identifiers("concentration2") - + min_conc <- max_conc <- min_conc_2 <- max_conc_2 <- min_val_conc_cd <- min_conc_cd <- min_conc_cd <- NULL - + out_dt <- if (any(assay_dt[[col]] %in% c(Inf, -Inf))) { # check whether capping is required if (experiment_name == get_supported_experiments("sa")) { group_cols <- c( @@ -1113,51 +1113,51 @@ cap_assay_infinities <- function(conc_assay_dt, additional_group_cols) mt <- data.table::copy(assay_dt) orig_col_order <- colnames(mt) - + # calculate min and max conc for each combination - min_max_conc <- + min_max_conc <- conc_assay_dt[get(conc) > 0, .(min = min(get(conc)), max = max(get(conc))), by = group_cols] - + mt <- merge(mt, min_max_conc, by = group_cols) mt[get(col) == -Inf, col] <- mt[get(col) == -Inf, "min"] / capping_fold mt[get(col) == Inf, col] <- mt[get(col) == Inf, "max"] * capping_fold - + # return result with orgin column data.table::setkey(mt, NULL) mt[, orig_col_order, with = FALSE] - + } else if (experiment_name == get_supported_experiments("combo")) { group_cols <- c( as.character(get_env_identifiers(c("drug_name", "drug_name2", "cellline_name"), simplify = FALSE)), additional_group_cols) mt <- data.table::copy(assay_dt) orig_col_order <- colnames(mt) - + if (any(assay_dt$dilution_drug %in% c("drug_1", "drug_2"))) { # calculate min and max conc & conc_2 for each combination - min_max_conc <- - conc_assay_dt[get(conc) > 0, .(min_conc = min(get(conc)), max_conc = max(get(conc))), + min_max_conc <- + conc_assay_dt[get(conc) > 0, .(min_conc = min(get(conc)), max_conc = max(get(conc))), by = group_cols] - min_max_conc_2 <- - conc_assay_dt[get(conc_2) > 0, .(min_conc_2 = min(get(conc_2)), max_conc_2 = max(get(conc_2))), + min_max_conc_2 <- + conc_assay_dt[get(conc_2) > 0, .(min_conc_2 = min(get(conc_2)), max_conc_2 = max(get(conc_2))), by = group_cols] # all = TRUE to avoid skipping values with min_con == 2 and/or min_con_2 == 0 min_max_conc <- merge(min_max_conc, min_max_conc_2, by = group_cols, all = TRUE) - + mt <- merge(mt, min_max_conc, by = group_cols) # drug_1 - mt[get(col) == -Inf & dilution_drug == "drug_1", col] <- + mt[get(col) == -Inf & dilution_drug == "drug_1", col] <- mt[get(col) == -Inf & dilution_drug == "drug_1", "min_conc"] / capping_fold - mt[get(col) == Inf & dilution_drug == "drug_1", col] <- + mt[get(col) == Inf & dilution_drug == "drug_1", col] <- mt[get(col) == Inf & dilution_drug == "drug_1", "max_conc"] * capping_fold - + # drug_2 - mt[get(col) == -Inf & dilution_drug == "drug_2", col] <- + mt[get(col) == -Inf & dilution_drug == "drug_2", col] <- mt[get(col) == -Inf & dilution_drug == "drug_2", "min_conc_2"] / capping_fold - mt[get(col) == Inf & dilution_drug == "drug_2", col] <- + mt[get(col) == Inf & dilution_drug == "drug_2", col] <- mt[get(col) == Inf & dilution_drug == "drug_2", "max_conc_2"] * capping_fold - + # return result with orgin column data.table::setkey(mt, NULL) mt <- mt[, orig_col_order, with = FALSE] @@ -1166,14 +1166,14 @@ cap_assay_infinities <- function(conc_assay_dt, if (any(assay_dt$dilution_drug %in% c("codilution"))) { # calculate min and max conc for each codilution min_max_conc <- .prep_cd_conc_cap_dict(conc_assay_dt, group_cols) - + mt <- merge(mt, min_max_conc, by = c(group_cols, "normalization_type", "ratio"), all.x = TRUE) # codilution - mt[get(col) == -Inf & dilution_drug == "codilution", col] <- + mt[get(col) == -Inf & dilution_drug == "codilution", col] <- mt[get(col) == -Inf & dilution_drug == "codilution", "min_conc_cd"] / capping_fold - mt[get(col) == Inf & dilution_drug == "codilution", col] <- + mt[get(col) == Inf & dilution_drug == "codilution", col] <- mt[get(col) == Inf & dilution_drug == "codilution", "max_conc_cd"] * capping_fold - + # return result with orgin column data.table::setkey(mt, NULL) mt <- mt[, orig_col_order, with = FALSE] @@ -1185,50 +1185,50 @@ cap_assay_infinities <- function(conc_assay_dt, } stopifnot(identical(dim(assay_dt), dim(out_dt))) out_dt - + } #' Prepare dict with min and max concentration for codilution #' #' @param conc_assay_dt assay data in data.table format with Concentration data #' @param group_cols charvec with grouping column names -#' +#' #' @return \code{data.table} with max and min concentration for codilution -#' +#' #' @keywords internal .prep_cd_conc_cap_dict <- function( conc_assay_dt, group_cols = as.character(get_env_identifiers(c("drug_name", "drug_name2", "cellline_name"), simplify = FALSE)) ) { - + checkmate::assert_data_table(conc_assay_dt) checkmate::assert_character(group_cols) checkmate::assert_subset(group_cols, choices = names(conc_assay_dt)) - + conc <- get_env_identifiers("concentration") conc_2 <- get_env_identifiers("concentration2") - + group_cols_cd <- c(group_cols, "normalization_type") - - conc_map <- map_conc_to_standardized_conc(conc_assay_dt[[conc]], + + conc_map <- map_conc_to_standardized_conc(conc_assay_dt[[conc]], conc_assay_dt[[conc_2]]) - + conc_dict <- unique(conc_assay_dt[, c(group_cols_cd, conc, conc_2), with = FALSE]) # filter out all single-agents. conc_dict <- conc_dict[!(conc_dict[[conc]] == 0 | conc_dict[[conc_2]] == 0)] # add standardized concentration conc_dict <- merge(conc_dict, conc_map, by.x = conc, by.y = "concs") conc_dict <- merge(conc_dict, conc_map, by.x = conc_2, by.y = "concs", suffixes = c("", "_2")) - + conc_dict[["ratio"]] <- round_concentration(conc_dict[[conc_2]] / conc_dict[[conc]], ndigit = 1) conc_dict[["summed_conc"]] <- conc_dict[["rconcs"]] + conc_dict[["rconcs_2"]] - - conc_dict <- conc_dict[, .(min_conc_cd = min(summed_conc), + + conc_dict <- conc_dict[, .(min_conc_cd = min(summed_conc), max_conc_cd = max(summed_conc), N_conc = .N), by = c(group_cols_cd, "ratio")] conc_dict <- conc_dict[N_conc > 4][, N_conc := NULL] # 4 from assumption in gDRcore:::fit_combo_codilutions - + (conc_dict) } @@ -1238,37 +1238,37 @@ cap_assay_infinities <- function(conc_assay_dt, #' @param conc2 numeric vector of the concentrations for drug 2. #' #' @examples -#' +#' #' ratio <- 0.5 #' conc1 <- c(0, 10 ^ (seq(-3, 1, ratio))) -#' +#' #' shorter_range <- conc1[-1] #' noise <- runif(length(shorter_range), 1e-12, 1e-11) #' conc2 <- shorter_range + noise -#' +#' #' map_conc_to_standardized_conc(conc1, conc2) #' #' @return data.table of 2 columns named \code{"concs"} and \code{"rconcs"} -#' containing the original concentrations and their closest matched -#' standardized concentrations respectively. and their new standardized +#' containing the original concentrations and their closest matched +#' standardized concentrations respectively. and their new standardized #' concentrations. #' -#' @details The concentrations are standardized in that they will contain +#' @details The concentrations are standardized in that they will contain #' regularly spaced dilutions and close values will be rounded. #' @keywords package_utils #' @export map_conc_to_standardized_conc <- function(conc1, conc2) { # Remove single-agent. - + conc_1 <- setdiff(conc1, 0) conc_2 <- setdiff(conc2, 0) - + conc_1 <- sort(unique(conc_1)) rconc1 <- .standardize_conc(conc_1) - + conc_2 <- sort(unique(conc_2)) rconc2 <- .standardize_conc(conc_2) - + rconc <- c(0, unique(c(rconc1, rconc2))) .find_closest_match <- function(x) { rconc[which.min(abs(rconc - x))] @@ -1276,12 +1276,12 @@ map_conc_to_standardized_conc <- function(conc1, conc2) { concs <- unique(c(conc1, conc2)) mapped_rconcs <- vapply(concs, .find_closest_match, numeric(1)) map <- unique(data.table::data.table(concs = concs, rconcs = mapped_rconcs)) - + tol <- 1 - + # Check if standardized values are within 5% of the original values round_diff <- which(abs(map$concs - map$rconcs) / map$concs > 0.05) - + map$rconcs[round_diff] <- map$concs[round_diff] mismatched <- which( round_concentration(map$conc, tol) != round_concentration(map$rconc, tol) @@ -1311,7 +1311,7 @@ map_conc_to_standardized_conc <- function(conc1, conc2) { rconc <- if (S4Vectors::isEmpty(conc)) { NULL } else if (length(unique(round_concentration(conc, 3))) > 4) { - # 4 is determined by the fewest number of concentrations required to be + # 4 is determined by the fewest number of concentrations required to be # considered a "matrix". log10_step <- .calculate_dilution_ratio(conc) num_steps <- round((log10(max(conc)) - log10(min(conc)) / log10_step), 0) @@ -1330,25 +1330,25 @@ map_conc_to_standardized_conc <- function(conc1, conc2) { #' #' @param concs numeric vector of concentrations. #' -#' @return numeric value of the dilution ratio for a given set of +#' @return numeric value of the dilution ratio for a given set of #' concentrations. #' @keywords internal #' @noRd .calculate_dilution_ratio <- function(concs) { checkmate::assert_numeric(concs, min.len = 2) concs <- unique(sort(concs)) - + first_removed <- concs[-1] first_two_removed <- first_removed[-1] last_removed <- concs[-length(concs)] last_two_removed <- last_removed[-length(last_removed)] - + dil_ratios <- c( - log10(first_removed / last_removed), + log10(first_removed / last_removed), log10(first_two_removed / last_two_removed) ) rounded_dil_ratios <- round_concentration(dil_ratios, 2) - + # Get most frequent dilution ratio. highest_freq_ratio <- names( sort(table(rounded_dil_ratios), decreasing = TRUE) @@ -1358,47 +1358,47 @@ map_conc_to_standardized_conc <- function(conc1, conc2) { #' Split big table -#' -#' Helper function for saving big tables in an Excel file. Excel has a -#' sheet size limit, if the table is too large it will not be possible to save -#' such a file. This function allows you to split the table into smaller parts +#' +#' Helper function for saving big tables in an Excel file. Excel has a +#' sheet size limit, if the table is too large it will not be possible to save +#' such a file. This function allows you to split the table into smaller parts #' so that saving can be possible -#' -#' @param dt_list list of data.tables. Each data.table will be checked and +#' +#' @param dt_list list of data.tables. Each data.table will be checked and #' split if meet the criteria -#' @param max_row integer defining the maximum number of rows in one sheet, the -#' rows will be divided into portions of this size. Default value, 1 000 000, +#' @param max_row integer defining the maximum number of rows in one sheet, the +#' rows will be divided into portions of this size. Default value, 1 000 000, #' is based on excel limit - 1 048 576 with extra safety margin -#' @param max_col integer defining the maximum number of columns in one sheet, -#' the columns will be divided into portions of this size. Default value, -#' 16 000, is based on excel limit - 16 384 with extra safety margin -#' +#' @param max_col integer defining the maximum number of columns in one sheet, +#' the columns will be divided into portions of this size. Default value, +#' 16 000, is based on excel limit - 16 384 with extra safety margin +#' #' @examples #' too_large_dt <- list(data.table::data.table(matrix(seq_len(300)), nrow = 10)) #' split_big_table_for_xlsx(too_large_dt, max_row = 250) -#' +#' #' @keywords package_utils -#' -#' @return list of data.tables -#' +#' +#' @return list of data.tables +#' #' @export -#' +#' split_big_table_for_xlsx <- function(dt_list, max_row = 1000000, max_col = 16000) { - + checkmate::assert_list(dt_list) checkmate::assert_data_table(dt_list[[1]]) checkmate::assert_number(max_row, null.ok = TRUE) checkmate::assert_number(max_col, null.ok = TRUE) - + to_big_data_list <- lapply( dt_list, FUN = function(x) { c(isTRUE(NROW(x) > max_row), isTRUE(NCOL(x) > max_col)) } ) - + out_list <- list() if (any(unlist(to_big_data_list))) { for (i in seq_along(to_big_data_list)) { @@ -1427,52 +1427,52 @@ split_big_table_for_xlsx <- function(dt_list, } #' get gDR package and their version installed in the environment -#' +#' #' @param pattern string with the pattern to grep R packages from the list of installed packages -#' -#' @examples +#' +#' @examples #' get_gDR_session_info() -#' +#' #' @keywords package_utils #' @return data.table with gDR packages and their versions #' @export -#' +#' get_gDR_session_info <- function(pattern = "^gDR") { checkmate::assert_string(pattern) all_packages <- utils::installed.packages() matched_packages <- all_packages[grepl(pattern, all_packages[, "Package"]), ] - + pkg_data <- data.table::data.table( Package = matched_packages[, "Package"], Version = matched_packages[, "Version"], LibPath = matched_packages[, "LibPath"] ) - + if (NROW(pkg_data) == 0) { return(data.table::data.table(Package = character(0), Version = character(0))) } - + pkg_data[, UsedVersion := Version[order(match(LibPath, .libPaths()))[1]], by = Package] # nolint pkg_data[, MaxVersion := max(Version), by = Package] - + outdated_pkgs <- pkg_data[LibPath != .Library & UsedVersion < MaxVersion, .(Package, UsedVersion, MaxVersion)] - + if (NROW(outdated_pkgs) > 0) { warning_msg <- paste("The following packages have a user version older than the system version:", - paste(outdated_pkgs$Package, - "Used Version:", outdated_pkgs$UsedVersion, - "Highest Version:", outdated_pkgs$MaxVersion, + paste(outdated_pkgs$Package, + "Used Version:", outdated_pkgs$UsedVersion, + "Highest Version:", outdated_pkgs$MaxVersion, sep = " ", collapse = "\n"), sep = "\n") warning(warning_msg) } - + unique(pkg_data[, .(Package, Version = UsedVersion)]) } #' Average p-values using Fisher's method -#' Combines a vector of p-values into a single representative p-value. -#' It implements Fisher's method, where the test statistic is calculated as -#' \deqn{X_{2k}^2 = -2 \sum_{i=1}^{k} \ln(p_i)}. +#' Combines a vector of p-values into a single representative p-value. +#' It implements Fisher's method, where the test statistic is calculated as +#' \deqn{X_{2k}^2 = -2 \sum_{i=1}^{k} \ln(p_i)}. #' This statistic follows a chi-squared distribution with 2k degrees of freedom (where k is the number #' of p-values), from which the combined p-value is derived. #' @@ -1483,21 +1483,21 @@ get_gDR_session_info <- function(pattern = "^gDR") { #' @keywords internal average_pvalues <- function(p_values) { checkmate::assert_numeric(p_values, lower = 0, upper = 1, min.len = 1) - + p_values <- stats::na.omit(p_values) k <- length(p_values) - + if (k == 0) { return(NA) } - + if (k == 1) { return(p_values) } - + # Fisher's method formula: chi-squared statistic chi_sq_stat <- -2 * sum(log(p_values)) - + # Combined p-value from the chi-squared distribution with 2k degrees of freedom stats::pchisq(chi_sq_stat, df = 2 * k, lower.tail = FALSE) } diff --git a/R/validate_identifiers.R b/R/validate_identifiers.R index d9da8719..6e878b04 100644 --- a/R/validate_identifiers.R +++ b/R/validate_identifiers.R @@ -17,13 +17,13 @@ #' @details #' Note that this does NOT set the identifiers anywhere (i.e. environment or \code{SummarizedExperiment} object). #' If identifiers do not validate, will throw error as side effect. -#' -#' @examples +#' +#' @examples #' validate_identifiers( -#' S4Vectors::DataFrame("Barcode" = NA, "Duration" = NA, "Template" = NA, "clid" = NA), +#' S4Vectors::DataFrame("Barcode" = NA, "Duration" = NA, "Template" = NA, "clid" = NA), #' req_ids = "barcode" #' ) -#' +#' #' @export validate_identifiers <- function(df, identifiers = NULL, req_ids = NULL, exp_one_ids = NULL) { if (is.null(identifiers)) { @@ -115,13 +115,13 @@ validate_identifiers <- function(df, identifiers = NULL, req_ids = NULL, exp_one missing <- !req_ids %in% names(id_map) if (any(missing)) { stop(sprintf("required identifiers: '%s' missing in 'id_map'", - paste0(req_ids[missing], collapse = ", "))) + toString(req_ids[missing]))) } gt_one <- lengths(id_map[req_ids]) != 1L if (any(gt_one)) { stop(sprintf("more than one identifier value found for required identifiers: '%s'", - paste0(names(id_map[req_ids][gt_one]), collapse = ", "))) + toString(names(id_map[req_ids][gt_one])))) } msg <- NULL @@ -145,7 +145,7 @@ validate_identifiers <- function(df, identifiers = NULL, req_ids = NULL, exp_one msg <- NULL if (length(polymappings) > 0L) { - msg <- sprintf("more than one mapping for identifier(s): '%s'\n", paste0(names(polymappings), collapse = ", ")) + msg <- sprintf("more than one mapping for identifier(s): '%s'\n", toString(names(polymappings))) } msg } diff --git a/tests/testthat/test-assay_names.R b/tests/testthat/test-assay_names.R index 56a23727..5d2e9bce 100644 --- a/tests/testthat/test-assay_names.R +++ b/tests/testthat/test-assay_names.R @@ -8,7 +8,7 @@ test_that("get_assay_names", { expect_true(length(agan) > 2) expect_identical("character", class(agan)) expect_named(agan) - + ## subset # single value sgan <- get_assay_names(type = names(agan[1])) @@ -20,8 +20,8 @@ test_that("get_assay_names", { # multiple values sgan <- get_assay_names(type = names(agan[1:2])) expect_identical(agan[1:2], sgan) - - + + ### errors # bad value for given filter provided expect_error(get_assay_names(type = "bad_type"), @@ -32,7 +32,7 @@ test_that("get_assay_names", { get_assay_names(data_type = "inv_data_type"), "Assertion on 'inv_data_type' failed" ) - + }) test_that("get_combo_assay_names", { @@ -45,13 +45,13 @@ test_that("get_combo_assay_names", { expect_true(any(pgcan != gcan)) expect_identical(length(gcan), length(pgcan)) expect_identical(names(gcan), names(pgcan)) - + ## subset # single value sgcan <- get_combo_assay_names(type = names(gcan[1])) expect_named(sgcan) expect_true(length(sgcan) == 1) - + ### errors # bad value for given filter provided expect_error(get_combo_assay_names(type = "bad_type"), @@ -68,13 +68,13 @@ test_that("get_combo_assay_names", { expect_true(any(pgcan != gcan)) expect_identical(length(gcan), length(pgcan)) expect_identical(names(gcan), names(pgcan)) - + ## subset # single value sgcan <- get_combo_assay_names(type = names(gcan[1])) expect_named(sgcan) expect_true(length(sgcan) == 1) - + ### errors # bad value for given filter provided expect_error(get_combo_assay_names(type = "bad_type"), @@ -91,13 +91,13 @@ test_that("get_combo_base_assay_names", { expect_true(any(pgcan != gcan)) expect_identical(length(gcan), length(pgcan)) expect_identical(names(gcan), names(pgcan)) - + ## subset # single value sgcan <- get_combo_base_assay_names(type = names(gcan[1])) expect_named(sgcan) expect_true(length(sgcan) == 1) - + ### errors # bad value for given filter provided expect_error(get_combo_base_assay_names(type = "bad_type"), @@ -114,13 +114,13 @@ test_that("get_combo_score_assay_names", { expect_true(any(pgcan != gcan)) expect_identical(length(gcan), length(pgcan)) expect_identical(names(gcan), names(pgcan)) - + ## subset # single value sgcan <- get_combo_score_assay_names(type = names(gcan[1])) expect_named(sgcan) expect_true(length(sgcan) == 1) - + ### errors # bad value for given filter provided expect_error(get_combo_score_assay_names(type = "bad_type"), diff --git a/tests/testthat/test-combo.R b/tests/testthat/test-combo.R index 448e1afd..98fd33e3 100644 --- a/tests/testthat/test-combo.R +++ b/tests/testthat/test-combo.R @@ -4,21 +4,21 @@ context("combo-related functions") test_that("convert_combo_data_to_dt", { test_mae <- get_synthetic_data("finalMAE_combo_matrix_small") res_l <- convert_combo_data_to_dt(test_mae[[1]]) - + # expected assays converted exp_as <- as.character(get_combo_assay_names()) expect_identical(sort(names(res_l)), sort(exp_as)) - + # check content of data.table expect_true(nrow(res_l[[1]]) > 1 && ncol(res_l[[1]]) > 1) exp_idfs <- get_prettified_identifiers(c("drug_name", "drug_name2", "cellline"), simplify = FALSE) expect_true(all(exp_idfs %in% colnames(res_l[[1]]))) - + # errors expect_error( convert_combo_data_to_dt(data.table::data.table(a = 1)), paste("Assertion on 'se' failed: Must inherit from class 'SummarizedExperiment',", - "but has classes 'data.table','data.frame'."), + "but has classes 'data.table','data.frame'."), fixed = TRUE ) err_msg <- @@ -45,7 +45,7 @@ test_that("convert_combo_data_to_dt", { test_that("shorten_normalization_type_name", { ### expected values expect_identical("GR", shorten_normalization_type_name("GRvalue")) - + ### errors err_msg <- "Assertion on 'x' failed: Must be element of set" expect_error(shorten_normalization_type_name("invalid"), err_msg) @@ -61,9 +61,9 @@ test_that("define_matrix_grid_positions", { pos = log10(round_concentration(conc)), marks = sprintf("%.2g", conc) ) - output$pos[1] <- + output$pos[1] <- 2 * log10(round_concentration(conc))[2] - log10(round_concentration(conc))[3] - log10(1.5) - + res <- define_matrix_grid_positions(conc, conc_2) expect_is(res, "list") expect_length(res, 2) @@ -74,24 +74,24 @@ test_that("define_matrix_grid_positions", { expect_equal(res[[1]]$log10conc_1, output$log10conc) expect_equal(res[[1]]$pos_y, output$pos) expect_equal(res[[1]]$marks_y, output$marks) - + res_2 <- define_matrix_grid_positions(conc, NA) expect_is(res_2, "list") expect_length(res_2, 2) expect_equal(names(res_2), c("axis_1", "axis_2")) expect_equal(res_2$axis_1, res$axis_1) expect_equal(dim(res_2$axis_2), c(0, 4)) - + res_3 <- define_matrix_grid_positions(conc, c(1.2)) expect_is(res_3, "list") expect_length(res_3, 2) expect_equal(res_3[["axis_2"]]$pos_x, log10(1.2)) - + res_4 <- define_matrix_grid_positions(conc, c(0, 1.2)) expect_is(res_4, "list") expect_length(res_4, 2) expect_equal(res_4[["axis_2"]][conc_2 == 0, ]$pos_x, log10(1.2) - 0.5) - + expect_error(define_matrix_grid_positions(conc, LETTERS[1:5])) expect_error(define_matrix_grid_positions(NULL, conc)) }) @@ -100,7 +100,7 @@ test_that("round_concentration", { x <- c(0.00175, 0.00324, 0.0091) expect_equal(round_concentration(x), c(0.00175, 0.00324, 0.00910)) expect_equal(round_concentration(x, ndigit = 1), c(0.002, 0.003, 0.010)) - + expect_error(round_concentration(LETTERS[1:5])) expect_error(round_concentration(NULL)) expect_error(round_concentration(x, ndigit = 1.5)) diff --git a/tests/testthat/test-convert_mae_se_assay_to_dt.R b/tests/testthat/test-convert_mae_se_assay_to_dt.R index f8157fd9..2a629575 100644 --- a/tests/testthat/test-convert_mae_se_assay_to_dt.R +++ b/tests/testthat/test-convert_mae_se_assay_to_dt.R @@ -3,50 +3,50 @@ test_that("convert_se_assay_to_dt works as expected", { n <- 10 rnames <- LETTERS[1:m] cnames <- letters[1:n] - + # Normal matrix. ref_gr_value <- matrix(runif(m * n), nrow = m, ncol = n, dimnames = list(rnames, cnames)) se <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value), rowData = S4Vectors::DataFrame(rnames), colData = S4Vectors::DataFrame(cnames)) - + dt <- convert_se_assay_to_dt(se = se, assay_name = "RefGRvalue", include_metadata = FALSE) expect_equal(dt$RefGRvalue, as.vector(ref_gr_value)) expect_equal(dim(dt), c(m * n, 3)) - + dt <- convert_se_assay_to_dt(se = se, assay_name = "RefGRvalue", include_metadata = TRUE) - + expect_equal(data.table::setorder(dt, cId)[["RefGRvalue"]], as.vector(ref_gr_value)) expect_equal(dim(dt), c(m * n, 5)) expect_equal(dt$rnames, as.character(dt$rId)) expect_equal(dt$cnames, as.character(dt$cId)) - + # BumpyDataFrameMatrix. df <- S4Vectors::DataFrame(r = rep(rnames, n), c = rep(cnames, m), values = runif(m * n)) - nested_rnames <- paste0("A", seq(nrow(df))) + nested_rnames <- paste0("A", seq_len(NROW(df))) rownames(df) <- nested_rnames - + norm <- BumpyMatrix::splitAsBumpyMatrix(df, row = df$r, column = df$c) se <- SummarizedExperiment::SummarizedExperiment(assays = list(norm = norm), rowData = S4Vectors::DataFrame(rnames), colData = S4Vectors::DataFrame(cnames)) dt <- convert_se_assay_to_dt(se = se, assay_name = "norm", include_metadata = FALSE) merged <- base::merge(df, S4Vectors::DataFrame(dt[, c("rId", "cId", "values")])) - + expect_equal(merged$r, merged$rId) expect_equal(merged$c, merged$cId) - - dt <- convert_se_assay_to_dt(se = se, assay_name = "norm", + + dt <- convert_se_assay_to_dt(se = se, assay_name = "norm", include_metadata = TRUE, retain_nested_rownames = TRUE) - + # Properly handles nested rownames. expect_equal(sort(dt$norm_rownames), sort(as.character(nested_rnames))) expect_true("norm_rownames" %in% colnames(dt)) - + merged <- base::merge(df, S4Vectors::DataFrame(dt[, c("rnames", "cnames", "values")])) expect_equal(merged$r, merged$rnames) expect_equal(merged$c, merged$cnames) - + # Properly drops masked values. df$normalization_type <- "value" df[1, "normalization_type"] <- NA @@ -56,7 +56,7 @@ test_that("convert_se_assay_to_dt works as expected", { colData = S4Vectors::DataFrame(cnames)) dt <- convert_se_assay_to_dt(se = se, assay_name = "norm", include_metadata = FALSE) expect_equal(NROW(dt), 199) - + df$Concentration <- runif(NROW(df)) df[2:3, "Concentration"] <- NA norm <- BumpyMatrix::splitAsBumpyMatrix(df, row = df$r, column = df$c) @@ -81,13 +81,13 @@ test_that("merge_metrics argument of assay_to_dt works as expected", { se <- SummarizedExperiment::SummarizedExperiment(assays = list(Metrics = mat), rowData = S4Vectors::DataFrame(rownames(mat)), colData = S4Vectors::DataFrame(colnames(mat))) - + obs <- convert_se_assay_to_dt(se, "Metrics") - + expect_equal(nrow(obs), m * 2) expect_true(all(colnames(get_header("metrics_names")) %in% colnames(obs))) - - # Insert random column. + + # Insert random column. metrics2 <- metrics extra_col <- "SERENA_WILLIAMS" extra_val <- rep_len(LETTERS, nrow(metrics2)) @@ -96,12 +96,12 @@ test_that("merge_metrics argument of assay_to_dt works as expected", { se2 <- SummarizedExperiment::SummarizedExperiment(assays = list(Metrics = mat2), rowData = S4Vectors::DataFrame(rownames(mat2)), colData = S4Vectors::DataFrame(colnames(mat2))) - + obs2 <- convert_se_assay_to_dt(se2, "Metrics") expect_true(extra_col %in% colnames(obs2)) expect_equal(metrics2[[extra_col]], extra_val) expect_true(all(colnames(get_header("metrics_names")) %in% colnames(obs2))) - + # unify_metadata works in covert_se_assay_to_dt se <- get_synthetic_data("finalMAE_small")[[1]][c(seq_len(3)), 1] rowData(se)$DrugName[[2]] <- rowData(se)$DrugName[[1]] @@ -116,21 +116,21 @@ test_that("convert_mae_assay_to_dt works as expected", { n <- 10 rnames <- LETTERS[1:m] cnames <- letters[1:n] - + # Normal matrix. ref_gr_value <- matrix(runif(m * n), nrow = m, ncol = n, dimnames = list(rnames, cnames)) se <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value), rowData = S4Vectors::DataFrame(rnames), colData = S4Vectors::DataFrame(cnames)) - + mae <- MultiAssayExperiment::MultiAssayExperiment(experiments = list("single-agent" = se)) - + dt <- convert_mae_assay_to_dt(mae = mae, assay_name = "RefGRvalue", include_metadata = FALSE) checkmate::expect_data_table(dt) expect_equal(dt$RefGRvalue, as.vector(ref_gr_value)) expect_equal(dim(dt), c(m * n, 3)) - - dt <- suppressWarnings(convert_mae_assay_to_dt(mae = mae, assay_name = "RefGRvalue", + + dt <- suppressWarnings(convert_mae_assay_to_dt(mae = mae, assay_name = "RefGRvalue", include_metadata = FALSE, wide_structure = TRUE)) checkmate::expect_data_table(dt) expect_equal(dt$RefGRvalue, as.vector(ref_gr_value)) @@ -138,59 +138,59 @@ test_that("convert_mae_assay_to_dt works as expected", { expect_warning(convert_mae_assay_to_dt(mae = mae, assay_name = "RefGRvalue", include_metadata = FALSE, wide_structure = TRUE), "assay is not class `BumpyMatrix`, wide_structure=TRUE ignored") - + dt <- convert_se_assay_to_dt(se = se, assay_name = "RefGRvalue", include_metadata = TRUE) expect_equal(data.table::setorder(dt, cId)[["RefGRvalue"]], as.vector(ref_gr_value)) expect_equal(dim(dt), c(m * n, 5)) expect_equal(dt$rnames, as.character(dt$rId)) expect_equal(dt$cnames, as.character(dt$cId)) - + se1 <- SummarizedExperiment::SummarizedExperiment( assays = list(RefGRvalue = ref_gr_value[1:10, ]), rowData = S4Vectors::DataFrame(rnames)[1:10, , drop = FALSE], colData = S4Vectors::DataFrame(cnames)) - + se2 <- SummarizedExperiment::SummarizedExperiment( assays = list(RefGRvalue = ref_gr_value[11:20, ]), rowData = S4Vectors::DataFrame(rnames)[11:20, , drop = FALSE], colData = S4Vectors::DataFrame(cnames)) - + maeTwoExperiments <- MultiAssayExperiment::MultiAssayExperiment( experiments = list("single-agent" = se1, "combination" = se2)) - + dt1 <- convert_mae_assay_to_dt(mae = maeTwoExperiments, experiment_name = "single-agent", assay_name = "RefGRvalue", include_metadata = FALSE) checkmate::expect_data_table(dt1) expect_equal(dt1$RefGRvalue, as.vector(ref_gr_value[1:10, , drop = FALSE])) expect_equal(dim(dt1), c(m / 2 * n, 3)) - + dt1 <- convert_mae_assay_to_dt(mae = maeTwoExperiments, experiment_name = "single-agent", assay_name = "RefGRvalue", include_metadata = TRUE) checkmate::expect_data_table(dt1) - expect_equal(sort(data.table::setorder(dt1)[["RefGRvalue"]]), + expect_equal(sort(data.table::setorder(dt1)[["RefGRvalue"]]), sort(as.vector(ref_gr_value[1:10, ]))) expect_equal(dim(dt1), c(m / 2 * n, 5)) expect_equal(dt1$rnames, as.character(dt1$rId)) expect_equal(dt1$cnames, as.character(dt1$cId)) - - + + dt2 <- convert_mae_assay_to_dt(mae = maeTwoExperiments, experiment_name = "combination", assay_name = "RefGRvalue", include_metadata = FALSE) checkmate::expect_data_table(dt2) expect_equal(dt2$RefGRvalue, as.vector(ref_gr_value[11:20, , drop = FALSE])) expect_equal(dim(dt2), c(m / 2 * n, 3)) - + dt2 <- convert_mae_assay_to_dt(mae = maeTwoExperiments, experiment_name = "combination", assay_name = "RefGRvalue", include_metadata = TRUE) checkmate::expect_data_table(dt2) - expect_equal(sort(data.table::setorder(dt2)[["RefGRvalue"]]), + expect_equal(sort(data.table::setorder(dt2)[["RefGRvalue"]]), sort(as.vector(ref_gr_value[11:20, ]))) expect_equal(dim(dt2), c(m / 2 * n, 5)) expect_equal(dt2$rnames, as.character(dt2$rId)) expect_equal(dt2$cnames, as.character(dt2$cId)) - - + + dt3 <- convert_mae_assay_to_dt(mae = maeTwoExperiments, assay_name = "RefGRvalue", include_metadata = TRUE) checkmate::expect_data_table(dt3) @@ -198,28 +198,28 @@ test_that("convert_mae_assay_to_dt works as expected", { expect_equal(dim(dt), c(m * n, 5)) expect_equal(dt$rnames, as.character(dt$rId)) expect_equal(dt$cnames, as.character(dt$cId)) - + expect_warning(convert_mae_assay_to_dt(mae = maeTwoExperiments, assay_name = "Nonexistent"), "assay 'Nonexistent' was not found in any of the following experiments") - - expect_warning(convert_mae_assay_to_dt(mae = maeTwoExperiments, assay_name = "RefGRvalue", + + expect_warning(convert_mae_assay_to_dt(mae = maeTwoExperiments, assay_name = "RefGRvalue", include_metadata = TRUE, wide_structure = TRUE), "assay is not class `BumpyMatrix`, wide_structure=TRUE ignored") - - + + real_mae <- get_synthetic_data("finalMAE_small") - + dt_l <- convert_mae_assay_to_dt(mae = real_mae, assay_name = "Averaged", wide_structure = FALSE) checkmate::expect_data_table(dt_l) col_l <- c("normalization_type", "x", "x_std") expect_true(all(col_l %in% colnames(dt_l))) - + dt_w <- convert_mae_assay_to_dt(mae = real_mae, assay_name = "Averaged", wide_structure = TRUE) checkmate::expect_data_table(dt_w) col_w <- c("RelativeViability", "GRvalue", "std_RelativeViability", "std_GRvalue") expect_true(all(col_w %in% colnames(dt_w))) - + expect_equal(setdiff(colnames(dt_w), col_w), setdiff(colnames(dt_l), col_l)) expect_equal(max(dt_l[, .N, by = "normalization_type"]$N), NROW(dt_w)) }) @@ -228,63 +228,63 @@ test_that("convert_mae_assay_to_dt works as expected", { test_that("convert_se_assay_to_custom_dt works as expected", { json_path <- system.file(package = "gDRutils", "test_settings_2.json") s <- get_settings_from_json(json_path = json_path) - + se <- get_synthetic_data("finalMAE_small")[[1]] - + dt1 <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics") checkmate::expect_data_table(dt1, min.rows = 2, min.cols = 2) expect_true(all(s$METRIC_WISH_LIST %in% names(dt1))) - + dt2 <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics", output_table = "Metrics_raw") checkmate::expect_data_table(dt2, min.rows = 2, min.cols = 2) expect_true(all(s$METRIC_WISH_LIST %in% names(dt2))) - + dt3 <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics", output_table = "Metrics_initial") checkmate::expect_data_table(dt3, min.rows = 2, min.cols = 2) expect_false(identical(dt2, dt3)) - expect_true(all(c("x_mean", "x_AOC", "x_AOC_range", "xc50", "x_max", "ec50", + expect_true(all(c("x_mean", "x_AOC", "x_AOC_range", "xc50", "x_max", "ec50", "x_inf", "x_0", "h", "r2", "x_sd_avg", "fit_type") %in% names(dt3))) - + dt4 <- convert_se_assay_to_custom_dt(se, assay_name = "Averaged") checkmate::expect_data_table(dt4, min.rows = 2, min.cols = 2) expect_true( all(c("GR value", "Relative Viability", "Std GR value", "Std Relative Viability") %in% names(dt4))) - + dt5 <- convert_se_assay_to_custom_dt(se, assay_name = "Averaged", output_table = "Averaged") expect_true(identical(dt4, dt5)) - + se2 <- get_synthetic_data("finalMAE_combo_matrix")[[1]] dt6 <- convert_se_assay_to_custom_dt(se2, assay_name = "Metrics") checkmate::expect_data_table(dt6, min.rows = 2, min.cols = 2) expect_true(all(s$METRIC_WISH_LIST %in% names(dt6))) - + dt7 <- convert_se_assay_to_custom_dt(se2, assay_name = get_combo_assay_names()[1]) checkmate::expect_data_table(dt7, min.rows = 2, min.cols = 2) expect_true(all(names(get_combo_excess_field_names()) %in% names(dt7))) - + expect_error(convert_se_assay_to_custom_dt(as.list(se), assay_name = "Metrics")) expect_error(convert_se_assay_to_custom_dt(se, assay_name = "Averaged", output_table = "Metrics_raw")) expect_error(convert_se_assay_to_custom_dt(se, "xxx")) expect_error(convert_se_assay_to_custom_dt(se, "Metrics", "xxx")) - + dt_metrics_uncapped <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics", cap_values = FALSE) dt_metrics_capped <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics", cap_values = TRUE) - + checkmate::expect_data_table(dt_metrics_capped) expect_false(identical(dt_metrics_uncapped, dt_metrics_capped)) - + manually_capped_dt <- capVals(dt_metrics_uncapped) expect_identical(dt_metrics_capped, manually_capped_dt) - + dt_metrics_default <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics") expect_identical(dt_metrics_default, dt_metrics_uncapped) - + dt_averaged_no_cap <- convert_se_assay_to_custom_dt(se, assay_name = "Averaged", cap_values = FALSE) dt_averaged_cap_ignored <- convert_se_assay_to_custom_dt(se, assay_name = "Averaged", cap_values = TRUE) - + expect_identical(dt_averaged_no_cap, dt_averaged_cap_ignored) }) @@ -325,15 +325,15 @@ test_that("capVals works as expected", { attr(dt2c, "index") <- NULL attr(dt2c_2, "index") <- NULL attr(dt3c, "index") <- NULL - + expect_false(identical(dt1c, dt1)) expect_identical(dt2c, dt2) expect_identical(dt2c_2, dt2[, 1:4]) expect_identical(dt3c, dt3) - + # values are capped correctly expect_equal(dt1c, dt2) - + expect_error(capVals(as.list(dt1)), "Must be a data.table") }) @@ -345,18 +345,18 @@ test_that("update_drug_name works as expected", { Var2 = c(NA, "Z", "W") ) additional_vars <- c("Var1", "Var2") - + dt_updated <- update_drug_name(dt, additional_vars) - + expect_equal(dt_updated[1, ]$DrugName, "D1 (Var1 = X)") expect_equal(dt_updated[1, ]$Gnumber, "G1 (Var1 = X)") - + expect_equal(dt_updated[2, ]$DrugName, "D2 (Var2 = Z)") expect_equal(dt_updated[2, ]$Gnumber, "G2 (Var2 = Z)") - + expect_equal(dt_updated[3, ]$DrugName, "D3 (Var1 = Y) (Var2 = W)") expect_equal(dt_updated[3, ]$Gnumber, "G3 (Var1 = Y) (Var2 = W)") - + expect_warning(update_drug_name(dt, c("Var1", "NonExistent")), "Additional variable 'NonExistent'") }) @@ -364,11 +364,11 @@ test_that("convert_se_assay_to_dt merges additional variables", { m <- 4 n <- 1 ref_gr_value <- matrix(runif(m * n), nrow = m, ncol = n, dimnames = list(LETTERS[1:m], "c1")) - + rData_with_extra <- S4Vectors::DataFrame( rId = LETTERS[1:m], - Gnumber = LETTERS[1:m], - DrugName = paste0("Drug_", LETTERS[1:m]), + Gnumber = LETTERS[1:m], + DrugName = paste0("Drug_", LETTERS[1:m]), Plate_number = c("B1", NA, "B3", NA) ) se <- SummarizedExperiment::SummarizedExperiment( @@ -376,16 +376,16 @@ test_that("convert_se_assay_to_dt merges additional variables", { rowData = rData_with_extra, colData = S4Vectors::DataFrame(cnames = "c1") ) - + dt_merged <- convert_se_assay_to_dt( - se = se, - assay_name = "RefGRvalue", - include_metadata = TRUE, + se = se, + assay_name = "RefGRvalue", + include_metadata = TRUE, merge_additional_variables = TRUE ) - + expect_equal(dt_merged[rId == "A"]$DrugName, "Drug_A (Plate_number = B1)") expect_equal(dt_merged[rId == "A"]$Gnumber, "A (Plate_number = B1)") - + expect_equal(dt_merged[rId == "B"]$DrugName, "Drug_B") }) \ No newline at end of file diff --git a/tests/testthat/test-duplicates.R b/tests/testthat/test-duplicates.R index e25ca90e..ee6a6c3e 100644 --- a/tests/testthat/test-duplicates.R +++ b/tests/testthat/test-duplicates.R @@ -1,16 +1,16 @@ test_that("has_dt_duplicated_rows works as expected", { - + dt_iris <- data.table::data.table(iris) expect_true(has_dt_duplicated_rows(dt_iris)) expect_false(has_dt_duplicated_rows(dt_iris[1:100, ])) - + expect_true(has_dt_duplicated_rows(dt_iris[1:10, ], col_names = c("Sepal.Length", "Species"))) expect_error(has_dt_duplicated_rows(iris), "Assertion on 'dt' failed") expect_error( has_dt_duplicated_rows(dt_iris, col_names = "invalid_value"), "Assertion on 'col_names' failed" ) - + }) test_that("get_duplicated_rows works as expected", { @@ -19,8 +19,8 @@ test_that("get_duplicated_rows works as expected", { "Gnumber_2" = c("G9876543.1-1", "G9876543.1-1", "G9876543.1-1"), "DrugName_2" = c("codrug_name1", "codrug_name1", "codrug_name1"), "Concentration_2" = c("untreated", "untreated", "untreated")) - - + + # single column expect_equal( get_duplicated_rows(DF1co, col_names = "DrugName"), @@ -46,20 +46,20 @@ test_that("get_duplicated_rows works as expected", { get_duplicated_rows(DF1co, col_names = "DrugName", output = "data"), DF1co[1:2, ] ) - + expect_error(get_duplicated_rows(DF1co, c("DrugName", "Fake Column")), "Assertion on 'all(col_names %in% colnames(x))' failed: Must be TRUE.", fixed = TRUE) }) test_that("[has|get]_assay_dt_duplicated_rows works as expected", { - + # single-agent data sdata <- get_synthetic_data("finalMAE_small") smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics") smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ]) expect_false(has_assay_dt_duplicated_rows(smetrics_data)) expect_true(has_assay_dt_duplicated_rows(smetrics_dup_data)) - + expect_equal(get_assay_dt_duplicated_rows(smetrics_dup_data), c(1:10, 201:210)) expect_equal( @@ -70,14 +70,14 @@ test_that("[has|get]_assay_dt_duplicated_rows works as expected", { empty_dt <- get_assay_dt_duplicated_rows(smetrics_data, output = "data") expect_true(nrow(empty_dt) == 0) expect_is(empty_dt, "data.table") - + # combo data cdata <- get_synthetic_data("finalMAE_combo_matrix_small") cscores_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], "scores") cscores_dup_data <- rbind(cscores_data, cscores_data[1:10, ]) expect_false(has_assay_dt_duplicated_rows(cscores_data)) expect_true(has_assay_dt_duplicated_rows(cscores_dup_data)) - + expect_equal(get_assay_dt_duplicated_rows(cscores_dup_data), c(1:10, 25:34)) expect_equal( @@ -91,11 +91,11 @@ test_that("[has|get]_assay_dt_duplicated_rows works as expected", { }) test_that("throw_msg_if_duplicates works as expected", { - + sdata <- get_synthetic_data("finalMAE_small") smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics") smetrics_dup_data <- rbind(smetrics_data, smetrics_data[1:10, ]) - + exp_msg <- "rows are duplicated" expect_error(throw_msg_if_duplicates(smetrics_dup_data, "Metrics"), exp_msg) expect_warning(throw_msg_if_duplicates(smetrics_dup_data, "Metrics", msg_f = warning), exp_msg) diff --git a/tests/testthat/test-experiment_validators.R b/tests/testthat/test-experiment_validators.R index 5b6c0967..e78771d0 100644 --- a/tests/testthat/test-experiment_validators.R +++ b/tests/testthat/test-experiment_validators.R @@ -4,7 +4,7 @@ test_that("validate_dimnames works as expected", { expect_error(validate_dimnames(data.frame(a = 2, b = 3), data.frame(a = 3, c = 4)), err_msg1, fixed = TRUE) expect_error(validate_dimnames(data.frame(a = 2, b = 3), data.frame(a = 3, b = 4, c = 4)), err_msg1, fixed = TRUE) expect_error(validate_dimnames(data.frame(), data.frame(a = 3, b = 4, c = 4)), err_msg1, fixed = TRUE) - + expect_null(validate_dimnames(data.frame(a = 2, b = 3), data.frame(a = 3, b = 4))) expect_null(validate_dimnames(data.frame(), data.frame())) }) @@ -18,18 +18,18 @@ test_that("validate_se_assay_name works as expected", { test_that("validate_SE works as expected", { se1 <- SummarizedExperiment::SummarizedExperiment(assays = list("orange" = matrix(1, 1, 1))) expect_error(validate_SE(se1)) - x <- IRanges::NumericList(split(runif(1000), factor(sample(50, 1000, replace = TRUE), 1:50))) - se2 <- SummarizedExperiment::SummarizedExperiment(assays = + x <- IRanges::NumericList(split(runif(1000), factor(sample(50, 1000, replace = TRUE), 1:50))) + se2 <- SummarizedExperiment::SummarizedExperiment(assays = list("RawTreated" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Controls" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Normalized" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "RefGRvalue" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), - "RefRelativeViability" = + "RefRelativeViability" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "DivisionTime" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Averaged" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Metrics" = BumpyMatrix::BumpyMatrix(x, c(10, 5)))) - + expect_error(validate_SE(se2)) S4Vectors::metadata(se2) <- vector(mode = "list", length = 7) names(S4Vectors::metadata(se2)) <- c("experiment_metadata", @@ -39,7 +39,7 @@ test_that("validate_SE works as expected", { "drug_combinations", ".internals") expect_error(validate_SE(se2)) - + }) @@ -61,32 +61,32 @@ test_that("validate_mae works as expected", { n <- 10 rnames <- LETTERS[1:m] cnames <- letters[1:n] - + ref_gr_value <- matrix(runif(m * n), nrow = m, ncol = n, dimnames = list(rnames, cnames)) se1 <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value[1:10, ]), rowData = S4Vectors::DataFrame(rnames)[1:10, , drop = FALSE], colData = S4Vectors::DataFrame(cnames)) - + se2 <- SummarizedExperiment::SummarizedExperiment(assays = list(RefGRvalue = ref_gr_value[11:20, ]), rowData = S4Vectors::DataFrame(rnames)[11:20, , drop = FALSE], colData = S4Vectors::DataFrame(cnames)) - + mae <- MultiAssayExperiment::MultiAssayExperiment(experiments = list(one = se1, two = se2)) - + expect_error(validate_MAE(mae)) - x <- IRanges::NumericList(split(runif(1000), factor(sample(50, 1000, replace = TRUE), 1:50))) - se3 <- SummarizedExperiment::SummarizedExperiment(assays = + x <- IRanges::NumericList(split(runif(1000), factor(sample(50, 1000, replace = TRUE), 1:50))) + se3 <- SummarizedExperiment::SummarizedExperiment(assays = list("RawTreated" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Controls" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Normalized" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "RefGRvalue" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), - "RefRelativeViability" = + "RefRelativeViability" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "DivisionTime" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Averaged" = BumpyMatrix::BumpyMatrix(x, c(10, 5)), "Metrics" = BumpyMatrix::BumpyMatrix(x, c(10, 5)))) - + colData(se3) <- methods::new( "DFrame", rownames = c("A", "B", "C", "D", "E"), @@ -96,9 +96,9 @@ test_that("validate_mae works as expected", { elementMetadata = NULL, metadata = list() ) - + mae2 <- MultiAssayExperiment::MultiAssayExperiment(experiments = list("single-agent" = se3)) - + maeReal <- get_synthetic_data("finalMAE_small") validate_MAE(maeReal) maeReal2 <- MultiAssayExperiment::MultiAssayExperiment(experiments = list("single-agent" = maeReal[[1]], diff --git a/tests/testthat/test-fit_curves.R b/tests/testthat/test-fit_curves.R index 0079850a..c070f817 100644 --- a/tests/testthat/test-fit_curves.R +++ b/tests/testthat/test-fit_curves.R @@ -31,7 +31,7 @@ test_that("NA values are handled correctly", { df_resp_NA <- df_resp df_resp_NA[, "x"] <- NA expect_warning(fit_curves(df_resp_NA, series_identifiers = "Concentration")) - + df_result_NA <- purrr::quietly(fit_curves)(df_resp_NA, series_identifiers = "Concentration") expect_length(df_result_NA$warnings, 2) expect_true(all(is.na(df_result_NA$result[, "xc50"]))) @@ -269,7 +269,7 @@ test_that(".estimate_xc50 works as expected", { test_that("average_dups works as expected", { df <- data.table::data.table(concs = rep(seq(5), each = 2), norm_value = seq(10)) - expect_equal(average_dups(df, "concs"), + expect_equal(average_dups(df, "concs"), data.table::data.table(concs = seq(5), norm_value = seq(1.5, 9.5, 2))) }) @@ -372,7 +372,7 @@ test_that(".calculate_xc50 works as expected", { test_that("cap_xc50 works as expected", { expect_error(cap_xc50(xc50 = c(1, 2), max_conc = c(10, 10), capping_fold = 5)) - + expect_equal(cap_xc50(xc50 = 26, max_conc = 5, capping_fold = 5), Inf) expect_equal(cap_xc50(xc50 = 1e-6, max_conc = 5, capping_fold = 5), -Inf) expect_equal(cap_xc50(xc50 = 1, max_conc = 5, capping_fold = 5), 1) @@ -385,13 +385,13 @@ test_that("predict_efficacy_from_conc works as expected", { x_0 <- 1 ec50 <- 0.5 conc <- c(0, 10 ^ (seq(-3, 1, 0.5))) - + out <- predict_efficacy_from_conc(conc, x_inf, x_0, ec50, h) - - res <- c(x_0, - vapply(conc[2:NROW(conc)], + + res <- c(x_0, + vapply(conc[2:NROW(conc)], function(c) x_inf + (x_0 - x_inf) / (1 + (c / ec50) ^ h), numeric(1))) - + expect_equal(out, res) }) @@ -406,26 +406,26 @@ test_that("predict_smooth_from_combo works as expected", { x_inf = c(0.1, 0.2, 0.1, 0.3, 0.15), x_0 = c(1, 1, 1, 1, 1) ) - + on_grid_pred <- predict_smooth_from_combo(conc_1 = 1, conc_2 = 10, metrics_merged = metrics) - - c1_ongrid <- 0.2 + (1 - 0.2) / (1 + (1 / 1.5)^2) - c2_ongrid <- 0.3 + (1 - 0.3) / (1 + (10 / 6)^2) + + c1_ongrid <- 0.2 + (1 - 0.2) / (1 + (1 / 1.5)^2) + c2_ongrid <- 0.3 + (1 - 0.3) / (1 + (10 / 6)^2) c3_ongrid <- 0.15 + (1 - 0.15) / (1 + ((1 + 10) / 10)^2) expected_val_ongrid <- mean(c(c1_ongrid, c2_ongrid, c3_ongrid)) expect_equal(on_grid_pred, expected_val_ongrid, tolerance = 1e-4) - + expect_message( snapped_pred <- predict_smooth_from_combo(conc_1 = 1.1, conc_2 = 9.8, metrics_merged = metrics), "Using models for nearest concentrations" ) - + c1_snapped <- 0.2 + (1 - 0.2) / (1 + (1.1 / 1.5)^2) # Use model for cotrt=10, predict at conc=1.1 c2_snapped <- 0.3 + (1 - 0.3) / (1 + (9.8 / 6)^2) # Use model for cotrt=1, predict at conc=9.8 c3_snapped <- 0.15 + (1 - 0.15) / (1 + ((1.1 + 9.8) / 10)^2) # Use codilution model, predict at sum=10.9 expected_val_snapped <- mean(c(c1_snapped, c2_snapped, c3_snapped)) expect_equal(snapped_pred, expected_val_snapped, tolerance = 1e-4) - + bad_metrics <- metrics[, -c("ec50")] expect_error( predict_smooth_from_combo(conc_1 = 1, conc_2 = 10, metrics_merged = bad_metrics), @@ -435,19 +435,19 @@ test_that("predict_smooth_from_combo works as expected", { test_that(".snap_conc_to_model works as expected", { available <- c(0.1, 0.3, 1, 3, 10) - + # Snaps to the closest value on a log scale. expect_equal(.snap_conc_to_model(user_conc = 1.1, available_concs = available), 1) expect_equal(.snap_conc_to_model(user_conc = 0.2, available_concs = available), 0.3) expect_equal(.snap_conc_to_model(user_conc = 0.1, available_concs = available), 0.1) - + # Snaps to min/max when outside the range. expect_equal(.snap_conc_to_model(user_conc = 0.01, available_concs = available), 0.1) expect_equal(.snap_conc_to_model(user_conc = 100, available_concs = available), 10) - + # Handles empty input. expect_true(is.na(.snap_conc_to_model(user_conc = 1, available_concs = numeric(0)))) - + expect_error( .snap_conc_to_model(user_conc = c(1, 2), available_concs = available), "Assertion on 'user_conc' failed: Must have length 1." diff --git a/tests/testthat/test-flatten.R b/tests/testthat/test-flatten.R index 7e204d87..825cf0c0 100644 --- a/tests/testthat/test-flatten.R +++ b/tests/testthat/test-flatten.R @@ -35,7 +35,7 @@ test_that("flatten works as expected", { expect_setequal(colnames(out3), c("id", "id2", "constant", "RV_GDS_wide", "GR_GDR_wide", "RV_GDR_wide")) - + repgrid3$x <- runif(nrow(repgrid3)) out4 <- flatten(repgrid3, groups = "normalization_type", wide_cols = "x") expect_setequal(colnames(out4), diff --git a/tests/testthat/test-headers.R b/tests/testthat/test-headers.R index 5193b403..17a24454 100644 --- a/tests/testthat/test-headers.R +++ b/tests/testthat/test-headers.R @@ -1,20 +1,19 @@ test_that("get_header works", { reset_env_identifiers() - + expect_error(get_header("BOGUS")) expect_equal(get_header("manifest"), list(barcode = c("Barcode", "Plate"), template = c("Template", "Treatment"), duration = "Duration")) expect_equal(length(get_header()), 23) - + set_env_identifier("duration", "TEST_DURATION") expect_equal(get_header("manifest"), list(barcode = c("Barcode", "Plate"), - template = c("Template", "Treatment"), + template = c("Template", "Treatment"), duration = "TEST_DURATION")) - + reset_env_identifiers() expect_equal(get_header("manifest"), list(barcode = c("Barcode", "Plate"), - template = c("Template", "Treatment"), + template = c("Template", "Treatment"), duration = "Duration")) }) - diff --git a/tests/testthat/test-json_const.R b/tests/testthat/test-json_const.R index d8241d28..a381665f 100644 --- a/tests/testthat/test-json_const.R +++ b/tests/testthat/test-json_const.R @@ -10,7 +10,7 @@ test_that("get_settings_from_json works as expected", { get_settings_from_json(s = "no_such_entry", json_path = json_path), "Assertion on 's' failed" ) - + s <- get_settings_from_json(json_path = json_path) expect_true(is.list(s)) exp_names <- @@ -22,10 +22,10 @@ test_that("get_settings_from_json works as expected", { expect_identical(sort(names(s)), sort(exp_names)) expect_true(is.logical(s$DICT_WITH_LOGICAL$axisTitleText)) expect_identical(s$DICT_WITH_LISTS$`GR AOC within set range`, c(0L, 1L)) - + s2 <- get_settings_from_json(s = "DICT", json_path = json_path) expect_identical(s$DICT, s2) - + expect_no_error(get_settings_from_json()) }) @@ -35,14 +35,14 @@ test_that("get_isobologram_columns as expected", { expect_equal( get_isobologram_columns(), c("Iso_Level", "Pos_x", "Pos_x_Ref", "Pos_y", "Pos_y_Ref", "Log10_Ratio_Conc", "Log2_CI")) - + expect_equal( get_isobologram_columns(prettify = FALSE), c("iso_level", "pos_x", "pos_x_ref", "pos_y", "pos_y_ref", "log10_ratio_conc", "log2_CI")) - + expect_equal(get_isobologram_columns("iso_level", prettify = TRUE), "Iso_Level") expect_equal(get_isobologram_columns("iso_level", prettify = FALSE), "iso_level") - + expect_error(get_isobologram_columns(1)) expect_error(get_isobologram_columns(prettify = 2)) }) diff --git a/tests/testthat/test-json_convert.R b/tests/testthat/test-json_convert.R index dc6dad91..59298713 100644 --- a/tests/testthat/test-json_convert.R +++ b/tests/testthat/test-json_convert.R @@ -79,4 +79,3 @@ test_that("strip_first_and_last_char works as expected", { expect_equal(strip_first_and_last_char("hello"), "ell") expect_equal(strip_first_and_last_char("{}"), "") }) - diff --git a/tests/testthat/test-merge_SE.R b/tests/testthat/test-merge_SE.R index 25671de2..0901aa88 100644 --- a/tests/testthat/test-merge_SE.R +++ b/tests/testthat/test-merge_SE.R @@ -32,7 +32,7 @@ test_that("merge_metadata and identify_unique_se_metadata_fields work as expecte if ("experiment_metadata" %in% names(mergedMetadata)) { expect_true("sources" %in% names(mergedMetadata$experiment_metadata)) } - + listSE2 <- listSE newMetaName <- "dummy_meta" S4Vectors::metadata(listSE2$combo1)[[newMetaName]] <- list() @@ -48,10 +48,10 @@ test_that("merge_SE works as expected", { checkmate::expect_class(mergedSE$result, "SummarizedExperiment") S4Vectors::metadata(mergedSE$result)[["df_raw_data"]] <- list(NULL) validate_SE(mergedSE$result) - + additional_col_name <- "QCS" mergedSE2 <- purrr::quietly(merge_SE)(listSE, additional_col_name) - assayNormalized <- convert_se_assay_to_dt(mergedSE2$result, "Metrics") + assayNormalized <- convert_se_assay_to_dt(mergedSE2$result, "Metrics") expect_true(additional_col_name %in% names(assayNormalized)) expect_identical(unique(assayNormalized[[additional_col_name]]), names(listSE)) expect_identical(SummarizedExperiment::assayNames(listSE[[1]]), @@ -65,10 +65,10 @@ test_that("merge_SE works as expected with combo matrix data", { checkmate::expect_class(mergedSE$result, "SummarizedExperiment") S4Vectors::metadata(mergedSE$result)[["df_raw_data"]] <- list(NULL) validate_SE(mergedSE$result) - + additional_col_name <- "QCS" mergedSE2 <- purrr::quietly(merge_SE)(listSE2, additional_col_name) - assayNormalized <- convert_se_assay_to_dt(mergedSE2$result, "Metrics") + assayNormalized <- convert_se_assay_to_dt(mergedSE2$result, "Metrics") expect_true(additional_col_name %in% names(assayNormalized)) expect_identical(unique(assayNormalized[[additional_col_name]]), names(listSE)) expect_identical(SummarizedExperiment::assayNames(listSE2[[1]]), @@ -93,30 +93,30 @@ test_that("merge_SE works with data with additional perturbations", { test_that("merge_MAE works as expected with synthetic metadata injection", { custom_title <- "Unit Test Merged MAE" custom_source_id <- "test_dataset_001" - + mergedMAE <- purrr::quietly(merge_MAE)( - listMAE, + listMAE, title = custom_title, source_id = custom_source_id ) checkmate::expect_class(mergedMAE$result, "MultiAssayExperiment") validate_MAE(mergedMAE$result) - + mae_meta <- S4Vectors::metadata(mergedMAE$result) if (!is.null(mae_meta$.internal$DataSetDB$dataset)) { expect_equal(mae_meta$.internal$DataSetDB$dataset$title, custom_title) expect_equal(mae_meta$.internal$DataSetDB$dataset$sources[[1]]$id, custom_source_id) } - + se_meta <- S4Vectors::metadata(mergedMAE$result[[1]])$experiment_metadata expect_equal(se_meta$title, custom_title) expect_equal(se_meta$sources[[1]]$id, custom_source_id) - + expect_identical( SummarizedExperiment::assayNames(MultiAssayExperiment::experiments(listMAE[[1]])[[1]]), SummarizedExperiment::assayNames(MultiAssayExperiment::experiments(mergedMAE$result)[[1]]) ) - + listMAE_mixed <- listMAE MultiAssayExperiment::experiments(listMAE_mixed[[1]]) <- MultiAssayExperiment::experiments(listMAE_mixed[[1]])[2] mergedMAE2 <- purrr::quietly(merge_MAE)(listMAE_mixed) diff --git a/tests/testthat/test-prettify.R b/tests/testthat/test-prettify.R index e07dc3af..3925930e 100644 --- a/tests/testthat/test-prettify.R +++ b/tests/testthat/test-prettify.R @@ -1,28 +1,28 @@ test_that("prettify_flat_metrics works as expected", { x <- c("CellLineName", "Tissue", "Primary Tissue", - "GR_gDR_x_mean", "GR_gDR_xc50", - "RV_GDS_x_mean", + "GR_gDR_x_mean", "GR_gDR_xc50", + "RV_GDS_x_mean", "Concentration_2", "Gnumber_2", "Drug_3", "E_0", "GR_gDR_x_AOC_range" ) - + y <- c("Gnumber", "Gnumber_2", "MyDrug", "MyDrug_2") - + obs <- prettify_flat_metrics(x, human_readable = FALSE) exp <- c("CellLineName", "Tissue", "Primary Tissue", - "GR_mean", "GR50", - "GDS_RV_mean", + "GR_mean", "GR50", + "GDS_RV_mean", "Concentration_2", "Gnumber_2", "Drug_3", "E_0", "GR_AOC_range") expect_equal(obs, exp) - + # Human readable names work. obs <- prettify_flat_metrics(x, human_readable = TRUE) exp <- c("Cell Line Name", "Tissue", "Primary Tissue", - "GR Mean", "GR50", + "GR Mean", "GR50", "RV Mean (GDS)", "Concentration 2", "Gnumber 2", "Drug 3", "E0", "GR AOC within set range") @@ -33,12 +33,12 @@ test_that("prettify_flat_metrics works as expected", { }) test_that(".convert_norm_specific_metrics works as expected", { - + idfs <- get_env_identifiers(k = NULL, simplify = TRUE) norm_type <- c("GR", "RV") expect_equal(.convert_norm_specific_metrics(idfs, norm_type), idfs) expect_equal(.convert_norm_specific_metrics(idfs, c("AB", "BC")), idfs) - + col_name_1 <- c( "rId", "cId", "GR_gDR_x_mean", "GR_gDR_x_AOC", "GR_gDR_x_AOC_range", "GR_gDR_xc50", "GR_gDR_x_max", "GR_gDR_ec50", "GR_gDR_x_inf", "GR_gDR_x_0", "GR_gDR_h", "GR_gDR_r2", @@ -60,8 +60,8 @@ test_that(".convert_norm_specific_metrics works as expected", { "_gDR_fit_type_RV" ) expect_equal(.convert_norm_specific_metrics(col_name_1, norm_type), col_name_1_exp) - - + + col_name_2 <- c( "rId", "cId", "Concentration", "Gnumber", "DrugName", "drug_moa", "Duration", "clid", "CellLineName", "Tissue", "parental_identifier", "subtype", @@ -69,7 +69,7 @@ test_that(".convert_norm_specific_metrics works as expected", { ) expect_equal(.convert_norm_specific_metrics(col_name_2, norm_type), col_name_2) expect_equal(.convert_norm_specific_metrics(col_name_2, c("AB", "BC")), col_name_2) - + col_name_combo_1 <- c( "rId", "cId", "Gnumber", "DrugName", "drug_moa", "Gnumber_2", "DrugName_2", "drug_moa_2", "Duration", "clid", "CellLineName", "Tissue", @@ -78,7 +78,7 @@ test_that(".convert_norm_specific_metrics works as expected", { ) expect_equal(.convert_norm_specific_metrics(col_name_combo_1, norm_type), col_name_combo_1) expect_equal(.convert_norm_specific_metrics(col_name_combo_1, c("AB", "BC")), col_name_combo_1) - + col_name_combo_2 <- c( "rId", "cId", "Concentration", "Concentration_2", "Gnumber", "DrugName", "drug_moa", "Gnumber_2", "DrugName_2", "drug_moa_2", "Duration", "clid", @@ -87,7 +87,7 @@ test_that(".convert_norm_specific_metrics works as expected", { ) expect_equal(.convert_norm_specific_metrics(col_name_combo_2, norm_type), col_name_combo_2) expect_equal(.convert_norm_specific_metrics(col_name_combo_2, c("AB", "BC")), col_name_combo_2) - + col_name_3 <- c( "GR_gDR_x_max", "GR_gDR_ec50", "smooth_GR", "bliss_excess_GR", "RV_gDR_x_max", "RV_gDR_ec50", "smooth_RV", "bliss_excess_RV" @@ -97,7 +97,7 @@ test_that(".convert_norm_specific_metrics works as expected", { "_gDR_E_max", "_gDR_EC50", "smooth_RV", "bliss_excess_RV" ) expect_equal(.convert_norm_specific_metrics(col_name_3, c("AB", "RV")), col_name_3_exp) - + }) test_that(".prettify_metadata_columns works as expected", { @@ -108,10 +108,9 @@ test_that(".prettify_metadata_columns works as expected", { ) col_name_exp <- c( "EC50", "E Inf", "E 0", "h RV", - "Reference Division Time", "Relative Viability", "GRvalue", "Std Relative Viability", "Std GRvalue", + "Reference Division Time", "Relative Viability", "GRvalue", "Std Relative Viability", "Std GRvalue", "Smooth GR", "HSA Excess GR", "Bliss Excess GR" ) expect_equal(.prettify_metadata_columns(col_name), col_name_exp) - -}) +}) diff --git a/tests/testthat/test-se_metadata.R b/tests/testthat/test-se_metadata.R index 32144bfc..9178edd3 100644 --- a/tests/testthat/test-se_metadata.R +++ b/tests/testthat/test-se_metadata.R @@ -7,13 +7,13 @@ test_that("get_SE_experiment_metadata and set_SE_experiment_metadata work as exp se <- set_SE_experiment_metadata(se, exp_md) oexp_md <- get_SE_experiment_metadata(se) expect_equal(oexp_md, exp_md) - + append_md_iga <- list("Iga" = "Swiatek") se <- set_SE_experiment_metadata(se, append_md_iga, append = TRUE) oexp_md <- get_SE_experiment_metadata(se) expected_md_append <- list("Super" = "Star", "Serena" = "Williams", "Iga" = "Swiatek") expect_equal(oexp_md, expected_md_append) - + overwrite_md_iga <- list("Iga" = "Swiatek") expect_warning(se <- set_SE_experiment_metadata(se, overwrite_md_iga, @@ -51,9 +51,9 @@ test_that("get_SE_fit_parameters and set_SE_fit_parameters work as expected", { cap = 0.2) se <- SummarizedExperiment::SummarizedExperiment(metadata = list()) fit_params <- get_SE_fit_parameters(se) - + expect_equal(fit_params, NULL) - + se <- set_SE_fit_parameters(se, params) expect_equal(get_SE_fit_parameters(se), params) }) @@ -65,18 +65,18 @@ test_that("get_SE_identifiers and set_SE_identifiers works as expected", { # No identifier passed. obs <- get_SE_identifiers(se, simplify = TRUE) expect_equal(obs, exp) - + # Single identifier. obs <- get_SE_identifiers(se, "cellline_name", simplify = TRUE) expect_equal(obs, exp[["cellline_name"]]) - + # Invalid identifier. exp <- list("drug" = "drug", "celllinename" = "CellLineName", "buggy_idfs" = "test", "masked_tag" = "masked") se <- SummarizedExperiment::SummarizedExperiment(metadata = list(identifiers = exp)) expect_equal(get_SE_identifiers(se), exp, simplify = FALSE) - expect_error(get_SE_identifiers(se, "buggy_idfs", simplify = TRUE), + expect_error(get_SE_identifiers(se, "buggy_idfs", simplify = TRUE), "Assertion on 'id_type' failed: Must be element of set") - expect_error(get_SE_identifiers(se, "INVALID", simplify = TRUE), + expect_error(get_SE_identifiers(se, "INVALID", simplify = TRUE), "Assertion on 'id_type' failed: Must be element of set") # Identifier does not exist on the SummarizedExperiment, @@ -95,9 +95,9 @@ test_that("get_SE_identifiers and set_SE_identifiers works as expected", { # Multiple identifiers. exp <- list("drug_name" = "Drugs", "cellline_name" = "Cells", "duration" = "Duration") se <- SummarizedExperiment::SummarizedExperiment(metadata = list(identifiers = exp)) - expect_equal(get_SE_identifiers(se, c("drug_name", "duration"), simplify = FALSE), + expect_equal(get_SE_identifiers(se, c("drug_name", "duration"), simplify = FALSE), list(drug_name = "Drugs", duration = "Duration")) # Env and se identifiers. - expect_equal(get_SE_identifiers(se, c("cellline_name", "drug_name"), simplify = FALSE), + expect_equal(get_SE_identifiers(se, c("cellline_name", "drug_name"), simplify = FALSE), list(cellline_name = "Cells", drug_name = "Drugs")) # Order. }) @@ -106,9 +106,9 @@ test_that("get_SE_processing_metadata and set_SE_processing_metadata work as exp session_info = sessionInfo()) se <- SummarizedExperiment::SummarizedExperiment(metadata = list()) processing_metadata <- get_SE_processing_metadata(se) - + expect_equal(processing_metadata, NULL) - + se <- set_SE_processing_metadata(se, params) expect_equal(get_SE_processing_metadata(se), params) }) @@ -121,4 +121,3 @@ test_that("get_SE_experiment_raw_data and set_SE_experiment_raw_data work as exp se <- set_SE_experiment_raw_data(se, raw_data) expect_equal(get_SE_experiment_raw_data(se), raw_data) }) - diff --git a/tests/testthat/test-split_SE_Components.R b/tests/testthat/test-split_SE_Components.R index e83ad568..e5f85f19 100644 --- a/tests/testthat/test-split_SE_Components.R +++ b/tests/testthat/test-split_SE_Components.R @@ -4,7 +4,7 @@ test_that("split_SE_components splits the correct columns", { expect_true(all(c("Gnumber", "DrugName", "drug_moa") %in% colnames(md$treatment_md))) expect_true(all(c("clid", "CellLineName", "Tissue", "Replicate", "ReferenceDivisionTime") %in% colnames(md$condition_md))) - expect_equal(sum(ncol(md$treatment_md), ncol(md$condition_md), length(md$data_fields), ncol(md$experiment_md)), + expect_equal(sum(ncol(md$treatment_md), ncol(md$condition_md), length(md$data_fields), ncol(md$experiment_md)), ncol(test_df)) pure <- get_env_identifiers(simplify = TRUE) expect_equal(md$identifiers_md[names(pure)], pure) @@ -14,16 +14,16 @@ test_that("split_SE_components splits the correct columns", { expect_true(all(c("Gnumber", "DrugName", "drug_moa") %in% colnames(md2$treatment_md))) expect_true(all(c("clid", "CellLineName", "Tissue", "ReferenceDivisionTime") %in% colnames(md2$condition_md))) expect_true(all(c("WellRow", "WellColumn", "Replicate") %in% md2$data_fields)) - expect_equal(ncol(test_df), + expect_equal(ncol(test_df), sum(ncol(md2$treatment_md), ncol(md2$condition_md), length(md2$data_fields), ncol(md2$experiment_md))) # combine_on argument works as expected md3 <- split_SE_components(test_df, nested_keys = "Replicate", combine_on = 2L) expect_true(all(c("Gnumber", "DrugName", "drug_moa") %in% colnames(md3$treatment_md))) - expect_true(all(c("clid", "CellLineName", "Tissue", "ReferenceDivisionTime") %in% + expect_true(all(c("clid", "CellLineName", "Tissue", "ReferenceDivisionTime") %in% colnames(md3$condition_md))) expect_true(all(c("WellRow", "WellColumn", "Replicate") %in% md3$data_fields)) - expect_equal(ncol(test_df), + expect_equal(ncol(test_df), sum(ncol(md3$treatment_md), ncol(md3$condition_md), length(md3$data_fields), ncol(md3$experiment_md))) # nested key is a main identifier. @@ -32,12 +32,12 @@ test_that("split_SE_components splits the correct columns", { expect_true(all(c("clid", "CellLineName", "Replicate", "Tissue", "ReferenceDivisionTime") %in% colnames(md4$condition_md))) expect_true(all(c("WellRow", "WellColumn", "drug_moa") %in% md4$data_fields)) - expect_equal(ncol(test_df), + expect_equal(ncol(test_df), sum(ncol(md4$treatment_md), ncol(md4$condition_md), length(md4$data_fields), ncol(md4$experiment_md))) - + # order of columns is correct expect_equal(names(md$treatment_md), c("Gnumber", "DrugName", "drug_moa", "Duration")) - + # split_SE_components with changed identifiers new_identifier_name <- "SomeDrug" set_env_identifier("drug", new_identifier_name) @@ -50,9 +50,9 @@ test_that("split_SE_components splits the correct columns", { test_that("add_rownames_to_metadata works as expected", { cols <- c("a", "b") md <- data.frame(a = LETTERS, b = letters, c = paste0(LETTERS, letters)) - expect_true(all(rownames(md) == as.character(seq(nrow(md))))) + expect_true(all(rownames(md) == as.character(seq_len(NROW(md))))) out <- add_rownames_to_metadata(md, cols) - expect_true(all(rownames(out) != as.character(seq(nrow(md))))) + expect_true(all(rownames(out) != as.character(seq_len(NROW(md))))) expect_equal(colnames(out), cols) }) @@ -78,7 +78,7 @@ test_that("split_SE_components sorts non-default columns", { md <- split_SE_components(test_df3) expect_identical(grep("fix5-aza", names(md$treatment_md), value = TRUE), c("a-fix5-aza", "b-fix5-aza", "fix5-aza")) - + md2 <- split_SE_components(test_df3, combine_on = 2) expect_identical(grep("fix5-aza", names(md2$condition_md), value = TRUE), c("a-fix5-aza", "b-fix5-aza", "fix5-aza")) @@ -90,6 +90,3 @@ test_that("split_SE_components sorts non-default columns", { md3 <- split_SE_components(test_df4) expect_identical(sort(rownames(md$treatment_md)), sort(rownames(md3$treatment_md))) }) - - - diff --git a/tests/testthat/test-standardize_MAE.R b/tests/testthat/test-standardize_MAE.R index 4286abdc..458788c1 100644 --- a/tests/testthat/test-standardize_MAE.R +++ b/tests/testthat/test-standardize_MAE.R @@ -74,10 +74,10 @@ test_that("colData/rowData refinement functions work as expected", { "DataFrame")) expect_true(inherits(refine_rowdata(SummarizedExperiment::rowData(mae[[1]]), mae[[1]]), "DataFrame")) - + expect_error(refine_coldata(mae, mae), "Assertion on 'se' failed:") expect_error(refine_rowdata(mae, mae), "Assertion on 'se' failed:") - + }) @@ -87,7 +87,7 @@ test_that("get_optional_rowdata_fields works as expected", { opt_idfs <- get_optional_rowdata_fields(se) expect_equal(opt_idfs, unlist(idfs[c("drug_moa", "drug_moa2")], use.names = FALSE)) - + se2 <- get_synthetic_data("finalMAE_small")[[1]] idfs2 <- get_SE_identifiers(se2) opt_idfs2 <- get_optional_rowdata_fields(se2) @@ -101,22 +101,22 @@ test_that("set_unique_names works correctly", { clid = c("C1", "C2")) t_df <- data.frame(CellLineName = c("ID1", "ID1"), clid = c("C1", "C2")) - + u_dframe <- set_unique_names_dt(t_dframe, primary_name = "CellLineName", secondary_name = "clid") u_dt <- set_unique_names_dt(t_dt, primary_name = "CellLineName", secondary_name = "clid") u_df <- set_unique_names_dt(t_df, primary_name = "CellLineName", secondary_name = "clid") - + expect_equal(data.table::as.data.table(u_df), u_dt) expect_equal(data.table::as.data.table(u_dframe), u_dt) - + expect_error(set_unique_names_dt(list()), "Must inherit from") }) test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", { - + # DataFrame ## Duplicated CellLineName col_data <- S4Vectors::DataFrame(CellLineName = c("ID1", "ID1"), clid = c("C1", "C2")) @@ -125,7 +125,7 @@ test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", expect_equal(col_data, res_1) expect_false(identical(col_data, res_2)) expect_equal(c("ID1 (C1)", "ID1 (C2)"), res_2$CellLineName) - + ## Duplicated DrugName row_data <- S4Vectors::DataFrame(DrugName = c("DrugA", "DrugA"), Gnumber = c("G1", "G2")) res_3 <- set_unique_drug_names_dt(row_data) @@ -133,25 +133,25 @@ test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", expect_false(identical(row_data, res_3)) expect_equal(row_data, res_4) expect_equal(c("DrugA (G1)", "DrugA (G2)"), res_3$DrugName) - + # data.table ## All different dt <- data.table::data.table( - DrugName = c("DrugA", "DrugB", "DrugC", "DrugD"), + DrugName = c("DrugA", "DrugB", "DrugC", "DrugD"), Gnumber = c("G1", "G2", "G3", "G4"), - CellLineName = c("ID1", "ID2", "ID3", "ID4"), + CellLineName = c("ID1", "ID2", "ID3", "ID4"), clid = c("C1", "C2", "C3", "C4") ) res_5 <- set_unique_drug_names_dt(dt) res_6 <- set_unique_cl_names_dt(dt) expect_equal(res_5, dt) expect_equal(res_6, dt) - + ## Duplicated CellLineName dt <- data.table::data.table( - DrugName = c("DrugA", "DrugB", "DrugC", "DrugD", "DrugC", "DrugD"), + DrugName = c("DrugA", "DrugB", "DrugC", "DrugD", "DrugC", "DrugD"), Gnumber = c("G1", "G2", "G3", "G4", "G3", "G4"), - CellLineName = c("ID1", "ID1", "ID2", "ID2", "ID2", "ID2"), + CellLineName = c("ID1", "ID1", "ID2", "ID2", "ID2", "ID2"), clid = c("C1", "C2", "C3", "C4", "C5", "C6") ) res_7 <- set_unique_drug_names_dt(dt) @@ -162,9 +162,9 @@ test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", ## Duplicated DrugName dt <- data.table::data.table( - DrugName = c("DrugA", "DrugA", "DrugB", "DrugB", "DrugB", "DrugB"), + DrugName = c("DrugA", "DrugA", "DrugB", "DrugB", "DrugB", "DrugB"), Gnumber = c("G1", "G2", "G3", "G4", "G5", "G6"), - CellLineName = c("ID1", "ID2", "ID3", "ID4", "ID3", "ID4"), + CellLineName = c("ID1", "ID2", "ID3", "ID4", "ID3", "ID4"), clid = c("C1", "C2", "C3", "C4", "C3", "C4") ) res_9 <- set_unique_drug_names_dt(dt) @@ -172,12 +172,12 @@ test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", expect_false(identical(res_9, dt)) expect_equal(length(unique(res_9$DrugName)), 6) expect_equal(res_10, dt) - + ## Duplicated both dt <- data.table::data.table( - DrugName = c("DrugA", "DrugA", "DrugB", "DrugB"), + DrugName = c("DrugA", "DrugA", "DrugB", "DrugB"), Gnumber = c("G1", "G2", "G3", "G4"), - CellLineName = c("ID1", "ID1", "ID2", "ID2"), + CellLineName = c("ID1", "ID1", "ID2", "ID2"), clid = c("C1", "C2", "C3", "C4") ) res_11 <- set_unique_drug_names_dt(dt) @@ -188,7 +188,7 @@ test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", expect_false(identical(res_12, dt)) expect_equal(length(unique(res_12$DrugName)), 2) expect_equal(length(unique(res_12$CellLineName)), 4) - + ## Function works in the same way for data.table and DataFrame dt <- data.table::data.table( DrugName = c("DrugA", "DrugB", "DrugC", "DrugD", "DrugC", "DrugD"), @@ -196,12 +196,12 @@ test_that("set_unique_cl_names_dt and set_unique_drug_names_dt works correctly", CellLineName = c("ID1", "ID1", "ID2", "ID2", "ID2", "ID2"), clid = c("C1", "C2", "C3", "C4", "C5", "C6") ) - res_dt <- set_unique_cl_names_dt(dt) + res_dt <- set_unique_cl_names_dt(dt) df <- S4Vectors::DataFrame( DrugName = c("DrugA", "DrugB", "DrugC", "DrugD", "DrugC", "DrugD"), Gnumber = c("G1", "G2", "G3", "G4", "G3", "G4"), CellLineName = c("ID1", "ID1", "ID2", "ID2", "ID2", "ID2"), - clid = c("C1", "C2", "C3", "C4", "C5", "C6") + clid = c("C1", "C2", "C3", "C4", "C5", "C6") ) res_S4 <- set_unique_cl_names_dt(df) expect_equivalent(res_dt, res_S4) @@ -215,7 +215,7 @@ test_that("set_unique_cl_names works correctly", { colData = S4Vectors::DataFrame(CellLineName = c("ID1", "ID1"), clid = c("C1", "C2")) ) se <- set_unique_cl_names(se) - + expect_equal(SummarizedExperiment::colData(se)$CellLineName, c("ID1 (C1)", "ID1 (C2)")) }) @@ -226,9 +226,9 @@ test_that("set_unique_drug_names works correctly", { rowData = S4Vectors::DataFrame(DrugName = c("DrugA", "DrugA"), Gnumber = c("G1", "G2")) ) se <- set_unique_drug_names(se) - + expect_equal(SummarizedExperiment::rowData(se)$DrugName, c("DrugA (G1)", "DrugA (G2)")) - + se2 <- SummarizedExperiment::SummarizedExperiment( assays = list(counts = matrix(1:9, ncol = 3)), rowData = S4Vectors::DataFrame(DrugName = c("DrugA", "DrugA", "DrugB"), @@ -236,7 +236,7 @@ test_that("set_unique_drug_names works correctly", { DrugName_2 = c("DrugC", "DrugC", "DrugD"), Gnumber_2 = c("G3", "G3", "G5") )) - + se2 <- set_unique_drug_names(se2) expect_equal(SummarizedExperiment::rowData(se2)$DrugName, c("DrugA (G1)", "DrugA (G2)", "DrugB")) expect_equal(SummarizedExperiment::rowData(se2)$DrugName_2, c("DrugC", "DrugC", "DrugD")) @@ -259,7 +259,7 @@ test_that("set_unique_identifiers works correctly", { rownames(SummarizedExperiment::rowData(se2)) <- c("Gene3", "Gene4") mae <- MultiAssayExperiment::MultiAssayExperiment(experiments = list(se1 = se1, se2 = se2)) mae <- set_unique_identifiers(mae) - + expect_equal(SummarizedExperiment::colData(mae[[1]])$CellLineName, c("ID1 (C1)", "ID1 (C2)")) expect_equal(SummarizedExperiment::rowData(mae[[1]])$DrugName, c("DrugA (G1)", "DrugA (G2)")) expect_equal(SummarizedExperiment::colData(mae[[2]])$CellLineName, c("ID2 (C3)", "ID2 (C4)")) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 691d39c4..eb23c93c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -29,16 +29,16 @@ test_that("assert_equal_input_len works as expected", { h <- 2 efficacy <- 0.6 expect_equal(assert_equal_input_len(outlier = efficacy, ec50, x_0, x_inf, h), NULL) - + # Bad lengths. ec50 <- c(0.5, 0.5) expect_error(assert_equal_input_len(outlier = efficacy, ec50, x_0, x_inf, h)) - + # Length 1 fit parameters. ec50 <- 0.5 efficacy <- c(0.6, 0.7, 0.8) expect_equal(assert_equal_input_len(outlier = efficacy, ec50, x_0, x_inf, h), NULL) - + # Length 1 outlier. ec50 <- c(0.5, 0.6) x_0 <- c(1, 0.9) @@ -54,7 +54,7 @@ test_that("assert_choices", { expect_null(assert_choices(letters[1:2], letters)) expect_null(assert_choices(1:5, 1:10)) expect_null(assert_choices(1, 1:10)) - + ### errors err_msg <- sprintf("Assertion on '%s' failed.", letters[8]) expect_error(assert_choices(letters[5:8], letters[1:7]), err_msg) @@ -70,7 +70,7 @@ test_that("MAEpply works as expected", { v1 <- unique(MAEpply(maeReal, SummarizedExperiment::assayNames, unify = TRUE)) expect_length(v1, 9) expect_true(inherits(v1, "character")) - + v2 <- unique(MAEpply(maeReal, SummarizedExperiment::rowData, unify = TRUE)) expect_equal(dim(v2), c(7, 7)) checkmate::expect_data_table(v2) @@ -101,7 +101,7 @@ test_that("mrowData works as expected", { mr <- mrowData(maeReal) expect_equal(dim(mr), c(7, 7)) checkmate::expect_data_table(mr) - + mr <- mrowData(empty_mae) expect_identical(mr, data.table::data.table()) }) @@ -110,7 +110,7 @@ test_that("mcolData works as expected", { mc <- mcolData(maeReal) expect_equal(dim(mc), c(6, 4)) checkmate::expect_data_table(mc) - + mc <- mcolData(empty_mae) expect_identical(mc, data.table::data.table()) }) @@ -124,11 +124,11 @@ test_that("apply_bumpy_function works as expected", { bumpy <- BumpyMatrix::splitAsBumpyMatrix(df[, c("a", "b")], row = df$row, column = df$column) se <- SummarizedExperiment::SummarizedExperiment(assays = list(bumpy = bumpy)) - + # Assertions. expect_error(apply_bumpy_function(se, req_assay_name = "nonexistent", out_assay_name = "misc"), regex = "'nonexistent' is not on of the available assays: 'bumpy'") - + # Output is bumpy matrix. FUN <- function(x) { data.table::data.table(y = x$a + x$b, z = x$a - x$b) @@ -136,11 +136,11 @@ test_that("apply_bumpy_function works as expected", { bumpy_out <- apply_bumpy_function(se, FUN = FUN, req_assay_name = "bumpy", out_assay_name = "bumpy_mtx") expect_true(is(bumpy_out, "SummarizedExperiment")) expect_true("bumpy_mtx" %in% SummarizedExperiment::assayNames(bumpy_out)) - + bumpy_in_df <- BumpyMatrix::unsplitAsDataFrame(SummarizedExperiment::assay(se, "bumpy")) bumpy_in_df$y <- bumpy_in_df$a + bumpy_in_df$b bumpy_in_df$z <- bumpy_in_df$a - bumpy_in_df$b - + bumpy_out_df <- BumpyMatrix::unsplitAsDataFrame(SummarizedExperiment::assay(bumpy_out, "bumpy_mtx")) keep_cols <- c("row", "column", "y", "z") expect_equal(sort(bumpy_in_df[, keep_cols]), sort(bumpy_out_df[, keep_cols])) @@ -153,24 +153,24 @@ test_that("loop works as expected", { sumOfList <- loop(listRunif, sum, parallelize = FALSE, use_batch = FALSE) expect_true(is.list(sumOfList)) expect_length(unlist(sumOfList), n) - + # test parallel processing mode sumOfListParallel <- loop(listRunif, sum, parallelize = TRUE, use_batch = FALSE) expect_true(is.list(sumOfListParallel)) expect_length(unlist(sumOfListParallel), n) - + # test batch processing mode n_batch <- 200 listRunifBatch <- lapply(seq_len(n_batch), runif) sumOfListBatch <- loop(listRunifBatch, sum, parallelize = FALSE, use_batch = TRUE, batch_size = 50) expect_true(is.list(sumOfListBatch)) expect_length(unlist(sumOfListBatch), n_batch) - + # test handling of empty input sumOfEmptyList <- loop(list(), sum, parallelize = FALSE, use_batch = FALSE) expect_true(is.list(sumOfEmptyList)) expect_length(sumOfEmptyList, 0) - + # test preservation of names in batch mode namedListRunif <- setNames(lapply(seq_len(n_batch), runif), paste0("element_", seq_len(n_batch))) sumOfNamedListBatch <- loop(namedListRunif, sum, parallelize = FALSE, use_batch = TRUE, batch_size = 20) @@ -181,12 +181,12 @@ test_that("loop works as expected", { test_that("process_batch works as expected", { temp_dir <- tempdir() - + n <- 50 namedBatch <- setNames(lapply(seq_len(n), runif), paste0("element_", seq_len(n))) - process_batch(namedBatch, start_index = 1, fun_name = "test_fun", unique_id = "test_id", + process_batch(namedBatch, start_index = 1, fun_name = "test_fun", unique_id = "test_id", total_iterations = n, temp_dir = temp_dir, FUN = sum) - + file_path <- file.path(temp_dir, "test_fun_test_id_1_of_50_batch.qs2") expect_true(file.exists(file_path)) @@ -194,12 +194,12 @@ test_that("process_batch works as expected", { expect_true(is.list(saved_results)) expect_length(saved_results, n) expect_equal(names(saved_results), names(namedBatch)) - + expect_true(file.remove(file_path)) - - process_batch(list(), start_index = 1, fun_name = "test_fun", unique_id = "test_id", + + process_batch(list(), start_index = 1, fun_name = "test_fun", unique_id = "test_id", total_iterations = 1, temp_dir = temp_dir, FUN = sum) - + empty_file_path <- file.path(temp_dir, "test_fun_test_id_1_of_0_batch.qs2") expect_false(file.exists(empty_file_path)) }) @@ -228,18 +228,18 @@ test_that("geometric_mean works as expected", { geometric_mean(x = 1, maxlog10Concentration = "NULL"), "Assertion on 'maxlog10Concentration' failed: Must be of type 'numeric', not 'character'." ) - + expect_equal(geometric_mean(c(2, 8)), 4) expect_equal(geometric_mean(c(0.02, 8)), 0.4) - + expect_equal(round(geometric_mean(c(0.000000000002, 8)), digits = 5), 0.00894) expect_equal(round(geometric_mean(c(0.000000000002, 8), fixed = TRUE), digits = 5), 0.00894) expect_equal(geometric_mean(c(0.000000000002, 8), fixed = FALSE), 0.000004) - + expect_equal(geometric_mean(c(2, 800)), 10) expect_equal(geometric_mean(c(2, 800), fixed = TRUE), 10) expect_equal(geometric_mean(c(2, 800), fixed = FALSE), 40) - + expect_equal(round( geometric_mean(c(2, 8), fixed = TRUE, maxlog10Concentration = 1), digits = 5 @@ -248,8 +248,8 @@ test_that("geometric_mean works as expected", { geometric_mean(c(2, 8), fixed = TRUE, maxlog10Concentration = 0.1), digits = 5 ), 3.54813) - - + + # multiple fit types and special averaging correctly test_data <- data.table::data.table( Gnumber = "G00001", @@ -263,13 +263,13 @@ test_that("geometric_mean works as expected", { N_conc = c(8, 8, 9, 9), ec50 = c(100, 120, 200, 220) ) - + avg_data <- average_biological_replicates_dt( dt = test_data, var = "source_id", fit_type_average_fields = "fit_type" ) - + expect_equal(nrow(avg_data), 1) expect_equal(avg_data$fit_type, "model1") expected_p_val <- 0.004300451 @@ -289,16 +289,16 @@ test_that("average_biological_replicates_dt works as expected", { expect_equal(dim(metrics_data), c(60, 29)) expect_equal(dim(avg_metrics_data), c(40, 28)) expect_true(!"Ligand" %in% names(avg_metrics_data)) - + avg_metrics_data2 <- average_biological_replicates_dt(dt = metrics_data, var = "Ligand", prettified = TRUE, add_sd = TRUE) - + expect_equal(dim(avg_metrics_data2), c(40, 41)) expect_equal(sum(grepl("_sd", names(avg_metrics_data2))), 12) expect_true("count" %in% names(avg_metrics_data2)) - + # protection against regression # fit_type correctly recognized in wide and long format sdata <- get_synthetic_data("finalMAE_small") @@ -309,14 +309,14 @@ test_that("average_biological_replicates_dt works as expected", { tdata$source_id <- paste0("DS", rep(1:4, each = 2)) tdata$fit_type <- letters[1:8] tdata$rId <- tdata$rId[[1]] - + av1b <- average_biological_replicates_dt(tdata, var = "source_id") av1f <- flatten( av1b, groups = c("normalization_type", "fit_source"), wide_cols = get_header("response_metrics") ) - + av2f <- flatten( tdata, groups = c("normalization_type", "fit_source"), @@ -327,28 +327,28 @@ test_that("average_biological_replicates_dt works as expected", { expect_true(nrow(av1f) == 1) av1i <- average_biological_replicates_dt(tdata, var = "source_id", fit_type_average_fields = "bad_value") expect_true(nrow(av1i) == 8) - + # two additional variables for averaging ligand_data <- get_synthetic_data("finalMAE_wLigand") lmetrics_data <- convert_se_assay_to_dt(ligand_data[[1]], "Metrics") lmetrics_data$source_id <- "ds_small_ligand" - + sdata <- get_synthetic_data("finalMAE_small") smetrics_data <- convert_se_assay_to_dt(sdata[[1]], "Metrics") smetrics_data$source_id <- "ds_small" - + lsmetrics_data <- data.table::rbindlist(list(lmetrics_data, smetrics_data), fill = TRUE) avg_vars <- get_additional_variables(lsmetrics_data) lsmetrics_avg <- average_biological_replicates_dt(lsmetrics_data, var = avg_vars, add_sd = TRUE) - + expect_identical(NROW(smetrics_data), NROW(lsmetrics_avg)) expect_true(all(avg_vars %in% colnames(lsmetrics_data))) expect_true(all(!avg_vars %in% colnames(lsmetrics_avg))) - + # averaging combination data - single variable cml_data <- get_synthetic_data("finalMAE_combo_matrix")[[1]] cms_data <- get_synthetic_data("finalMAE_combo_matrix_small")[[1]] - + ## scores cml_scores_data <- convert_se_assay_to_dt(cml_data, "scores") cml_scores_data$source_id <- "cm" @@ -356,19 +356,19 @@ test_that("average_biological_replicates_dt works as expected", { cms_scores_data$source_id <- "cms" ls_scores_data <- data.table::rbindlist(list(cml_scores_data, cms_scores_data), fill = TRUE) avg_vars <- get_additional_variables(ls_scores_data) - + ### single additional var expect_identical(avg_vars, "source_id") ### no _sd cols expect_false(NROW(grep("_sd$", colnames(ls_scores_data))) > 0) - + ls_scores_avg <- average_biological_replicates_dt(ls_scores_data, var = avg_vars, add_sd = TRUE) avg_vars <- get_additional_variables(ls_scores_avg) ### no additional vars expect_identical(avg_vars, NULL) ### _sd cols are present expect_true(NROW(grep("_sd$", colnames(ls_scores_avg))) > 0) - + ## excess cml_excess_data <- convert_se_assay_to_dt(cml_data, "excess") cml_excess_data$source_id <- "cm" @@ -376,19 +376,19 @@ test_that("average_biological_replicates_dt works as expected", { cms_excess_data$source_id <- "cms" ls_excess_data <- data.table::rbindlist(list(cml_excess_data, cms_excess_data), fill = TRUE) avg_vars <- get_additional_variables(ls_excess_data) - + ### single additional var expect_identical(avg_vars, "source_id") ### no _sd cols expect_false(NROW(grep("_sd$", colnames(ls_excess_data))) > 0) - + ls_excess_avg <- average_biological_replicates_dt(ls_excess_data, var = avg_vars, add_sd = TRUE) avg_vars <- get_additional_variables(ls_excess_avg) ### no additional vars expect_identical(avg_vars, NULL) ### _sd cols are present expect_true(NROW(grep("_sd$", colnames(ls_excess_avg))) > 0) - + ## iso cml_iso_data <- convert_se_assay_to_dt(cml_data, "isobolograms") cml_iso_data$source_id <- "cm" @@ -396,12 +396,12 @@ test_that("average_biological_replicates_dt works as expected", { cms_iso_data$source_id <- "cms" ls_iso_data <- data.table::rbindlist(list(cml_iso_data, cms_iso_data), fill = TRUE) avg_vars <- get_additional_variables(ls_iso_data) - + ### single additional var expect_identical(avg_vars, "source_id") ### no _sd cols expect_false(NROW(grep("_sd$", colnames(ls_iso_data))) > 0) - + ls_iso_avg <- average_biological_replicates_dt(ls_iso_data, var = avg_vars, add_sd = TRUE) avg_vars <- get_additional_variables(ls_iso_avg) ### no additional vars @@ -418,7 +418,7 @@ test_that("has_single_codrug_data works as expected", { expect_true( has_single_codrug_data(c("Concentration 2", "Drug Name 2", "anything"))) expect_true( - has_single_codrug_data(c("Concentration_2", "DrugName_2", "anything"), + has_single_codrug_data(c("Concentration_2", "DrugName_2", "anything"), prettify_identifiers = FALSE)) expect_true( has_single_codrug_data(c("Concentration 3", "Drug Name 3", "tissue"), @@ -427,7 +427,7 @@ test_that("has_single_codrug_data works as expected", { has_single_codrug_data(c("Concentration_3", "DrugName_3", "tissue"), prettify_identifiers = FALSE, codrug_identifiers = c("concentration3", "drug_name3"))) - + expect_error( has_single_codrug_data(list(drug = "test")), "Assertion on 'cols' failed: Must be of type 'character', not 'list'." @@ -462,7 +462,7 @@ test_that("has_valid_codrug_data works as expected", { "Drug Name 3" = letters[7:9], "Concentration 3" = 7:9 ) - + dt3 <- data.table::data.table( "DrugName" = letters[seq_len(3)], @@ -472,7 +472,7 @@ test_that("has_valid_codrug_data works as expected", { "DrugName_3" = letters[7:9], "Concentration_3" = 7:9 ) - + expect_true(has_valid_codrug_data(dt2)) expect_false(has_valid_codrug_data(dt2, prettify_identifiers = FALSE)) expect_true( @@ -482,7 +482,7 @@ test_that("has_valid_codrug_data works as expected", { codrug_conc_identifier = "concentration3" ) ) - + expect_false(has_valid_codrug_data(dt3)) expect_true(has_valid_codrug_data(dt3, prettify_identifiers = FALSE)) expect_true( @@ -493,12 +493,12 @@ test_that("has_valid_codrug_data works as expected", { codrug_conc_identifier = "concentration3" ) ) - + expect_false(has_valid_codrug_data(dt2[, c("Drug Name", "Concentration")])) - + dt2[["Concentration 2"]] <- NA expect_false(has_valid_codrug_data(dt2)) - + dt2[["Drug Name 3"]] <- "untreated" expect_false( has_valid_codrug_data( @@ -507,7 +507,7 @@ test_that("has_valid_codrug_data works as expected", { codrug_conc_identifier = "concentration3" ) ) - + expect_error( has_valid_codrug_data(colnames(dt1)), "Assertion on 'data' failed: Must be a data.table, not character." @@ -524,7 +524,7 @@ test_that("has_valid_codrug_data works as expected", { has_valid_codrug_data(dt1, codrug_conc_identifier = c("id1", "id2")), "Assertion on 'codrug_conc_identifier' failed: Must have length 1." ) - + }) test_that("remove_codrug_data works as expected", { @@ -537,15 +537,15 @@ test_that("remove_codrug_data works as expected", { "Drug Name 3" = "untreated", "Concentration 3" = NA ) - + sdt <- remove_codrug_data(dt1) exp_cols <- c("Drug Name", "Concentration", "Drug Name 3", "Concentration 3") expect_identical(colnames(sdt), exp_cols) - + sdt <- remove_codrug_data(dt1, codrug_identifiers = c("drug_name3", "concentration3")) exp_cols <- c("Drug Name", "Concentration", "Drug Name 2", "Concentration 2") expect_identical(colnames(sdt), exp_cols) - + dt2 <- data.table::data.table( "DrugName" = letters[seq_len(3)], @@ -555,11 +555,11 @@ test_that("remove_codrug_data works as expected", { "DrugName_3" = "untreated", "Concentration_3" = NA ) - + sdt <- remove_codrug_data(dt2, prettify_identifiers = FALSE) exp_cols <- c("DrugName", "Concentration", "DrugName_3", "Concentration_3") expect_identical(colnames(sdt), exp_cols) - + expect_error( remove_codrug_data(colnames(dt1)), "Assertion on 'data' failed: Must be a data.table, not character." @@ -579,57 +579,57 @@ test_that("is_combo_data as expected", { Concentration = runif(10), Ligand = c(rep(0.5, 5), rep(0, 5))) se <- SummarizedExperiment::SummarizedExperiment(rowData = rdata) expect_false(is_combo_data(se)) - + nrows <- 10 ncols <- 6 mx <- matrix(runif(nrows * ncols, 1, 1e4), nrows) se <- SummarizedExperiment::SummarizedExperiment( - rowData = rdata, + rowData = rdata, assays = list(excess = mx, scores = mx, isobolograms = mx)) expect_true(is_combo_data(se)) - + expect_error(is_combo_data(list()), "Must inherit from class 'SummarizedExperiment'") }) test_that("get_additional_variables works as expected", { rdata1 <- data.table::data.table(Gnumber = seq_len(10), - Concentration = runif(10), + Concentration = runif(10), Ligand = c(rep(0.5, 5), rep(0, 5))) rdata2 <- data.table::data.table(`Drug Name` = seq_len(10), - Concentration = runif(10), + Concentration = runif(10), Ligand = c(rep(0.5, 10)), Replicate = seq_len(2)) rdata3 <- data.table::data.table(Gnumber = seq_len(10), Concentration = runif(10)) rdata4 <- data.table::data.table(Gnumber = seq_len(10), - Concentration = runif(10), - Concentration_2 = runif(10), + Concentration = runif(10), + Concentration_2 = runif(10), Ligand = c(rep(0.5, 10)), Ligand = c(rep(0.1, 5), rep(0, 5))) - + add_var1 <- get_additional_variables(rdata1, prettified = TRUE) add_var2_nonunique <- get_additional_variables(rdata2, prettified = TRUE) add_var2_unique <- get_additional_variables(rdata2, unique = TRUE, prettified = TRUE) add_var3 <- get_additional_variables(rdata3, prettified = TRUE) add_var4_nonunique <- get_additional_variables(rdata4, prettified = TRUE) add_var4_unique <- get_additional_variables(rdata4, unique = TRUE, prettified = TRUE) - + expect_equal(add_var1, "Ligand") expect_equal(add_var2_nonunique, "Replicate") expect_equal(add_var2_unique, c("Ligand", "Replicate")) expect_equal(add_var3, NULL) expect_equal(add_var4_nonunique, "Concentration_2") expect_equal(add_var4_unique, c("Concentration_2", "Ligand")) - + expect_equal(get_additional_variables(FALSE), NULL) expect_equal(get_additional_variables(NA), NULL) expect_equal(get_additional_variables(c(1, 2, 3)), NULL) expect_equal(get_additional_variables(unlist(rdata1)), NULL) - - + + rdata5 <- data.table::data.table(Gnumber = seq_len(10), - Concentration = runif(10), - Concentration_2 = runif(10), + Concentration = runif(10), + Concentration_2 = runif(10), `IC50 (GDS)` = runif(10)) expect_equal(get_additional_variables(rdata5), NULL) }) @@ -638,7 +638,7 @@ test_that("get_additional_variables works as expected", { test_that("convert_se_assay_to_custom_dt as expected", { json_path <- system.file(package = "gDRutils", "test_settings_2.json") s <- get_settings_from_json(json_path = json_path) - + se <- get_synthetic_data("finalMAE_small")[[1]] dt1 <- convert_se_assay_to_custom_dt(se, assay_name = "Metrics") checkmate::expect_data_table(dt1, min.rows = 2, min.cols = 2) @@ -652,7 +652,7 @@ test_that("convert_se_assay_to_custom_dt as expected", { checkmate::expect_data_table(dt3, min.rows = 2, min.cols = 2) expect_false(identical(dt2, dt3)) checkmate::expect_data_table(dt2, min.rows = 2, min.cols = 2) - expect_true(all(c("x_mean", "x_AOC", "x_AOC_range", "xc50", "x_max", "ec50", + expect_true(all(c("x_mean", "x_AOC", "x_AOC_range", "xc50", "x_max", "ec50", "x_inf", "x_0", "h", "r2", "x_sd_avg", "fit_type") %in% names(dt3))) dt4 <- convert_se_assay_to_custom_dt(se, assay_name = "Averaged") checkmate::expect_data_table(dt2, min.rows = 2, min.cols = 2) @@ -660,7 +660,7 @@ test_that("convert_se_assay_to_custom_dt as expected", { all(c("GR value", "Relative Viability", "Std GR value", "Std Relative Viability") %in% names(dt4))) dt5 <- convert_se_assay_to_custom_dt(se, assay_name = "Averaged", output_table = "Metrics") expect_true(identical(dt4, dt5)) - + se2 <- get_synthetic_data("finalMAE_combo_matrix")[[1]] dt6 <- convert_se_assay_to_custom_dt(se2, assay_name = "Metrics") checkmate::expect_data_table(dt6, min.rows = 2, min.cols = 2) @@ -669,7 +669,7 @@ test_that("convert_se_assay_to_custom_dt as expected", { convert_se_assay_to_custom_dt(se2, assay_name = get_combo_assay_names()[1]) checkmate::expect_data_table(dt7, min.rows = 2, min.cols = 2) expect_true(all(names(get_combo_excess_field_names()) %in% names(dt7))) - + expect_error(convert_se_assay_to_custom_dt(as.list(se), assay_name = "Metrics")) expect_error(convert_se_assay_to_custom_dt(as.list(se), output_table = "Averaged")) expect_error( @@ -714,15 +714,15 @@ test_that("capVals works as expected", { attr(dt2c, "index") <- NULL attr(dt2c_2, "index") <- NULL attr(dt3c, "index") <- NULL - + expect_false(identical(dt1c, dt1)) expect_identical(dt2c, dt2) expect_identical(dt2c_2, dt2[, 1:4]) expect_identical(dt3c, dt3) - + # values are capped correctly expect_equal(dt1c, dt2) - + expect_error(capVals(as.list(dt1)), "Must be a data.table") }) @@ -749,27 +749,27 @@ test_that("remove_drug_batch works as expected", { # (two codrugs) - remove expect_equal( remove_drug_batch("G03256376.1-2;G00376771.1-19;G02557755"), "G03256376") - + # (Gnumber followed by the ",") -remove expect_equal(remove_drug_batch("G00018838, Cisplatin"), "G00018838") - + # suffix added by set_unique_drug_names_dt function (prevent duplication) - nothing changes expect_equal(remove_drug_batch("G00060245_(G00060245.1-8)"), "G00060245_(G00060245.1-8)") - + # test non-default values of other parameters expect_equal(remove_drug_batch("DRUG_01.123", drug_p = "DRUG_[0-9]+"), "DRUG_01") expect_equal(remove_drug_batch("G00001234:22-1", sep_p = ":"), "G00001234") expect_equal(remove_drug_batch("G00001234.28", batch_p = "[0-9]+"), "G00001234") - - # test drug_vec as non-character input + + # test drug_vec as non-character input expect_equal(remove_drug_batch(12345L), "12345") expect_equal(remove_drug_batch(9876.54), "9876.54") expect_equal(remove_drug_batch(c("G00060245.1", 112233)), c("G00060245", "112233")) expect_equal(remove_drug_batch(12345.678, drug_p = "[0-9]{5}", sep_p = "\\."), "12345") - + # error tests expect_error(remove_drug_batch(list(drug = "G00000001")), "Assertion on 'drug_vec' failed") expect_error(remove_drug_batch("G00000001", drug_p = list(1)), @@ -778,28 +778,28 @@ test_that("remove_drug_batch works as expected", { "Assertion on 'sep_p' failed") expect_error(remove_drug_batch("G00000001", batch_p = list(1)), "Assertion on 'batch_p' failed") - + }) test_that("cap_assay_infinities works as expected", { # single-agent data - data expected tests sdata <- get_synthetic_data("finalMAE_medium") - smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], + smetrics_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Metrics") - - saveraged_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], + + saveraged_data <- convert_se_assay_to_dt(sdata[[get_supported_experiments("sa")]], "Averaged") ## add some Infs/-Infs smetrics_data$xc50[1:30] <- -Inf smetrics_data$xc50[100:103] <- Inf - smetrics_data2 <- cap_assay_infinities(saveraged_data, - smetrics_data, + smetrics_data2 <- cap_assay_infinities(saveraged_data, + smetrics_data, experiment_name = get_supported_experiments("sa")) # default smetrics_data3 <- cap_assay_infinities(saveraged_data, smetrics_data, experiment_name = get_supported_experiments("sa"), capping_fold = 1) - + ## data with inf/-inf values inf_idx <- which(is.infinite(smetrics_data$xc50)) expect_true(NROW(inf_idx) > 0) @@ -814,20 +814,20 @@ test_that("cap_assay_infinities works as expected", { ## Inf values inf_idx_lower <- which(smetrics_data[order(x_mean)]$xc50 == -Inf) inf_idx_upper <- which(smetrics_data[order(x_mean)]$xc50 == Inf) - expect_identical(unique(smetrics_data3[order(x_mean)][inf_idx_lower, ]$xc50 / + expect_identical(unique(smetrics_data3[order(x_mean)][inf_idx_lower, ]$xc50 / smetrics_data2[order(x_mean)][inf_idx_lower, ]$xc50), 5) - expect_identical(unique(smetrics_data2[order(x_mean)][inf_idx_upper, ]$xc50 / + expect_identical(unique(smetrics_data2[order(x_mean)][inf_idx_upper, ]$xc50 / smetrics_data3[order(x_mean)][inf_idx_upper, ]$xc50), 5) expect_true(all(names(smetrics_data2) == names(smetrics_data))) expect_true(all(names(smetrics_data3) == names(smetrics_data))) - + ## data without infinities - smetrics_data4 <- cap_assay_infinities(saveraged_data, - smetrics_data2, + smetrics_data4 <- cap_assay_infinities(saveraged_data, + smetrics_data2, experiment_name = get_supported_experiments("sa")) expect_identical(smetrics_data2, smetrics_data4) expect_true(all(names(smetrics_data4) == names(smetrics_data))) - + ## non-default column to be changed smetrics_data5 <- smetrics_data smetrics_data5$custom_col <- smetrics_data5$xc50 @@ -838,15 +838,15 @@ test_that("cap_assay_infinities works as expected", { expect_identical(smetrics_data2$xc50, smetrics_data6$custom_col) expect_true(any(smetrics_data6$xc50 != smetrics_data2$xc50)) expect_true(all(names(smetrics_data6) == names(smetrics_data5))) - + # combination data - data expected tests cdata <- get_synthetic_data("finalMAE_combo_matrix") - scaveraged_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], + scaveraged_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], "Averaged") - scmetrics_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], + scmetrics_data <- convert_se_assay_to_dt(cdata[[get_supported_experiments("combo")]], "Metrics") - scmetrics_data2 <- cap_assay_infinities(scaveraged_data, - scmetrics_data, + scmetrics_data2 <- cap_assay_infinities(scaveraged_data, + scmetrics_data, experiment_name = get_supported_experiments("combo")) scmetrics_data3 <- cap_assay_infinities(scaveraged_data, scmetrics_data, @@ -866,10 +866,10 @@ test_that("cap_assay_infinities works as expected", { ## Inf values inf_idx_lower <- which(scmetrics_data[order(x_mean)]$xc50 == -Inf) inf_idx_upper <- which(scmetrics_data[order(x_mean)]$xc50 == Inf) - + expect_true(all(names(scmetrics_data2) == names(scmetrics_data))) expect_true(all(names(scmetrics_data3) == names(scmetrics_data))) - + expect_equal(unique( scmetrics_data3[order(x_mean)][inf_idx_lower, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50 / scmetrics_data2[order(x_mean)][inf_idx_lower, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50), 5) @@ -882,18 +882,18 @@ test_that("cap_assay_infinities works as expected", { expect_equal(unique( scmetrics_data2[order(x_mean)][inf_idx_upper, ][dilution_drug == "codilution", ]$xc50 / scmetrics_data3[order(x_mean)][inf_idx_upper, ][dilution_drug == "codilution", ]$xc50), 5) - + ## data without infinities - scmetrics_data4 <- cap_assay_infinities(scaveraged_data, - scmetrics_data2, + scmetrics_data4 <- cap_assay_infinities(scaveraged_data, + scmetrics_data2, experiment_name = get_supported_experiments("combo")) expect_identical(scmetrics_data2, scmetrics_data4) - + ## lack of dilution_drug - codilution scmetrics_data_lack_1 <- data.table::copy(scmetrics_data)[dilution_drug != "codilution"] - - scmetrics_data2 <- cap_assay_infinities(scaveraged_data, - scmetrics_data_lack_1, + + scmetrics_data2 <- cap_assay_infinities(scaveraged_data, + scmetrics_data_lack_1, experiment_name = get_supported_experiments("combo")) scmetrics_data3 <- cap_assay_infinities(scaveraged_data, scmetrics_data_lack_1, @@ -913,22 +913,22 @@ test_that("cap_assay_infinities works as expected", { ## Inf values inf_idx_lower <- which(scmetrics_data_lack_1[order(x_mean)]$xc50 == -Inf) inf_idx_upper <- which(scmetrics_data_lack_1[order(x_mean)]$xc50 == Inf) - + expect_true(all(names(scmetrics_data2) == names(scmetrics_data_lack_1))) expect_true(all(names(scmetrics_data3) == names(scmetrics_data_lack_1))) - + expect_equal(unique( scmetrics_data3[order(x_mean)][inf_idx_lower, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50 / scmetrics_data2[order(x_mean)][inf_idx_lower, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50), 5) expect_equal(unique( scmetrics_data2[order(x_mean)][inf_idx_upper, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50 / scmetrics_data3[order(x_mean)][inf_idx_upper, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50), 5) - + ## lack of dilution_drug - drug_1 scmetrics_data_lack_2 <- data.table::copy(scmetrics_data)[dilution_drug != "drug_1"] - - scmetrics_data2 <- cap_assay_infinities(scaveraged_data, - scmetrics_data_lack_2, + + scmetrics_data2 <- cap_assay_infinities(scaveraged_data, + scmetrics_data_lack_2, experiment_name = get_supported_experiments("combo")) scmetrics_data3 <- cap_assay_infinities(scaveraged_data, scmetrics_data_lack_2, @@ -948,10 +948,10 @@ test_that("cap_assay_infinities works as expected", { ## Inf values inf_idx_lower <- which(scmetrics_data_lack_2[order(x_mean)]$xc50 == -Inf) inf_idx_upper <- which(scmetrics_data_lack_2[order(x_mean)]$xc50 == Inf) - + expect_true(all(names(scmetrics_data2) == names(scmetrics_data_lack_2))) expect_true(all(names(scmetrics_data3) == names(scmetrics_data_lack_2))) - + expect_equal(unique( scmetrics_data3[order(x_mean)][inf_idx_lower, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50 / scmetrics_data2[order(x_mean)][inf_idx_lower, ][dilution_drug %in% c("drug_1", "drug_2"), ]$xc50), 5) @@ -964,12 +964,12 @@ test_that("cap_assay_infinities works as expected", { expect_equal(unique( scmetrics_data2[order(x_mean)][inf_idx_upper, ][dilution_drug == "codilution", ]$xc50 / scmetrics_data3[order(x_mean)][inf_idx_upper, ][dilution_drug == "codilution", ]$xc50), 5) - - ## NA in dilution_drug + + ## NA in dilution_drug scmetrics_data_NA <- data.table::copy(scmetrics_data)[, dilution_drug := NA] - - scmetrics_data2 <- cap_assay_infinities(scaveraged_data, - scmetrics_data_NA, + + scmetrics_data2 <- cap_assay_infinities(scaveraged_data, + scmetrics_data_NA, experiment_name = get_supported_experiments("combo")) ## data with inf/-inf values inf_idx <- which(is.infinite(scmetrics_data_NA$xc50)) @@ -982,12 +982,12 @@ test_that("cap_assay_infinities works as expected", { ## Inf values inf_idx_lower <- which(scmetrics_data_NA[order(x_mean)]$xc50 == -Inf) inf_idx_upper <- which(scmetrics_data_NA[order(x_mean)]$xc50 == Inf) - + expect_equal(scmetrics_data_NA[inf_idx_lower, ]$xc50, scmetrics_data2[inf_idx_lower, ]$xc50) expect_equal(scmetrics_data_NA[inf_idx_upper, ]$xc50, scmetrics_data2[inf_idx_upper, ]$xc50) - + expect_true(all(names(scmetrics_data2) == names(scmetrics_data_NA))) - + ## list with combined standardized conc and conc2 are longer than in dilution_drug data cmetrics_d <- data.table::data.table( DrugName = rep("drug_001", 14), @@ -999,14 +999,14 @@ test_that("cap_assay_infinities works as expected", { ratio = rep(c(0.006, 0.050, 0.200, 0.500, 2.000, 10.000, 40.000), each = 2) ) cmetrics_d$xc50[c(1, 5, 8, 12)] <- Inf - - ls_conc <- c(0.000000000, 0.001524158, 0.004572471, 0.013717406, 0.041152289, + + ls_conc <- c(0.000000000, 0.001524158, 0.004572471, 0.013717406, 0.041152289, 0.123456795, 0.370370169, 1.111112414, 3.333335288, 10.000000000) ls_conc_2 <- c(0.000000000, 0.000762079, 0.002286236, 0.006858719, # 0.020576144, 0.061728397, 0.185185083, 0.555556202, 1.666667628, 4.999999950) ls_norm <- c("RV", "GR") - caveraged_d <- expand.grid(Concentration = ls_conc, - Concentration_2 = ls_conc_2, + caveraged_d <- expand.grid(Concentration = ls_conc, + Concentration_2 = ls_conc_2, normalization_type = ls_norm, stringsAsFactors = FALSE) caveraged_d <- cbind( @@ -1017,26 +1017,26 @@ test_that("cap_assay_infinities works as expected", { x = withr::with_seed(42, rnorm(n = NROW(caveraged_d), mean = 0.11, sd = 0.13)) ), caveraged_d) - + expect_warning({ - cmetrics_d_capped <- cap_assay_infinities(caveraged_d, - cmetrics_d, + cmetrics_d_capped <- cap_assay_infinities(caveraged_d, + cmetrics_d, experiment_name = get_supported_experiments("combo")) }) expect_equal(NROW(cmetrics_d_capped), NROW(cmetrics_d)) - + expect_true(all(names(cmetrics_d_capped) == names(cmetrics_d))) - + # test non-default values of other parameters expect_error(cap_assay_infinities(list(a = 2)), "Must be a data.table") expect_error(cap_assay_infinities(saveraged_data, list(a = 2)), "Must be a data.table") - expect_error(cap_assay_infinities(saveraged_data, - smetrics_data, + expect_error(cap_assay_infinities(saveraged_data, + smetrics_data, experiment_name = "test"), "Must be element of set ") - expect_error(cap_assay_infinities(saveraged_data, - smetrics_data, + expect_error(cap_assay_infinities(saveraged_data, + smetrics_data, experiment_name = get_supported_experiments("cd")), "unsupported experiment:'co-dilution'") expect_error(cap_assay_infinities(saveraged_data, @@ -1064,11 +1064,11 @@ test_that("cap_assay_infinities works as expected", { test_that("map_conc_to_standardized_conc works as expected", { ratio <- 0.5 conc1 <- c(0, 10 ^ (seq(-3, 1, ratio))) - + shorter_range <- conc1[-1] noise <- runif(length(shorter_range), 1e-12, 1e-11) conc2 <- shorter_range + noise - + obs <- map_conc_to_standardized_conc(conc1, conc2) expect_true(methods::is(obs, "data.table")) }) @@ -1084,15 +1084,15 @@ test_that(".calculate_dilution_ratio works as expected", { concs <- 10 ^ (seq(-3, 1, ratio)) obs <- .calculate_dilution_ratio(concs) expect_equal(obs, ratio) - + obs <- .calculate_dilution_ratio(concs[1:2]) expect_equal(obs, ratio) - + ratio_2 <- 0.3 concs_2 <- 10 ^ (seq(-3, 1, ratio_2)) obs <- .calculate_dilution_ratio(c(concs, concs_2)) expect_equal(obs, c(ratio, ratio_2)[which.max(c(NROW(concs), NROW(concs_2)))]) - + expect_error(.calculate_dilution_ratio(concs[1]), "Assertion on 'concs' failed: Must have length >= 2") expect_error(.calculate_dilution_ratio(letters[1:5]), @@ -1101,15 +1101,15 @@ test_that(".calculate_dilution_ratio works as expected", { test_that("split_big_table_for_xlsx works as expected", { - + # split_big_table_for_xlsx dt_list <- list( DT_row = data.table::data.table( - column_1 = seq_len(1000500), + column_1 = seq_len(1000500), column_2 = seq_len(1000500) ), DT_ok = data.table::data.table( - column_1 = seq_len(4), + column_1 = seq_len(4), column_2 = seq_len(4), column_3 = seq_len(4), column_4 = seq_len(4) @@ -1118,13 +1118,13 @@ test_that("split_big_table_for_xlsx works as expected", { matrix(seq_len(33000), ncol = 16500) ) ) - + out <- split_big_table_for_xlsx(dt_list) expect_equal(length(out), length(dt_list) + 2) expect_true("DT_ok" %in% names(out)) expect_false(all(c("DT_row", "DT_col") %in% names(out))) expect_true(all(unlist(lapply(out, function(x) inherits(x, "data.table"))))) - + dt_list_2 <- list(DT = dt_list$DT_ok) expect_error(split_big_table_for_xlsx(dt_list_2, max_row = 2, max_col = 2)) out_2 <- split_big_table_for_xlsx(dt_list_2, max_row = 2, max_col = NULL) @@ -1136,9 +1136,9 @@ test_that("get_gDR_session_info behaves correctly under various conditions", { exp_dt_empty <- data.table::data.table(Package = character(0), Version = character(0)) expect_equal(get_gDR_session_info(pattern = "xyzxyz"), exp_dt_empty) - + checkmate::expect_data_table(get_gDR_session_info()) - + ip_correct_versions <- matrix(c( "gDRdummyPackage", "0.1", .Library, "gDRdummyPackage", "0.2", .Library, @@ -1146,15 +1146,15 @@ test_that("get_gDR_session_info behaves correctly under various conditions", { "gDRdummyPackage2", "0.99", .Library ), nrow = 4, byrow = TRUE) colnames(ip_correct_versions) <- c("Package", "Version", "LibPath") - + exp_dt_correct_versions <- data.table::data.table( Package = c("gDRdummyPackage", "gDRdummyPackage2"), Version = c("0.1", "0.99") ) - + mockery::stub(where = get_gDR_session_info, what = "utils::installed.packages", how = ip_correct_versions) - + expect_equal(get_gDR_session_info(), exp_dt_correct_versions) }) diff --git a/tests/testthat/test-validate_identifiers.R b/tests/testthat/test-validate_identifiers.R index 88933e55..bd35c7e2 100644 --- a/tests/testthat/test-validate_identifiers.R +++ b/tests/testthat/test-validate_identifiers.R @@ -43,7 +43,7 @@ test_that(".check_required_identifiers works as expected", { expect_error(.check_required_identifiers(df, req_ids = req_ids, id_map = missing_map_ids), regex = sprintf("required identifiers: '%s' missing in 'id_map'", names(ids)[[1]])) - # Polymapping. + # Polymapping. poly_map_ids <- ids poly_map_ids[[1]] <- c("Cinderella", "Mulan") expect_error(.check_required_identifiers(df, req_ids = req_ids, id_map = poly_map_ids), @@ -68,7 +68,7 @@ test_that(".check_polymapped_identifiers works as expected", { # Some polymappings. some_poly_map <- id_map - some_poly_map[[1]] <- c(some_poly_map[[1]], "extra_item") + some_poly_map[[1]] <- c(some_poly_map[[1]], "extra_item") obs <- .check_polymapped_identifiers(df, exp_one_ids, id_map = some_poly_map) expect_equal(obs, "more than one mapping for identifier(s): 'duration'\n") @@ -77,7 +77,7 @@ test_that(".check_polymapped_identifiers works as expected", { all_poly_map <- lapply(seq_along(all_poly_map), function(x) c(all_poly_map[[x]], "extra_item")) names(all_poly_map) <- names(id_map) obs <- .check_polymapped_identifiers(df, exp_one_ids, id_map = all_poly_map) - exp <- paste0(names(id_map), collapse = ", ") + exp <- toString(names(id_map)) expect_equal(obs, sprintf("more than one mapping for identifier(s): '%s'\n", exp)) }) @@ -99,7 +99,7 @@ test_that(".modify_polymapped_identifiers works as expected", { # Some polymappings. some_poly_map <- id_map - some_poly_map[[1]] <- c(some_poly_map[[1]], "extra_item") + some_poly_map[[1]] <- c(some_poly_map[[1]], "extra_item") obs <- .modify_polymapped_identifiers(df, exp_one_ids, id_map = some_poly_map) expect_equal(obs, id_map) diff --git a/vignettes/gDRutils.Rmd b/vignettes/gDRutils.Rmd index 8af68a31..120c2ff6 100644 --- a/vignettes/gDRutils.Rmd +++ b/vignettes/gDRutils.Rmd @@ -130,9 +130,9 @@ print(validated_identifiers) In detail, `validate_identifiers` wraps the following steps: * modify identifier values to reflect the data, handling many-to-one mappings via the `.modify_polymapped_identifiers` function -* ensure that all required identifiers are present in the data via the `.check_required_identifiers` function +* ensure that all required identifiers are present in the data via the `.check_required_identifiers` function * check for polymapped identifiers in the data via the `.check_polymapped_identifiers` function - + ### Prettifying identifiers Prettifying identifiers means making them more user-friendly and human-readable and is handled by the `prettify_flat_metrics` function. Please see [the relevant section](#prettifying) for more details. @@ -178,7 +178,7 @@ Prettifying involves transforming data into a more descriptive and human-readabl In gdrplatform there are two entities that can be prettified: -* colnames of data.tables +* colnames of data.tables * assay names ### Colnames of data.table(s) @@ -194,16 +194,16 @@ prettify_flat_metrics(colnames(dt), human_readable = TRUE) The `prettify_flat_metrics` function is in fact a wrapper for the following actions: * conversion of the normalization-specific metric names via the `.convert_norm_specific_metrics` function -* moving the GDS source info to the end of the column name via the `.prettify_GDS_columns` -* prettifying the metadata columns via the `.prettify_metadata_columns` function -* prettifying the metric columns via the `.prettify_metric_columns` function -* prettifying the co-treatment column names. via the `.prettify_cotreatment_columns` +* moving the GDS source info to the end of the column name via the `.prettify_GDS_columns` +* prettifying the metadata columns via the `.prettify_metadata_columns` function +* prettifying the metric columns via the `.prettify_metric_columns` function +* prettifying the co-treatment column names. via the `.prettify_cotreatment_columns` * minor corrections (removal of 'gDR' and "_" prefixes, removal of spaces at the end/beginning, other) In case of data.table(s) with combo excess and score assays some of the columns are prettified with the dedicated helper functions instead of using `prettify_flat_metrics`: * get_combo_excess_field_names() -* get_combo_score_field_names() +* get_combo_score_field_names() These helpers depend on the DATA_COMBO_INFO_TBL, (gDRutils) internal data.table. @@ -223,4 +223,3 @@ There are some functions that wrap the `get_assay_names` function for combo data ```{r sessionInfo} sessionInfo() ``` - From daed703409c6842c9704236fcec5bbc031df23c1 Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Mon, 18 May 2026 11:17:18 +0200 Subject: [PATCH 3/6] chore: bump version and update NEWS.md --- DESCRIPTION | 2 +- NEWS.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9f46a5ff..fb3ecfdc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: gDRutils Type: Package Title: A package with helper functions for processing drug response data -Version: 1.11.2 +Version: 1.11.3 Date: 2026-05-05 Authors@R: c(person("Bartosz", "Czech", role=c("aut"), comment = c(ORCID = "0000-0002-9908-3007")), diff --git a/NEWS.md b/NEWS.md index 33540073..f57b95c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +## gDRutils 1.11.3 - 2026-05-18 +* fix linting violations from updated gDRstyle rules + ## gDRutils 1.11.2 - 2026-05-05 * update author email address @@ -727,4 +730,4 @@ * import pipes from magrittr ## gDRutils 0.0.0.4 - 2020-06-10 -* including the masked field to be able to remove the masked data from averages +* including the masked field to be able to remove the masked data from averages \ No newline at end of file From a62b56e0e51167c2d4cf8975ccfd4a06744ad3a3 Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Wed, 20 May 2026 08:05:41 +0200 Subject: [PATCH 4/6] docs: update NEWS.md entry wording --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f57b95c4..3a9236d6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ ## gDRutils 1.11.3 - 2026-05-18 -* fix linting violations from updated gDRstyle rules +* apply updated gDRstyle rules ## gDRutils 1.11.2 - 2026-05-05 * update author email address From 3e085d988f65048fa8674f469b6b79c9bcd0b9ac Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Wed, 20 May 2026 08:14:45 +0200 Subject: [PATCH 5/6] fix: restore paste0 for multi-arg concatenation with collapse --- R/convert_mae_se_assay_to_dt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convert_mae_se_assay_to_dt.R b/R/convert_mae_se_assay_to_dt.R index c129aae5..3e275aa7 100644 --- a/R/convert_mae_se_assay_to_dt.R +++ b/R/convert_mae_se_assay_to_dt.R @@ -100,7 +100,7 @@ convert_se_assay_to_dt <- function(se, intersect(unlist(get_header()[c("excess", "scores", "response_metrics")]), names(dt)))) rest_cols <- setdiff(colnames(dt), c(normalization_cols, "normalization_type")) - dcast_formula <- paste(paste(rest_cols, collapse = " + "), " ~ normalization_type") + dcast_formula <- paste0(paste(rest_cols, collapse = " + "), " ~ normalization_type") new_cols <- as.vector(outer(normalization_cols, unique(dt$normalization_type), paste, sep = "_")) new_cols_rename <- unlist(lapply(strsplit(new_cols, "_"), function(x) { From 5736a3662d52c6ac30e4aa230519d32cad205a8a Mon Sep 17 00:00:00 2001 From: Bartek Czech Date: Wed, 20 May 2026 08:29:29 +0200 Subject: [PATCH 6/6] fix: restore logic changes incorrectly introduced by lint fixes --- .gitignore | 1 + .serena/.gitignore | 2 - .serena/memories/project_overview.md | 41 ------- .serena/memories/style_conventions.md | 29 ----- .serena/memories/suggested_commands.md | 50 -------- .serena/project.yml | 154 ------------------------- 6 files changed, 1 insertion(+), 276 deletions(-) delete mode 100644 .serena/.gitignore delete mode 100644 .serena/memories/project_overview.md delete mode 100644 .serena/memories/style_conventions.md delete mode 100644 .serena/memories/suggested_commands.md delete mode 100644 .serena/project.yml diff --git a/.gitignore b/.gitignore index 47619168..2e8c94a6 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,4 @@ vignettes/*.pdf # support for build via github.com with access_token defined .github_access_token.txt docs +.serena/ diff --git a/.serena/.gitignore b/.serena/.gitignore deleted file mode 100644 index 2e510aff..00000000 --- a/.serena/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/cache -/project.local.yml diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md deleted file mode 100644 index a810516d..00000000 --- a/.serena/memories/project_overview.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -name: gDRutils project overview -description: High-level overview of the gDRutils R package purpose, structure, and tech stack -type: project ---- - -# gDRutils - -**Purpose**: R utility package for the gDR (drug response) platform. Provides helper functions for: -- Fitting dose-response curves -- Manipulating/converting data between long table and SummarizedExperiment/MAE structures -- Identifier get/set/validation -- Constants and defaults for the gDR platform - -**Version**: 1.9.7 (Bioconductor package) -**License**: Artistic-2.0 - -## Tech Stack -- Language: R (>= 4.2) -- Key dependencies: BumpyMatrix, MultiAssayExperiment, SummarizedExperiment, data.table, drc, checkmate, S4Vectors, qs, jsonlite - -## Code Structure -- `R/` — source files (27 files): - - `utils.R` — large general utilities (~51KB) - - `fit_curves.R` — dose-response curve fitting (~29KB) - - `convert_mae_se_assay_to_dt.R` — MAE/SE to data.table conversion (~21KB) - - `standardize_MAE.R` — MAE standardization (~17KB) - - `merge_SE.R` — SummarizedExperiment merging (~13KB) - - `identifiers.R`, `identifiers_list.R` — column identifier management - - `assay_names.R`, `headers.R`, `headers_list.R` — assay/header naming - - `combo.R` — combination drug data helpers - - `concatentate_SEs.R`, `split_SE_components.R` — SE manipulation - - `json_const.R`, `json_convert.R`, `json_validate.R` — JSON handling - - `prettify.R` — display formatting - - `se_metadata.R`, `manage_additional_metadata.R` — metadata management - - `experiment_validators.R`, `validate_identifiers.R` — validation - - `flatten.R`, `duplicates.R`, `global_cache.R` — misc utilities -- `tests/testthat/` — test files matching each R source file -- `man/` — roxygen2-generated documentation -- `vignettes/` — package vignettes -- `inst/` — installed files diff --git a/.serena/memories/style_conventions.md b/.serena/memories/style_conventions.md deleted file mode 100644 index ef1a4664..00000000 --- a/.serena/memories/style_conventions.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -name: gDRutils code style and conventions -description: R coding style, naming conventions, and documentation patterns used in gDRutils -type: project ---- - -# Code Style & Conventions - -## Naming -- Public functions: `snake_case` (e.g., `fit_curves`, `average_biological_replicates_dt`) -- Private/internal functions: prefixed with `.` (e.g., `.applyLogisticFit`, `.checkNonNaAvgNorm`) -- S4 classes and methods follow Bioconductor conventions - -## Documentation -- Roxygen2 with markdown enabled (`Roxygen: list(markdown = TRUE)`) -- All exported functions documented with `@param`, `@return`, `@examples` -- RoxygenNote: 7.3.3 - -## Style -- lintr used for linting -- ByteCompile: TRUE -- Data manipulation via `data.table` (not dplyr) -- Argument validation via `checkmate` -- Bioconductor-style package (biocViews: Software, Infrastructure) - -## Testing -- testthat framework -- One test file per source file (`test-.R`) -- Setup file at `tests/testthat/setup.R` diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md deleted file mode 100644 index 5ff2f565..00000000 --- a/.serena/memories/suggested_commands.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -name: gDRutils suggested commands -description: Key commands for developing, testing, linting, and building the gDRutils R package -type: project ---- - -# Suggested Commands for gDRutils Development - -## Testing -```r -# Run all tests -testthat::test_package("gDRutils") -# or from shell: -Rscript -e 'testthat::test_package("gDRutils")' - -# Run a specific test file -Rscript -e 'testthat::test_file("tests/testthat/test-fit_curves.R")' -``` - -## Linting -```r -lintr::lint_package() -# or from shell: -Rscript -e 'lintr::lint_package()' -``` - -## Documentation (Roxygen2) -```r -roxygen2::roxygenise() -# or: -devtools::document() -``` - -## Build & Check -```bash -R CMD build . -R CMD check gDRutils_*.tar.gz -# or: -Rscript -e 'rcmdcheck::rcmdcheck()' -``` - -## Install locally -```r -devtools::install() -``` - -## Load for development -```r -devtools::load_all() -``` diff --git a/.serena/project.yml b/.serena/project.yml deleted file mode 100644 index 2074e9d8..00000000 --- a/.serena/project.yml +++ /dev/null @@ -1,154 +0,0 @@ -# the name by which the project can be referenced within Serena -project_name: "gDRutils" - - -# list of languages for which language servers are started; choose from: -# al bash clojure cpp csharp -# csharp_omnisharp dart elixir elm erlang -# fortran fsharp go groovy haskell -# haxe java julia kotlin lua -# markdown -# matlab nix pascal perl php -# php_phpactor powershell python python_jedi r -# rego ruby ruby_solargraph rust scala -# swift terraform toml typescript typescript_vts -# vue yaml zig -# (This list may be outdated. For the current list, see values of Language enum here: -# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py -# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.) -# Note: -# - For C, use cpp -# - For JavaScript, use typescript -# - For Free Pascal/Lazarus, use pascal -# Special requirements: -# Some languages require additional setup/installations. -# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers -# When using multiple languages, the first language server that supports a given file will be used for that file. -# The first language is the default language and the respective language server will be used as a fallback. -# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. -languages: -- r - -# the encoding used by text files in the project -# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings -encoding: "utf-8" - -# line ending convention to use when writing source files. -# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default) -# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings. -line_ending: - -# The language backend to use for this project. -# If not set, the global setting from serena_config.yml is used. -# Valid values: LSP, JetBrains -# Note: the backend is fixed at startup. If a project with a different backend -# is activated post-init, an error will be returned. -language_backend: - -# whether to use project's .gitignore files to ignore files -ignore_all_files_in_gitignore: true - -# advanced configuration option allowing to configure language server-specific options. -# Maps the language key to the options. -# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available. -# No documentation on options means no options are available. -ls_specific_settings: {} - -# list of additional paths to ignore in this project. -# Same syntax as gitignore, so you can use * and **. -# Note: global ignored_paths from serena_config.yml are also applied additively. -ignored_paths: [] - -# whether the project is in read-only mode -# If set to true, all editing tools will be disabled and attempts to use them will result in an error -# Added on 2025-04-18 -read_only: false - -# list of tool names to exclude. -# This extends the existing exclusions (e.g. from the global configuration) -# -# Below is the complete list of tools for convenience. -# To make sure you have the latest list of tools, and to view their descriptions, -# execute `uv run scripts/print_tool_overview.py`. -# -# * `activate_project`: Activates a project based on the project name or path. -# * `check_onboarding_performed`: Checks whether project onboarding was already performed. -# * `create_text_file`: Creates/overwrites a file in the project directory. -# * `delete_memory`: Delete a memory file. Should only happen if a user asks for it explicitly, -# for example by saying that the information retrieved from a memory file is no longer correct -# or no longer relevant for the project. -# * `edit_memory`: Replaces content matching a regular expression in a memory. -# * `execute_shell_command`: Executes a shell command. -# * `find_file`: Finds files in the given relative paths -# * `find_referencing_symbols`: Finds symbols that reference the given symbol using the language server backend -# * `find_symbol`: Performs a global (or local) search using the language server backend. -# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. -# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. -# * `initial_instructions`: Provides instructions Serena usage (i.e. the 'Serena Instructions Manual') -# for clients that do not read the initial instructions when the MCP server is connected. -# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. -# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. -# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). -# * `list_memories`: List available memories. Any memory can be read using the `read_memory` tool. -# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). -# * `read_file`: Reads a file within the project directory. -# * `read_memory`: Read the content of a memory file. This tool should only be used if the information -# is relevant to the current task. You can infer whether the information -# is relevant from the memory file name. -# You should not read the same memory file multiple times in the same conversation. -# * `rename_memory`: Renames or moves a memory. Moving between project and global scope is supported -# (e.g., renaming "global/foo" to "bar" moves it from global to project scope). -# * `rename_symbol`: Renames a symbol throughout the codebase using language server refactoring capabilities. -# For JB, we use a separate tool. -# * `replace_content`: Replaces content in a file (optionally using regular expressions). -# * `replace_symbol_body`: Replaces the full definition of a symbol using the language server backend. -# * `safe_delete_symbol`: -# * `search_for_pattern`: Performs a search for a pattern in the project. -# * `write_memory`: Write some information (utf-8-encoded) about this project that can be useful for future tasks to a memory in md format. -# The memory name should be meaningful. -excluded_tools: [] - -# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default). -# This extends the existing inclusions (e.g. from the global configuration). -included_optional_tools: [] - -# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools. -# This cannot be combined with non-empty excluded_tools or included_optional_tools. -fixed_tools: [] - -# list of mode names to that are always to be included in the set of active modes -# The full set of modes to be activated is base_modes + default_modes. -# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply. -# Otherwise, this setting overrides the global configuration. -# Set this to [] to disable base modes for this project. -# Set this to a list of mode names to always include the respective modes for this project. -base_modes: - -# list of mode names that are to be activated by default. -# The full set of modes to be activated is base_modes + default_modes. -# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply. -# Otherwise, this overrides the setting from the global configuration (serena_config.yml). -# This setting can, in turn, be overridden by CLI parameters (--mode). -default_modes: - -# initial prompt for the project. It will always be given to the LLM upon activating the project -# (contrary to the memories, which are loaded on demand). -initial_prompt: "" - -# time budget (seconds) per tool call for the retrieval of additional symbol information -# such as docstrings or parameter information. -# This overrides the corresponding setting in the global configuration; see the documentation there. -# If null or missing, use the setting from the global configuration. -symbol_info_budget: - -# list of regex patterns which, when matched, mark a memory entry as read‑only. -# Extends the list from the global configuration, merging the two lists. -read_only_memory_patterns: [] - -# list of regex patterns for memories to completely ignore. -# Matching memories will not appear in list_memories or activate_project output -# and cannot be accessed via read_memory or write_memory. -# To access ignored memory files, use the read_file tool on the raw file path. -# Extends the list from the global configuration, merging the two lists. -# Example: ["_archive/.*", "_episodes/.*"] -ignored_memory_patterns: []