diff --git a/DESCRIPTION b/DESCRIPTION index 6acb9be..fbff30f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,4 +13,4 @@ LazyData: true Imports: compositions, ggplot2 Suggests: testthat -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 diff --git a/R/check_input_args.R b/R/check_input_args.R index 59180a8..d5b45de 100644 --- a/R/check_input_args.R +++ b/R/check_input_args.R @@ -6,6 +6,7 @@ #' @param dataf A \code{data.frame} containing data #' @param y Name (as string/character vector of length 1) of outcome variable in \code{dataf} #' @param comps Character vector of names of compositions in \code{dataf}. See details for more information. +#' @param comps_fup Character vector of names of compositions at follow up in \code{dataf}. See details for more information. #' @param covars Character vector of covariates names (non-comp variables) in \code{dataf} or NULL for none (default). #' @param deltas A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. #' @export @@ -22,10 +23,7 @@ # # - - - -check_input_args <- function(dataf, y, comps, covars, deltas) { +check_input_args <- function(dataf, y, comps, comps_fup, covars, deltas) { if (!is.data.frame(dataf)) { @@ -46,6 +44,14 @@ check_input_args <- function(dataf, y, comps, covars, deltas) { stop("At least two compositional components are required to create an ilr linear regression.") } + if (!is_null_or_na(comps_fup)) { + if (!is.character(comps_fup)) { + stop("Please supply a character string of the compositional component column names in dataf.") + } else if (length(comps_fup) < 2) { + stop("At least two compositional components are required to create an ilr linear regression.") + } + } + if (!is_null_or_na(covars) & !is.character(covars)) { stop("Please supply a character string of the covariate column names in dataf (optionally NULL or NA for no covariates).") } diff --git a/R/plot_delta_comp.R b/R/plot_delta_comp.R index a70356f..6c1408e 100644 --- a/R/plot_delta_comp.R +++ b/R/plot_delta_comp.R @@ -53,7 +53,7 @@ globalVariables(c( plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) { - + if (!is_deltacomp_obj(dc_obj)) { stop("Input needs to be a deltacomp object. i.e., data.frame returned by predict_delta_comps().") } @@ -104,7 +104,7 @@ plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) { ) + theme(legend.position = "none") - + if (comparisons == "one-v-one") { ggp <- ggp + facet_grid(`comp-` ~ `comp+`, labeller = label_parsed) @@ -114,10 +114,6 @@ plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) { } return(ggp) - - + + } - - - - diff --git a/R/predict_delta_comps.R b/R/predict_delta_comps.R index 52c8aca..8d069d0 100644 --- a/R/predict_delta_comps.R +++ b/R/predict_delta_comps.R @@ -4,11 +4,13 @@ #' @description Provided the data (containing outcome, compositional components and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model. #' @param dataf A \code{data.frame} containing data #' @param y Name (as string/character vector of length 1) of outcome variable in \code{dataf} -#' @param comps Character vector of names of compositions in \code{dataf}. See details for more information. +#' @param comps Optional character vector of names of compositions in \code{dataf}. See details for more information. +#' @param comps_fup Character vector of names of compositions at follow-up in \code{dataf}. See details for more information. #' @param covars Optional. Character vector of covariates names (non-comp variables) in \code{dataf}. Defaults to NULL. #' @param deltas A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. #' Changes in compositions to be computed pairwise. Defaults to 0, 10 and 20 minutes as a proportion of the 1440 minutes #' in a day (i.e., approximately \code{0.000}, \code{0.007} and \code{0.014}). +#' @param analysis_type Currently two choices: \code{"cross-sectional"} (default) or \code{"longitudinal"}. Please see details for explanation of these methods. #' @param comparisons Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods. #' @param alpha Optional. Level of significance. Defaults to 0.05. #' @export @@ -19,6 +21,9 @@ #' Please see the \code{deltacomp} package \code{README.md} file for examples and explanation of the \code{comparisons = "prop-realloc"} and \code{comparisons = "one-v-one"} options. #' #' Note from version 0.1.0 to current version, \code{comparisons == "one-v-all"} is depreciated, \code{comparisons == "prop-realloc"} is probably the alternative you are after. +#' +#' \code{analysis_type == "longitudinal"} expects a second measurement of the composition (\code{comps_fup}). If longitudinal, the ilr coordinates of the follow-up composition will be calculated and used as covariates to adjust the regression model. +#' #' @examples #' predict_delta_comps( #' dataf = fat_data, @@ -40,29 +45,33 @@ #' comparisons = "one-v-one", #' alpha = 0.05 #' ) -#' predict_delta_comps <- function( - dataf, # data.frame of data - y, # character name of outcome in dataf - comps, # character vector of names of compositions in dataf - covars = NULL, # character vector of names of covariates (non-comp variables) in dataf - deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise - comparisons = c("prop-realloc", "one-v-one")[1], - alpha = 0.05 + dataf, # data.frame of data + y, # character name of outcome in dataf + comps, # character vector of names of compositions in dataf + comps_fup = NULL, # character vector of names of compositions at follow up in dataf + covars = NULL, # character vector of names of covariates (non-comp variables) in dataf + analysis_type = c("cross-sectional", "longitudinal")[1], + comparisons = c("prop-realloc", "one-v-one")[1], + deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise + alpha = 0.05 ){ - + # perform some basic input checks - throws errors where input incorrect - check_input_args(dataf, y, comps, covars, deltas) + check_input_args(dataf, y, comps, comps_fup, covars, deltas) if (is_null_or_na(covars)) { # convert 0 length vecs and NAs to NULL covars <- NULL } - dataf <- rm_na_data_rows(dataf, c(y, comps, covars)) + if (is_null_or_na(comps_fup)) { # convert 0 length vecs and NAs to NULL + comps_fup <- NULL + } + dataf <- rm_na_data_rows(dataf, c(y, comps, comps_fup, covars)) # in case the data are much smaller after removing NAs - check_input_args(dataf, y, comps, covars, deltas) + check_input_args(dataf, y, comps, comps_fup, covars, deltas) check_strictly_positive_vals(dataf, comps) comparisons <- get_comp_type(comparisons) - + # set up function constants n <- nrow(dataf) n_comp <- length(comps) @@ -71,6 +80,7 @@ predict_delta_comps <- function( # standardise comps dataf <- standardise_comps(dataf, comps) + if (!is.null(comps_fup) & analysis_type == "longitudinal") dataf <- standardise_comps(dataf, comps_fup) # get the mean of the compositions on the geometric scale mean_comps <- @@ -81,6 +91,16 @@ predict_delta_comps <- function( robust = FALSE ) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + mean_comps_fup <- + compositions::mean.acomp( + compositions::acomp( + dataf[, comps_fup] + ), + robust = FALSE + ) + } + if(!all.equal(1, sum(mean_comps), tolerance = 1e-5)) stop("Calculated mean composition does not sum to 1") @@ -91,6 +111,9 @@ predict_delta_comps <- function( ### and covariates m_cov <- NULL mean_X <- as.data.frame(t(mean_comps)) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + mean_X = cbind(mean_X, as.data.frame(t(mean_comps_fup))) + } if (n_covar > 0) { m_cov <- get_avg_covs(dataf, covars) # testing: @@ -98,7 +121,7 @@ predict_delta_comps <- function( mean_X <- cbind(mean_X, m_cov) # print(tibble::as_tibble(mean_X)) } - + # The ilr basis matrix psi <- create_v_mat(n_comp) ### previously: @@ -107,13 +130,33 @@ predict_delta_comps <- function( # add ilr coords to dataset dataf <- append_ilr_coords(dataf, comps, psi) mean_X <- append_ilr_coords(mean_X, comps, psi) + + if (!is.null(comps_fup) & analysis_type == "longitudinal") { + # change baseline ilr names to avoid confusion + colnames(dataf)[1:(n_comp - 1)] = paste0(colnames(dataf)[1:(n_comp - 1)], "_bl") + colnames(mean_X)[1:(n_comp - 1)] = paste0(colnames(mean_X)[1:(n_comp - 1)], "_bl") + dataf <- append_ilr_coords(dataf, comps_fup, psi) + mean_X <- append_ilr_coords(mean_X, comps_fup, psi) + # change baseline ilr names to avoid confusion + colnames(dataf)[1:(n_comp - 1)] = paste0(colnames(dataf)[1:(n_comp - 1)], "_fup") + colnames(mean_X)[1:(n_comp - 1)] = paste0(colnames(mean_X)[1:(n_comp - 1)], "_fup") + # reset baseline ilr names + colnames(dataf)[(n_comp):(n_comp + (n_comp - 2))] = gsub("_bl", "", colnames(dataf)[(n_comp):(n_comp + (n_comp - 2))]) + colnames(mean_X)[(n_comp):(n_comp + (n_comp - 2))] = gsub("_bl", "", colnames(mean_X)[(n_comp):(n_comp + (n_comp - 2))]) + } # print(tibble::as_tibble(mean_X)) print_ilr_trans(comps) # print to console the ilr transformation for the user # create dataset X only consisting of outcome, ilr coords and covars ilr_names <- paste0("ilr", 1:(n_comp - 1)) + # X <- dataf[, colnames(dataf) %in% c(y, ilr_names, covars)] X <- dataf[, c(y, ilr_names, covars)] # force order + if (!is.null(comps_fup) & analysis_type == "longitudinal") { + ilr_names_fup = paste0(ilr_names, "_fup") + X <- dataf[, c(y, ilr_names, ilr_names_fup, covars)] # force order + covars = c(covars, ilr_names_fup) + } # fit model lm_X <- fit_lm(y, X) @@ -122,7 +165,6 @@ predict_delta_comps <- function( # stop one column from data.frame becoming vector compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names)]) - mean_pred <- get_mean_pred(lm_X, mean_X, alpha = alpha) # extract linear model quantities required for further calculations lm_quants <- extract_lm_quantities(lm_X, alpha = alpha) @@ -134,6 +176,7 @@ predict_delta_comps <- function( m_comps <- matrix(rep(mean_comps, n_preds), nrow = n_preds, byrow = TRUE) m_delta <- m_comps + delta_mat + m_delta_less_0 <- rowSums(m_delta < 0) if(any(m_delta_less_0 > 0)) { warning( @@ -157,12 +200,14 @@ predict_delta_comps <- function( attr(ilr_means, "class") <- NULL x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names, ilr_delta, ilr_means) + y0_star <- x0_star %*% lm_quants$beta_hat se_y0_star <- get_se_y0_star(x0_star, lm_quants$s_e, lm_quants$XtX_inv) # get labels and deltas for reallocations realloc_nms <- get_realloc_nms(comps, comparisons, poss_comps) + delta_list <- get_pred_deltas(delta_mat, realloc_nms) @@ -197,5 +242,5 @@ predict_delta_comps <- function( attr(ret_obj, "mean_pred") <- mean_pred return(ret_obj) - + } diff --git a/man/check_input_args.Rd b/man/check_input_args.Rd index 339592d..a27bc6b 100644 --- a/man/check_input_args.Rd +++ b/man/check_input_args.Rd @@ -4,7 +4,7 @@ \alias{check_input_args} \title{Sanity checks for arguments passed to predict_delta_comps()} \usage{ -check_input_args(dataf, y, comps, covars, deltas) +check_input_args(dataf, y, comps, comps_fup, covars, deltas) } \arguments{ \item{dataf}{A \code{data.frame} containing data} @@ -13,6 +13,8 @@ check_input_args(dataf, y, comps, covars, deltas) \item{comps}{Character vector of names of compositions in \code{dataf}. See details for more information.} +\item{comps_fup}{Character vector of names of compositions at follow up in \code{dataf}. See details for more information.} + \item{covars}{Character vector of covariates names (non-comp variables) in \code{dataf} or NULL for none (default).} \item{deltas}{A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional.} diff --git a/man/predict_delta_comps.Rd b/man/predict_delta_comps.Rd index dffbc85..e20f0ca 100644 --- a/man/predict_delta_comps.Rd +++ b/man/predict_delta_comps.Rd @@ -8,9 +8,11 @@ predict_delta_comps( dataf, y, comps, + comps_fup = NULL, covars = NULL, - deltas = c(0, 10, 20)/(24 * 60), + analysis_type = c("cross-sectional", "longitudinal")[1], comparisons = c("prop-realloc", "one-v-one")[1], + deltas = c(0, 10, 20)/(24 * 60), alpha = 0.05 ) } @@ -19,16 +21,20 @@ predict_delta_comps( \item{y}{Name (as string/character vector of length 1) of outcome variable in \code{dataf}} -\item{comps}{Character vector of names of compositions in \code{dataf}. See details for more information.} +\item{comps}{Optional character vector of names of compositions in \code{dataf}. See details for more information.} + +\item{comps_fup}{Character vector of names of compositions at follow-up in \code{dataf}. See details for more information.} \item{covars}{Optional. Character vector of covariates names (non-comp variables) in \code{dataf}. Defaults to NULL.} +\item{analysis_type}{Currently two choices: \code{"cross-sectional"} (default) or \code{"longitudinal"}. Please see details for explanation of these methods.} + +\item{comparisons}{Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods.} + \item{deltas}{A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. Changes in compositions to be computed pairwise. Defaults to 0, 10 and 20 minutes as a proportion of the 1440 minutes in a day (i.e., approximately \code{0.000}, \code{0.007} and \code{0.014}).} -\item{comparisons}{Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods.} - \item{alpha}{Optional. Level of significance. Defaults to 0.05.} } \description{ @@ -41,6 +47,8 @@ values as the function normalises the compositions row-wise to sum to 1 in part Please see the \code{deltacomp} package \code{README.md} file for examples and explanation of the \code{comparisons = "prop-realloc"} and \code{comparisons = "one-v-one"} options. Note from version 0.1.0 to current version, \code{comparisons == "one-v-all"} is depreciated, \code{comparisons == "prop-realloc"} is probably the alternative you are after. + +\code{analysis_type == "longitudinal"} expects a second measurement of the composition (\code{comps_fup}). If longitudinal, the ilr coordinates of the follow-up composition will be calculated and used as covariates to adjust the regression model. } \examples{ predict_delta_comps( @@ -62,7 +70,6 @@ predict_delta_comps( comparisons = "one-v-one", alpha = 0.05 ) - } \author{ Ty Stanford diff --git a/tests/testthat/test_check_input_args.R b/tests/testthat/test_check_input_args.R index 5a30779..ee97fdd 100644 --- a/tests/testthat/test_check_input_args.R +++ b/tests/testthat/test_check_input_args.R @@ -57,30 +57,36 @@ test_that("predict_delta_comps() correctly throws errors via check_input_args() ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, "sed", fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = "sed", covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "At least two compositional components" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, 1, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = 1, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Please supply a character string of the covariate" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, TRUE, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = TRUE, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Please supply a character string of the covariate" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, fa_covars, -1.1, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = -1.1, comparisons = fa_comparisons, alpha = fa_alpha), "deltas must be specified as positive and negative proportions" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, fa_covars, +1.1, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = +1.1, comparisons = fa_comparisons, alpha = fa_alpha), "deltas must be specified as positive and negative proportions" ) expect_error( - predict_delta_comps(dataf = fairclough[1:6, ], fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough[1:6,], y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "The number of rows.*" ) diff --git a/tests/testthat/test_check_strictly_positive_vals.R b/tests/testthat/test_check_strictly_positive_vals.R index ae4f463..ab944f5 100644 --- a/tests/testthat/test_check_strictly_positive_vals.R +++ b/tests/testthat/test_check_strictly_positive_vals.R @@ -38,17 +38,19 @@ test_that("check_strictly_positive_vals() correctly throws errors for bad input" "Values less than" ) expect_error( - predict_delta_comps(dataf = f2, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f2, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Values less than" ) expect_error( - check_strictly_positive_vals(dataf = f3, fa_comps), + check_strictly_positive_vals(dataf = f3, comps = fa_comps), "Values less than" ) expect_error( - predict_delta_comps(dataf = f3, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f3, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Values less than" ) diff --git a/tests/testthat/test_rm_na_data_rows.R b/tests/testthat/test_rm_na_data_rows.R index 8f6141a..05f4002 100644 --- a/tests/testthat/test_rm_na_data_rows.R +++ b/tests/testthat/test_rm_na_data_rows.R @@ -36,7 +36,8 @@ test_that("rm_na_data_rows() correctly removes NAs", { f1[-c(5, 10, 6, 2), c(fa_y, fa_comps, fa_covars)] ) expect_error( - predict_delta_comps(dataf = f1, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f1, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "The number of rows in dataf" ) @@ -45,7 +46,8 @@ test_that("rm_na_data_rows() correctly removes NAs", { f2[0, c(fa_y, fa_comps, fa_covars)] ) expect_error( - predict_delta_comps(dataf = f2, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f2, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "dataf supplied must" )