diff --git a/DESCRIPTION b/DESCRIPTION index 6a046d8..ab49724 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: easybgm Type: Package Title: Extracting and Visualizing Bayesian Graphical Models -Version: 0.3.2 +Version: 0.4.0 Authors@R: c( person("Karoline", "Huth", , "k.huth@uva.nl", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0662-1591")), @@ -10,11 +10,12 @@ Authors@R: c( person("Gali", "Geller", role = c("ctb")) ) Maintainer: Karoline Huth -Description: Fit and visualize the results of a Bayesian analysis of networks commonly found in psychology. - The package supports cross-sectional network models fitted using the packages 'BDgraph', 'bgms' and 'BGGM', - as well as network comparison tests fitted using the packages 'bgms' and 'BBGM'. - The package provides the parameter estimates, posterior inclusion probabilities, inclusion Bayes factor, and the - posterior density of the parameters. In addition, for 'BDgraph' and 'bgms' it allows to assess the posterior +Description: Fit and visualize the results of a Bayesian analysis of networks commonly found in psychology. + The package supports cross-sectional network models for ordinal, binary, continuous, and mixed data, + fitted using the packages 'bgms' (default), 'BDgraph', and 'BGGM', + as well as network comparison tests fitted using the packages 'bgms' and 'BGGM'. + The package provides the parameter estimates, posterior inclusion probabilities, inclusion Bayes factor, and the + posterior density of the parameters. In addition, for 'BDgraph' and 'bgms' it allows to assess the posterior structure space. Furthermore, the package comes with an extensive suite for visualizing results. License: GPL (>= 2) URL: https://github.com/KarolineHuth/easybgm @@ -28,7 +29,7 @@ Depends: Imports: BDgraph, BGGM, - bgms (>= 0.1.4), + bgms (>= 0.1.6.4), dplyr, ggplot2, HDInterval, diff --git a/R/easybgm.R b/R/easybgm.R index 95e8f83..5593207 100644 --- a/R/easybgm.R +++ b/R/easybgm.R @@ -1,133 +1,266 @@ #' @title Bayesian analysis of networks #' -#' @description Easy estimation of a Bayesian analysis of networks to obtain conditional (in)dependence relations between variables in a network. +#' @description Easy estimation of a Bayesian graphical models to obtain +#' conditional (in)dependence relations between variables in a network. #' #' @name easybgm #' -#' @param data An n x p matrix or dataframe containing the variables for n independent observations on p variables. -#' @param type What is the data type? Options: continuous, mixed, ordinal, binary, or blume-capel. -#' @param package The R-package that should be used for fitting the network model; supports BGGM, BDgraph, and bgms. Optional argument; -#' default values are specified depending on the datatype. -#' @param not_cont If data-type is mixed, a vector of length p, specifying the not-continuous -#' variables (1 = not continuous, 0 = continuous). -#' @param iter number of iterations for the sampler. Default is set to 1e3 but the recommended number of samples depends on the underlying package and sampler used. For data fit with BDgraph and BGGM, the default is set to 1e4. -#' @param save Logical. Should the posterior samples be obtained (default = FALSE)? -#' @param centrality Logical. Should the centrality measures be extracted (default = FALSE)? Note, that it will significantly increase the computation time. -#' @param progress Logical. Should a progress bar be shown (default = TRUE)? -#' @param posterior_method Determines how the posterior samples of the edge weight parameters are obtained for models fit with BDgraph. The argument can be either MAP for the maximum-a-posteriori or model-averaged. If MAP, samples are obtained for the edge weights only for the most likely structure. If model-averaged, samples are obtained for all plausible structures weighted by their posterior probability. Default is model-averaged. -#' @param ... Additional arguments that are handed to the fitting functions of the packages, e.g., informed prior specifications. -#' -#' @return The returned object of \code{easybgm} contains several elements: -#' -#' \itemize{ -#' -#' \item \code{parameters} A p x p matrix containing partial associations. -#' -#' \item \code{inc_probs} A p x p matrix containing the posterior inclusion probabilities. -#' -#' \item \code{BF} A p x p matrix containing the posterior inclusion Bayes factors. -#' -#' \item \code{structure} Adjacency matrix of the median probability model (i.e., edges with a posterior probability larger 0.5). -#' } +#' @param data An n x p matrix or dataframe containing the variables for n +#' independent observations on p variables. #' +#' @param type Specifies the type of data. There are two ways to use this argument: #' -#' @return In addition, for `BDgraph` and `bgms`, the function returns: -#' -#' \itemize{ -#' -#' \item \code{structure_probabilities} A vector containing the posterior probabilities of all visited structures, between 0 and 1. -#' -#' \item \code{graph_weights} A vector containing the number of times a particular structure was visited. -#' -#' \item \code{sample_graphs} A vector containing the indexes of a particular structure. -#' -#' For the \code{bgms} package, when \code{edge_prior = "Stochastic-Block"}, -#' the function will also return an object \code{sbm} which contains: +#' \strong{1. A single string} (applies the same type to all variables): #' \itemize{ +#' \item \code{"continuous"}: For continuous (Gaussian) data. Estimates a +#' Gaussian Graphical Model (GGM). +#' \item \code{"ordinal"}: For ordinal (Likert-type) data. Estimates an +#' Ordinal Markov Random Field (OMRF). +#' \item \code{"binary"}: For binary (0/1) data. Estimates an OMRF internally +#' (which reduces to the Ising model when all variables are binary). +#' \item \code{"blume-capel"}: For Blume-Capel ordinal data. Requires a +#' reference category via the \code{baseline_category} argument. +#' +#' \item \code{"mixed"}: For data with both continuous and discrete +#' variables. Requires the \code{not_cont} argument to indicate which +#' variables are not continuous (see below). Internally, this constructs a +#' per-variable type vector. This value should only be specified when +#' fitting a mixed model using \code{package = "BDgraph"} or \code{package = "BGGM"}. +#' See below on how to specify mixed models using bgms. +#' } #' -#' \item \code{posterior_num_blocks} A data frame with the estimated posterior -#' probability of the possible number of clusters. +#' \strong{2. A character vector of length p} (per-variable specification, +#' bgms only): Each element specifies the type of the corresponding column in +#' \code{data}. Valid values are \code{"ordinal"}, \code{"continuous"}, +#' \code{"blume-capel"}, and \code{"binary"} (treated as \code{"ordinal"} +#' internally). This provides full control over mixed-type data without needing +#' the \code{not_cont} argument. #' -#' \item \code{posterior_mean_allocations} The posterior mean of the cluster assignments of the nodes. +#' For example: \code{type = c("ordinal", "ordinal", "continuous")} specifies +#' that the first two columns are ordinal and the third is continuous. #' -#' \item \code{posterior_mode_allocations} The posterior mode of the cluster assignments of the nodes. +#' \emph{Note:} Per-variable type vectors are only supported with the +#' \code{bgms} package. Mixed models with other packages can be fit using +#' \code{type = "mixed"} and the \code{not_cont} argument (see above). #' -#' \item \code{posterior_mean_coclustering_matrix} A p x p matrix containing the estimated -#' pairwise proportions of cluster occurrence of every variable. This matrix -#' can be plotted to visually inspect the estimated number of clusters -#' and visually inspect nodes that tend to switch clusters. +#' @param package The R-package used for fitting the network model. Optional; +#' if not specified, \code{bgms} is used as the default for all data types. +#' Supported options: +#' \itemize{ +#' \item \code{"bgms"} (Default): Supports the following data types: continuous, +#' ordinal, binary, and blume-capel or a combination of the three. +#' Requires bgms >= 0.1.6.4. +#' \item \code{"BDgraph"}: Supports continuous (fits a GGM), mixed (fits a +#' GCGM). For continuous data, missing values are not +#' allowed; use \code{na.omit()} on the data first. Binary data is always +#' routed to bgms regardless of the package argument. +#' \item \code{"BGGM"}: Supports continuous and mixed data. Binary data is +#' always routed to bgms regardless of the package argument. +#' } #' +#' @param not_cont A binary vector of length p, required when +#' \code{type = "mixed"} and a single string is used. Each element indicates +#' whether the corresponding variable is not continuous +#' (\code{1} = not continuous/ordinal, \code{0} = continuous). This is used to +#' construct a per-variable type vector internally. Only neccesary when +#' using \code{package = "BDgraph"} or \code{package = "BGGM"}. +#' +#' For example, for 5 variables where the first two are ordinal and the rest +#' continuous: \code{not_cont = c(1, 1, 0, 0, 0)}. +#' +#' \emph{Note:} This argument is not needed when using a per-variable type +#' vector for the \code{type} argument when \code{package = "bgms"} (default). +#' +#' @param iter Number of iterations for the sampler. The default depends on the +#' package: +#' \itemize{ +#' \item \code{bgms}: 1e3 (1,000 iterations) +#' \item \code{BDgraph}: 1e4 (10,000 iterations) +#' \item \code{BGGM}: 1e4 (10,000 iterations) #' } +#' The recommended number of iterations depends on the data, model complexity, +#' and desired precision. Check the convergence diagnostics in the output to +#' determine if more iterations are needed. +#' +#' @param save Logical. Should the posterior samples be obtained +#' (default = \code{FALSE})? If \code{TRUE}, the output includes a +#' \code{samples_posterior} matrix with the posterior samples for each edge +#' weight parameter. Setting \code{centrality = TRUE} automatically sets +#' \code{save = TRUE}. +#' +#' @param centrality Logical. Should the centrality measures be extracted +#' (default = \code{FALSE})? Note that this will significantly increase +#' computation time. Automatically sets \code{save = TRUE}. +#' +#' @param progress Logical. Should a progress bar be shown +#' (default = \code{TRUE})? +#' +#' @param posterior_method Determines how the posterior samples of the edge +#' weight parameters are obtained for models fit with \code{BDgraph}. Can be +#' either \code{"MAP"} (maximum-a-posteriori) or \code{"model-averaged"} +#' (default). If \code{"MAP"}, samples are obtained for the edge weights only +#' for the most likely structure. If \code{"model-averaged"}, samples are +#' obtained for all plausible structures weighted by their posterior +#' probability. +#' +#' @param ... Additional arguments passed to the fitting functions of the +#' underlying packages (e.g., prior specifications). See the +#' \strong{Prior specification} section in Details for available options per +#' package. +#' +#' @return An object of class \code{easybgm} containing the following elements: +#' +#' \strong{Always returned:} +#' \itemize{ +#' \item \code{parameters}: A p x p matrix of posterior mean partial +#' association estimates. +#' \item \code{inc_probs}: A p x p matrix of posterior inclusion +#' probabilities. +#' \item \code{inc_BF}: A p x p matrix of posterior inclusion Bayes factors. +#' \item \code{structure}: A p x p adjacency matrix of the median probability +#' model (edges with posterior inclusion probability > 0.5). +#' \item \code{model}: A string indicating the model type (e.g., +#' \code{"continuous"}, \code{"ordinal"}, \code{"mixed"}). +#' \item \code{thresholds}: Threshold/intercept parameters (bgms only). The +#' format depends on the model type: a matrix for ordinal/binary models, +#' \code{NULL} for continuous models, or a list for mixed models. #' } #' -#' If using version 0.1.6.1 or higher of the \code{bgms} package, the function also returns the -#' the Gelman-Rubin convergence statistic for each edge weight parameter. As well as the -#' 95% Monte Carlo confidence interval for the inclusion Bayes factor. -#' -#' @return For all packages, when setting `save = TRUE` and `centrality = TRUE`, the function will return the following objects respectively: -#' +#' \strong{Returned for bgms and BDgraph:} #' \itemize{ +#' \item \code{structure_probabilities}: Posterior probabilities of all +#' visited graph structures (values between 0 and 1). +#' \item \code{graph_weights}: Number of times each graph structure was +#' visited. +#' \item \code{sample_graphs}: Identifiers for each visited graph structure. +#' } #' -#' \item \code{samples_posterior} A k x iter matrix containing the posterior samples for each parameter (i.e., k = (p/(p-1))/2) at each iteration (i.e., iter) of the sampler. -#' -#' \item \code{centrality} A p x iter matrix containing the centrality of a node at each iteration of the sampler. +#' \strong{Returned for bgms only:} +#' \itemize{ +#' \item \code{convergence_parameter}: The Gelman-Rubin (R-hat) convergence +#' statistic for each edge weight parameter. Values close to 1 indicate good +#' convergence. +#' \item \code{MCSE_BF}: A matrix with the 95 percent Monte Carlo confidence +#' interval for each inclusion Bayes factor. #' } #' -#' @details +#' \strong{Returned when edge_prior = "Stochastic-Block" (bgms only):} +#' \itemize{ +#' \item \code{sbm}: A list containing Stochastic Block Model results, including +#' \code{posterior_num_blocks} (posterior probabilities for each number of +#' clusters), \code{posterior_mean_allocations} (posterior mean cluster +#' assignments), \code{posterior_mode_allocations} (posterior mode cluster +#' assignments), and \code{posterior_mean_coclustering_matrix} (a p x p matrix +#' of pairwise co-clustering proportions). +#' } #' -#' Users may oftentimes wish to deviate from the default, usually uninformative, prior specifications of the -#' packages to informed priors. This can be done by simply adding additional arguments to the \code{easybgm} function. -#' Depending on the package that is running the underlying network estimation, researcher can specify different prior -#' arguments. We give an overview of the prior arguments per package below. +#' \strong{Interpretable parameter scales (bgms only):} #' -#' \strong{bgms}: +#' In addition to the raw pairwise interaction parameters in +#' \code{parameters}, the following transformations are provided when the +#' model type supports them. They are \code{NULL} otherwise. +#' \itemize{ +#' \item \code{partial_correlations}: A matrix of posterior mean partial +#' correlations. Available for continuous (GGM) models (full p x p matrix) +#' and for the continuous block of mixed models. \code{NULL} for ordinal +#' models. +#' \item \code{precision_matrix}: The posterior mean precision +#' (inverse covariance) matrix. Same availability as partial correlations. +#' \item \code{log_odds}: A matrix of posterior mean log adjacent-category +#' odds ratios. Available for ordinal/binary models (full p x p matrix) +#' and for the discrete block of mixed models. \code{NULL} for continuous +#' models. +#' } #' +#' \strong{Returned when save = TRUE:} #' \itemize{ +#' \item \code{samples_posterior}: A k x iter matrix of posterior samples for +#' each edge weight parameter (k = p*(p-1)/2 edges). +#' } #' -#' \item \code{interaction_scale} the scale of the Cauchy distribution that is used as a -#' prior for the pairwise interaction parameters. The default is 2.5. +#' \strong{Returned when centrality = TRUE:} +#' \itemize{ +#' \item \code{centrality}: An iter x p matrix of centrality values for each +#' node at each iteration. +#' } #' -#' \item \code{edge_prior} prior on the graph structure, which can be either "Bernoulli", "Beta-Bernoulli" or "Stochastic Block". The default is "Bernoulli". +#' @details #' -#' \item \code{inclusion_probability} prior edge inclusion probability for the "Bernoulli" distribution. The default is 0.5. +#' \strong{Data types and package support} #' -#' \item \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta} the parameters of the "Beta-Bernoulli" or "Stochastic Block" priors. The default is 1 for both. +#' The table below summarizes which data types are supported by each backend +#' package: #' -#' \item \code{beta_bernoulli_alpha_between} and \code{beta_bernoulli_beta_between} the parameters of the "Stochastic Block" prior for edges between blocks. -#' This is currently only available in a developer version of bgms and will be available in version 0.1.6.2 or higher. +#' \tabular{lccc}{ +#' \strong{Data type} \tab \strong{bgms} \tab \strong{BDgraph} \tab \strong{BGGM} \cr +#' continuous \tab Yes (default) \tab Yes \tab Yes \cr +#' ordinal \tab Yes (default) \tab Yes \tab No \cr +#' binary \tab Yes (always) \tab No \tab No \cr +#' mixed \tab Yes (default) \tab Yes \tab Yes \cr +#' blume-capel \tab Yes (default) \tab No \tab No \cr +#' } #' -#' \item \code{dirichlet_alpha} The shape of the Dirichlet prior on the node-to-block -#' allocation parameters for the Stochastic Block prior on the graph structure. +#' \strong{How \code{type} and \code{not_cont} relate} +#' +#' There are two ways to specify mixed-type data: +#' \enumerate{ +#' \item \strong{Using \code{type = "mixed"} with \code{not_cont}}: Set +#' \code{type = "mixed"} and provide a binary vector \code{not_cont} of +#' length p. Internally, this is translated to a per-variable type vector +#' where \code{not_cont == 1} maps to \code{"ordinal"} and +#' \code{not_cont == 0} maps to \code{"continuous"}. +#' \item \strong{Using a per-variable type vector} (bgms only): Directly +#' specify the type of each variable, e.g., +#' \code{type = c("ordinal", "continuous", "ordinal")}. This is more +#' flexible and does not require the \code{not_cont} argument. +#' } #' -#' \item \code{threshold_alpha} and \code{threshold_beta} the parameters of the beta-prime distribution for the threshold parameters. The defaults are both set to 1. +#' \strong{Prior specification} #' -#' \item \code{variable_type} What kind of variables are there in \code{x}? Can be a -#' single character string specifying the variable type of all \code{p} -#' variables at once or a vector of character strings of length \code{p} -#' specifying the type for each variable in \code{x} separately. Currently, bgm -#' supports ``ordinal'' and ``blume-capel''. Binary variables are automatically -#' treated as ``ordinal’’. Defaults to \code{variable_type = "ordinal"}. +#' Users may wish to deviate from the default (uninformative) prior +#' specifications. This can be done by passing additional arguments via +#' \code{...} to the fitting function of the chosen package. We give an +#' overview of the available prior arguments per package below. #' +#' \emph{bgms}: +#' \itemize{ +#' \item \code{pairwise_scale}: Scale of the Cauchy prior on the pairwise +#' interaction parameters. Default is 2.5. +#' \item \code{edge_prior}: Prior on the graph structure: \code{"Bernoulli"} +#' (default), \code{"Beta-Bernoulli"}, or \code{"Stochastic-Block"}. +#' \item \code{inclusion_probability}: Prior edge inclusion probability for +#' the \code{"Bernoulli"} prior. Default is 0.5. This can also +#' be a symmetric pxp matrix of edge-specific inclusion probabilities. +#' \item \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}: (Within) Shape +#' parameters of the \code{"Beta-Bernoulli"} or \code{"Stochastic-Block"} +#' priors. Both default to 1. +#' \item \code{beta_bernoulli_alpha_between} and +#' \code{beta_bernoulli_beta_between}: Shape parameters of the +#' \code{"Stochastic-Block"} prior for between-block edges. +#' \item \code{dirichlet_alpha}: Shape of the Dirichlet prior on node-to-block +#' allocations for the \code{"Stochastic-Block"} prior. +#' \item \code{threshold_alpha} and \code{threshold_beta}: Parameters of the +#' beta-prime prior on threshold parameters. Both default to 1. #' } #' -#' \strong{BDgraph}: -#' +#' \emph{BDgraph}: #' \itemize{ -#' -#' \item \code{df.prior} prior on the parameters (i.e., inverse covariance matrix), degrees of freedom of the prior G-Wishart distribution. The default is set to 3. -#' -#' \item \code{g.prior} prior probability of edge inclusion. This can be either a scalar, if it is the same for all edges, or a matrix, if it should be different among the edges. The default is set to 0.5. -#' +#' \item \code{df.prior}: Degrees of freedom of the prior G-Wishart +#' distribution on the precision matrix. Default is 3. +#' \item \code{g.prior}: Prior probability of edge inclusion. Can be a +#' scalar (same for all edges) or a matrix (edge-specific). +#' This can also be a symmetric pxp matrix of edge-specific inclusion probabilities. +#' Default is 0.5. #' } -#' \strong{BGGM}: #' +#' \emph{BGGM}: #' \itemize{ -#' -#' \item \code{prior_sd} the standard deviation of the prior distribution of the interaction parameters, approximately the scale of a beta distribution. The default is 0.25. - +#' \item \code{prior_sd}: Standard deviation of the prior on interaction +#' parameters (approximately the scale of a beta distribution). Default is +#' 0.25. #' } #' -#' We would always encourage researcher to conduct prior robustness checks. +#' We encourage researchers to conduct prior sensitivity checks. #' #' @export #' @@ -138,27 +271,39 @@ #' #' @examples #' -#' #' library(easybgm) #' library(bgms) #' #' data <- na.omit(Wenchuan) #' -#' # Fitting the Wenchuan PTSD data -#' +#' # --- Continuous data (default: bgms) --- #' fit <- easybgm(data, type = "continuous", -#' iter = 100 # for demonstration only +#' iter = 100 # for demonstration only; increase for real analyses #' ) -#' #' summary(fit) #' #' \donttest{ -#' # To extract the posterior parameter distribution -#' # and centrality measures -#' -#' fit <- easybgm(data, type = "continuous", -#' iter = 100, # for demonstration only -#' centrality = TRUE, save = TRUE) +#' # --- Mixed data using per-variable type vector (bgms only) --- +#' dat3 <- data[, 1:3] +#' fit_vec <- easybgm(dat3, +#' type = c("ordinal", "ordinal", "continuous"), +#' iter = 100) +#' +#' # --- Extract posterior samples and centrality --- +#' fit_full <- easybgm(data, type = "continuous", +#' iter = 100, +#' centrality = TRUE, save = TRUE) +#' +#' # --- Using BDgraph for continuous data --- +#' fit_bd <- easybgm(data, type = "continuous", +#' package = "BDgraph", +#' iter = 100) +#' +#' # --- Using BGGM for continuous data --- +#' fit_bggm <- easybgm(data, type = "continuous", +#' package = "BGGM", +#' iter = 100) +#' #' } @@ -167,8 +312,38 @@ easybgm <- function(data, type, package = NULL, not_cont = NULL, iter = 1e3, sav centrality = FALSE, progress = TRUE, posterior_method = "model-averaged", ...){ + # --- Handle vector type (per-variable specification) --- + # When type is a vector of length > 1 (e.g., c("ordinal", "ordinal", "continuous")), + # it specifies the variable type for each column. This is only supported with bgms. + is_vector_type <- length(type) > 1 + + if(is_vector_type) { + valid_types <- c("ordinal", "continuous", "blume-capel", "binary") + invalid <- type[!type %in% valid_types] + if(length(invalid) > 0) { + warning("The following variable type(s) are not recognized: ", + paste0("'", unique(invalid), "'", collapse = ", "), ". ", + "Valid types are: ", paste(valid_types, collapse = ", "), ". ", + "Please check for typos.", + call. = FALSE) + stop("Invalid variable types detected. See the warning message for more details.", + call. = FALSE) + } + if(length(type) != ncol(data)) { + stop("When 'type' is a vector, its length (", length(type), ") must equal ", + "the number of columns in 'data' (", ncol(data), ").", + call. = FALSE) + } + if(!is.null(package) && package != "bgms") { + stop("A per-variable 'type' vector is only supported with package = 'bgms'.", + call. = FALSE) + } + # Map "binary" to "ordinal" internally + type[type == "binary"] <- "ordinal" + package <- "package_bgms" + } - if(type == "mixed" & is.null(not_cont)){ + if(!is_vector_type && length(type) == 1 && type == "mixed" && is.null(not_cont)){ stop("Please provide a binary vector of length p specifying the not continuous variables (1 = not continuous, 0 = continuous).", call. = FALSE) @@ -178,8 +353,8 @@ easybgm <- function(data, type, package = NULL, not_cont = NULL, iter = 1e3, sav has_reference <- "reference_category" %in% names(dots) has_baseline <- "baseline_category" %in% names(dots) - # Example: If type == "blume-capel", then at least one must be present - if (type == "blume-capel" && !(has_reference || has_baseline)) { + # If type contains "blume-capel", a reference category must be present + if (any(type == "blume-capel") && !(has_reference || has_baseline)) { stop("For the Blume-Capel model, a reference category needs to be specified. If type is 'blume-capel' it specifies the reference category in the Blume-Capel model. Should be an integer within the range of integer scores observed for the @@ -198,18 +373,18 @@ easybgm <- function(data, type, package = NULL, not_cont = NULL, iter = 1e3, sav # Set default values for fitting if package is unspecified if(is.null(package)){ - if(type == "continuous") package <- "package_bggm" - if(type == "mixed") package <- "package_bggm" - if(type == "ordinal") package <- "package_bgms" - if(type == "binary") package <- "package_bgms" - if(type == "blume-capel") package <- "package_bgms" - } else { + if(length(type) == 1 && type == "continuous") package <- "package_bgms" + if(length(type) == 1 && type == "mixed") package <- "package_bgms" + if(length(type) == 1 && type == "ordinal") package <- "package_bgms" + if(length(type) == 1 && type == "binary") package <- "package_bgms" + if(length(type) == 1 && type == "blume-capel") package <- "package_bgms" + } else if(!is_vector_type) { if(package == "BDgraph") package <- "package_bdgraph" if(package == "BGGM") package <- "package_bggm" if(package == "bgms") package <- "package_bgms" if(type == "binary") package <- "package_bgms" } - + # change the default number of iterations depending on the underlying package if(iter == 1e3 && package == "package_bdgraph"){ iter <- 1e4 @@ -217,20 +392,14 @@ easybgm <- function(data, type, package = NULL, not_cont = NULL, iter = 1e3, sav iter <- 1e4 } - if(type =="continuous" & package == "package_bdgraph" & any(is.na(data))){ - warning("The data contains missing values which cannot be handled as continuous data by BDgraph. - Note that we switched the type to \"mixed\", which estimates a GCGM and can impute missing data.") - type <- "mixed" - not_cont <- rep(0, ncol(data)) + if(length(type) == 1 && type == "continuous" && package == "package_bdgraph" && any(is.na(data))){ + stop("The data contains missing values which cannot be handled as continuous data by BDgraph (GGM). ", + "Please either:\n", + " 1) Remove missing values first (e.g., data <- na.omit(data)), or\n", + " 2) Set type = 'mixed' to estimate a GCGM, which can handle missing data.", + call. = FALSE) } - if((package == "package_bgms") & (type %in% c("continuous", "mixed"))){ - warning("bgms can only fit ordinal, binary, or blume-capel datatypes. For continuous or mixed data, - choose either the BDgraph or BGGM package. By default we have changed the package to BDgraph", - call. = FALSE) - package <- "package_bdgraph" - - } fit <- list() diff --git a/R/easybgm_compare.R b/R/easybgm_compare.R index e541108..e12fad5 100644 --- a/R/easybgm_compare.R +++ b/R/easybgm_compare.R @@ -1,103 +1,181 @@ #' @title Compare networks across groups using Bayesian inference #' -#' @description Easy comparison of networks using Bayesian inference to extract differences in conditional (in)dependence across groups. -#' +#' @description Easy comparison of networks using Bayesian inference to extract +#' differences in conditional (in)dependence relations across groups. +#' #' @name easybgm_compare #' -#' @param data A list with two n x p matrices or dataframes containing the variables for n independent observations on -#' p variables for two groups. Note that the variables need to be the same in the two different dataframes. Alternatively, -#' when "bgms" version > 0.1.6 is installed, 'data' can also be a matrix of binary and ordinal responses from all groups. -#' If this is the case, the 'group_indicator' argument also needs to be specified. -#' @param type What is the data type? Options: continuous, mixed, ordinal, binary, or blume-capel. -#' @param package The R-package that should be used for fitting the network model; supports BGGM and bgms. Optional argument; -#' default values are specified depending on the datatype. -#' @param not_cont If data-type is mixed, a vector of length p, specifying the not-continuous -#' variables (1 = not continuous, 0 = continuous). -#' @param group_indicator Optional integer vector of group memberships for the rows of the dataframe (multi-group comparison), -#' when data is a matrix instead of a list of two dataframes. -#' @param iter number of iterations for the sampler. Default is 1e3 for data fit with bgms and to 1e4 for data with with BGGM. -#' @param save Logical. Should the posterior samples be obtained (default = TRUE)? -#' @param progress Logical. Should a progress bar be shown (default = TRUE)? -#' @param ... Additional arguments that are handed to the fitting functions of the packages, e.g., informed prior specifications. -#' -#' -#' @return The returned object of \code{easybgm} contains several elements: +#' @param data The data can be provided in two formats: #' -#' \itemize{ +#' \strong{1. A list of two dataframes} (two-group comparison): Each element +#' is an n x p matrix or dataframe for one group. The variables (columns) must +#' be the same across both dataframes. This format is supported by both +#' \code{bgms} and \code{BGGM}. #' -#' \item \code{parameters} A p x p matrix containing difference across partial associations. +#' \strong{2. A single matrix or dataframe} (multi-group comparison): An n x p +#' matrix containing responses from all groups combined. Requires the +#' \code{group_indicator} argument to specify which rows belong to which +#' group. This format supports two or more groups and is only available with +#' the \code{bgms} package. #' -#' \item \code{inc_probs} A p x p matrix containing the posterior inclusion probabilities of subgroup differences. +#' @param type Specifies the data type. Currently supported types for group +#' comparison: +#' \itemize{ +#' \item \code{"ordinal"}: For ordinal (Likert-type) data. Default package: +#' bgms. +#' \item \code{"binary"}: For binary (0/1) data. Always uses bgms. +#' \item \code{"blume-capel"}: For Blume-Capel ordinal data. Requires +#' \code{baseline_category}. Always uses bgms. +#' \item \code{"continuous"}: Only supported with +#' \code{package = "BGGM"} (must be set explicitly). Not yet supported via +#' bgms. +#' \item \code{"mixed"}: Only supported with +#' \code{package = "BGGM"} (must be set explicitly). Requires +#' \code{not_cont}. Not yet supported via bgms for group comparison. +#' } #' -#' \item \code{inc_BF} A p x p matrix containing the posterior inclusion Bayes factors of subgroup differences. +#' @param package The R-package used for fitting the comparison model. Optional; +#' if not specified, \code{bgms} is used as the default for ordinal, binary, +#' and blume-capel data. Supported options: +#' \itemize{ +#' \item \code{"bgms"} (Default): Supports ordinal, binary, and blume-capel +#' data. Supports both two-group (list input) and multi-group (single +#' dataframe + \code{group_indicator}) comparisons. +#' \item \code{"BGGM"}: Supports continuous and mixed data. Only supports +#' two-group comparison (list input). Binary data is always routed to +#' bgms regardless. +#' } #' -#' \item \code{structure} Adjacency matrix of the median probability model (i.e., edges with a posterior probability larger 0.5). -#' } +#' @param not_cont A binary vector of length p, required when +#' \code{type = "mixed"}. Each element indicates whether the corresponding +#' variable is not continuous (\code{1} = not continuous/ordinal, +#' \code{0} = continuous). #' +#' @param group_indicator An integer vector of length n specifying group +#' membership for each row in \code{data}. Required when \code{data} is a +#' single matrix/dataframe (multi-group comparison). Supports two or more +#' groups (e.g., \code{rep(c(1, 2, 3), each = 50)} for three groups of 50 +#' observations each). Only available with the \code{bgms} package. #' -#' @return In addition, for `bgms`, the function returns: +#' @param iter Number of iterations for the sampler. The default depends on the +#' package: +#' \itemize{ +#' \item \code{bgms}: 1e4 (10,000 iterations) +#' \item \code{BGGM}: 1e4 (10,000 iterations) +#' } +#' The recommended number of iterations depends on the data and model +#' complexity. Check convergence diagnostics in the output. #' -#' \itemize{ +#' @param save Logical. Should the posterior samples be obtained +#' (default = \code{TRUE})? If \code{TRUE}, the output includes a +#' \code{samples_posterior} matrix with posterior samples for each difference +#' parameter. #' -#' \item \code{structure_probabilities} A vector containing the posterior probabilities of all visited structures, between 0 and 1. +#' @param progress Logical. Should a progress bar be shown +#' (default = \code{TRUE})? #' -#' \item \code{graph_weights} A vector containing the number of times a particular structure was visited. +#' @param ... Additional arguments passed to the fitting functions of the +#' underlying packages (e.g., prior specifications). Consult the documentation +#' of \code{bgms} and \code{BGGM} for the specific options available. #' -#' \item \code{sample_graph} A vector containing the indexes of a particular structure. -#' -#' \item \code{convergence_parameter} A vector containing the R-hat (Gelman–Rubin) statistic for the difference parameter measuring how well MCMC chains have converged to the same target distribution. -#' } +#' @return An object of class \code{easybgm_compare} with the following +#' elements: #' -#' @return For both packages, when setting `save = TRUE`, the function will also return the following object: +#' \strong{Always returned:} +#' \itemize{ +#' \item \code{parameters}: A p x p matrix of posterior mean differences in +#' partial associations across groups. +#' \item \code{inc_probs}: A p x p matrix of posterior inclusion probabilities +#' for group differences (i.e., the probability that an edge differs between +#' groups). +#' \item \code{inc_BF}: A p x p matrix of inclusion Bayes factors for group +#' differences. +#' \item \code{structure}: A p x p adjacency matrix of the median probability +#' model for differences (edges with posterior inclusion probability > 0.5). +#' \item \code{model}: A string indicating the data type used. +#' } #' +#' \strong{Returned for bgms only:} #' \itemize{ +#' \item \code{structure_probabilities}: Posterior probabilities of all +#' visited graph structures. +#' \item \code{graph_weights}: Number of times each structure was visited. +#' \item \code{sample_graph}: Identifiers for each visited structure. +#' \item \code{convergence_parameter}: The Gelman-Rubin (R-hat) convergence +#' statistic for each difference parameter. Values close to 1 indicate +#' good convergence. +#' } #' -#' \item \code{samples_posterior} A k x iter matrix containing the posterior samples of parameter differences (i.e., k = (p/(p-1))/2) at each iteration (i.e., iter) of the sampler. - +#' \strong{Returned when save = TRUE:} +#' \itemize{ +#' \item \code{samples_posterior}: A k x iter matrix of posterior samples for +#' each difference parameter (k = p*(p-1)/2 edges). #' } #' #' @details -#' -#' Users may oftentimes wish to deviate from the default, usually uninformative, prior specifications of the -#' packages to informed priors. This can be done by simply adding additional arguments to the \code{easybgm} function. -#' Depending on the package that is running the underlying network estimation, researcher can specify different prior -#' arguments. Please consult the original packages "bgms" and "BGGM" for the specific informed prior options. #' -#' We always encourage researcher to conduct prior robustness checks. +#' \strong{Data types and package support for group comparison} +#' +#' \tabular{lcc}{ +#' \strong{Data type} \tab \strong{bgms} \tab \strong{BGGM} \cr +#' ordinal \tab Yes (default) \tab No \cr +#' binary \tab Yes (always) \tab No \cr +#' blume-capel \tab Yes (default) \tab No \cr +#' continuous \tab No \tab Yes (explicit)\cr +#' mixed \tab No \tab Yes (explicit)\cr +#' } +#' +#' For continuous or mixed data comparison, you must explicitly set +#' \code{package = "BGGM"}. If no package is specified for these types, the +#' function will return an informative error. +#' +#' \strong{Two-group vs. multi-group comparison} +#' +#' \itemize{ +#' \item \strong{Two-group}: Provide \code{data} as a list of two +#' dataframes. Supported by both \code{bgms} and \code{BGGM}. +#' \item \strong{Multi-group}: Provide \code{data} as a single +#' matrix/dataframe and specify \code{group_indicator}. Supports two or more +#' groups. Only available with \code{bgms}. +#' } +#' +#' \strong{Prior specification} +#' +#' Users may wish to adjust priors via the \code{...} argument. Consult the +#' documentation of \code{\link[bgms]{bgm}} and \code{\link[BGGM]{explore}} for +#' specific prior options. #' +#' We always encourage researchers to conduct prior sensitivity checks. #' #' @export #' @import bgms #' @importFrom BGGM explore select #' @importFrom utils packageVersion -#' +#' #' @examples -#' +#' #' \donttest{ #' library(easybgm) #' library(bgms) #' #' data <- na.omit(ADHD) #' +#' # --- Two-group comparison (list input) --- #' group1 <- data[1:10, 1:3] #' group2 <- data[11:20, 1:3] #' -#' # Fitting the Wenchuan PTSD data -#' -#' fit <- easybgm_compare(list(group1, group2), +#' fit <- easybgm_compare(list(group1, group2), #' type = "binary", save = TRUE, -#' iter = 50 # for demonstration only; more samples required, check the defaults for each sampler +#' iter = 50 # for demonstration only #' ) -#' #' summary(fit) -#' -#' # For multigroup estimation -#' fit_multi <- easybgm_compare(data[1:200, 1:5], +#' +#' # --- Multi-group comparison (single dataframe + group_indicator) --- +#' fit_multi <- easybgm_compare(data[1:200, 1:5], #' group_indicator = rep(c(1, 2, 3, 4), each = 50), -#' type = "binary", save = TRUE, -#' iter = 100 # for demonstration only; more samples required, check the defaults for each sampler +#' type = "binary", save = TRUE, +#' iter = 100 # for demonstration only #' ) -#' #' summary(fit_multi) #' } @@ -116,17 +194,22 @@ easybgm_compare <- function(data, stop("Your data can't be read. There are two options of providing your data: 1) Provide two datasets in a list containing only the two datasets, or for ordinal data with the bgms pacakge > 0.1.6. 2) provide the data as a matrix or data.frame together with specifying the 'group_indicator' argument, which then also allows for multi-group comparison.", call. = FALSE) } + if(type %in% c("continuous", "mixed") && (is.null(package) || package == "bgms")){ + stop("Group comparison via bgms currently only supports ordinal and binary data types. + For continuous or mixed data comparison, explicitly set package = 'BGGM'.", + call. = FALSE) + } if(type == "mixed" & is.null(not_cont)){ stop("Please provide a binary vector of length p specifying the not continuous variables (1 = not continuous, 0 = continuous).", call. = FALSE) } - - + + dots <- list(...) has_reference <- "reference_category" %in% names(dots) has_baseline <- "baseline_category" %in% names(dots) - + # Example: If type == "blume-capel", then at least one must be present if (type == "blume-capel" && !(has_reference || has_baseline)) { stop("For the Blume-Capel model, a reference category needs to be specified. @@ -146,8 +229,6 @@ easybgm_compare <- function(data, # Set default values for fitting if package is unspecified if(is.null(package)){ - if(type == "continuous") package <- "package_bggm_compare" - if(type == "mixed") package <- "package_bggm_compare" if(type == "ordinal") package <- "package_bgms_compare" if(type == "binary") package <- "package_bgms_compare" if(type == "blume-capel") package <- "package_bgms_compare" @@ -156,20 +237,12 @@ easybgm_compare <- function(data, if(package == "bgms") package <- "package_bgms_compare" if(type == "binary") package <- "package_bgms_compare" } - + # change default number of iterations for BGGM fits if (iter == 1e3 && package == "package_bgms_compare"){ iter <- 1e4 } - if((package == "package_bgms_compare") & (type %in% c("continuous", "mixed"))){ - warning("bgms can only fit ordinal or binary datatypes. For continuous or mixed data, - choose the BGGM package. By default we have changed the package to BGGM", - call. = FALSE) - package <- "package_bggm_compare" - - } - if(is.data.frame(data) && package == "package_bggm_compare"){ stop("Your data can't be read. For continuous data fit with BGGM, you can only provide two datasets in a list.", call. = FALSE) diff --git a/R/functions.bgms.R b/R/functions.bgms.R index d844d98..82ba978 100644 --- a/R/functions.bgms.R +++ b/R/functions.bgms.R @@ -4,30 +4,33 @@ #' @export bgm_fit.package_bgms <- function(fit, type, data, iter, save, not_cont, centrality, progress, ...){ - - - if(type == "binary") { - type <- "ordinal" + + # Store original easybgm type before mapping + original_type <- type + + # Map easybgm type to bgms variable_type + if(length(type) > 1) { + # Vector type: per-variable specification, map binary -> ordinal, pass to bgms + variable_type <- type + variable_type[variable_type == "binary"] <- "ordinal" + # Store a simplified label for display + original_type <- if(length(unique(type)) == 1) unique(type) else "mixed" + } else if(type == "binary") { + variable_type <- "ordinal" + } else if(type == "mixed") { + variable_type <- ifelse(not_cont == 1, "ordinal", "continuous") + } else { + variable_type <- type } - - if(packageVersion("bgms") > "0.1.4.2"){ - bgms_fit <- do.call( - bgm, c(list(x = data, iter = iter, - variable_type = type, - display_progress = progress, - ...)) - )} - if(packageVersion("bgms") < "0.1.6"){ - bgms_fit <- do.call( - bgm, c(list(x = data, iter = iter, save = T, - variable_type = type, - display_progress = progress, - ...)) - )} - - - - fit$model <- type + + bgms_fit <- do.call( + bgm, c(list(x = data, iter = iter, + variable_type = variable_type, + display_progress = progress, + ...)) + ) + + fit$model <- original_type fit$packagefit <- bgms_fit if(is.null(colnames(data))){ fit$var_names <- paste0("V", 1:ncol(data)) @@ -44,20 +47,24 @@ bgm_fit.package_bgms <- function(fit, type, data, iter, save, #' @export bgm_extract.package_bgms <- function(fit, type, save, iter, not_cont, data, centrality, ...){ - if (packageVersion("bgms") < "0.1.4") { - stop("easybgm now requires bgms version 0.1.4 or higher.") - } # --- Ensure proper bgms object and variable names --- + # Determine display-friendly model label + if(length(type) > 1) { + model_label <- if(length(unique(type)) == 1) unique(type) else "mixed" + } else { + model_label <- type + } + if (!inherits(fit, "bgms")) { varnames <- fit$var_names fit <- fit$packagefit class(fit) <- "bgms" } else { - varnames <- fit$arguments$data_columnnames + args_tmp <- extract_arguments(fit) + varnames <- args_tmp$data_columnnames if (is.null(varnames)) { - varnames <- paste0("V", 1:fit$arguments$no_variables)} - } - if(packageVersion("bgms") > "0.1.4.2"){ + varnames <- paste0("V", 1:args_tmp$num_variables) + } class(fit) <- "bgms" } @@ -73,7 +80,7 @@ bgm_extract.package_bgms <- function(fit, type, save, iter, args$edge_selection <- FALSE } } - if (args$edge_prior[1] == "Stochastic-Block" && packageVersion("bgms") > "0.1.6") { + if (args$edge_prior[1] == "Stochastic-Block") { bgms_res$sbm <- extract_sbm(fit) } # extract the prior inclusion probabilities @@ -133,11 +140,11 @@ bgm_extract.package_bgms <- function(fit, type, save, iter, } # --- Main extraction --- if (args$save) { - p <- args$no_variables + p <- args$num_variables pars <- extract_pairwise_interactions(fit) bgms_res$parameters <- vector2matrix(colMeans(pars), p = p) bgms_res$samples_posterior <- extract_pairwise_interactions(fit) - bgms_res$thresholds <- extract_category_thresholds(fit) + bgms_res$thresholds <- extract_main_effects(fit) colnames(bgms_res$parameters) <- varnames bgms_res$structure <- matrix(1, ncol = p, nrow = p) if (args$edge_selection) { @@ -150,7 +157,7 @@ bgm_extract.package_bgms <- function(fit, type, save, iter, (args$beta_bernoulli_alpha / args$beta_bernoulli_beta) } else if (args$edge_prior[1] == "Stochastic-Block") { # when there is only one set of hyperparameters for the beta bernoulli - if (is.null(args$beta_bernoulli_alpha_between) | packageVersion("bgms") < "0.1.6") { + if (is.null(args$beta_bernoulli_alpha_between)) { bgms_res$inc_BF <- (bgms_res$inc_probs / (1 - bgms_res$inc_probs)) / (args$beta_bernoulli_alpha / args$beta_bernoulli_beta) } else { @@ -181,10 +188,10 @@ bgm_extract.package_bgms <- function(fit, type, save, iter, bgms_res$sample_graph <- as.character(table_structures[, 1]) } } else { - p <- args$no_variables + p <- args$num_variables pars <- extract_pairwise_interactions(fit) bgms_res$parameters <- vector2matrix(colMeans(pars), p = p) - bgms_res$thresholds <- extract_category_thresholds(fit) + bgms_res$thresholds <- extract_main_effects(fit) colnames(bgms_res$parameters) <- varnames bgms_res$structure <- matrix(1, ncol = ncol(bgms_res$parameters), nrow = nrow(bgms_res$parameters)) @@ -234,20 +241,30 @@ bgm_extract.package_bgms <- function(fit, type, save, iter, bgms_res$centrality <- centrality(bgms_res) } - # --- For newer version compute convergence --- - if (packageVersion("bgms") > "0.1.4.2" && args$edge_selection == T) { - + # --- Compute convergence diagnostics --- + if (args$edge_selection == TRUE) { # extract the Rhat - bgms_res$convergence_parameter <- fit$posterior_summary_pairwise$Rhat + bgms_res$convergence_parameter <- extract_rhat(fit)$pairwise # calculate MC uncertainty - bgms_res$MCSE_BF <-BF_MCSE(gamma_mat = extract_indicators(fit), - BF_vec = bgms_res$inc_BF[lower.tri(bgms_res$inc_BF)], - ess = fit$posterior_summary_indicator$n_eff, - return = "ci", - smooth_bf = FALSE) + bgms_res$MCSE_BF <- BF_MCSE(gamma_mat = extract_indicators(fit), + BF_vec = bgms_res$inc_BF[lower.tri(bgms_res$inc_BF)], + ess = extract_ess(fit)$indicator, + return = "ci", + smooth_bf = FALSE) } + # --- Extract interpretable parameter scales --- + # These return NULL when the model type doesn't support them. + # tryCatch guards against edge cases (e.g., mixed models with only + # one variable of a given type, where the sub-matrix is a scalar). + bgms_res$partial_correlations <- tryCatch( + extract_partial_correlations(fit), error = function(e) NULL) + bgms_res$precision_matrix <- tryCatch( + extract_precision(fit), error = function(e) NULL) + bgms_res$log_odds <- tryCatch( + extract_log_odds(fit), error = function(e) NULL) + # --- Finalize output --- - bgms_res$model <- type + bgms_res$model <- model_label bgms_res$fit_arguments <- args output <- bgms_res class(output) <- c("package_bgms", "easybgm") diff --git a/R/functions.bgmscompare.R b/R/functions.bgmscompare.R index c17fbd1..c4a9ac4 100644 --- a/R/functions.bgmscompare.R +++ b/R/functions.bgmscompare.R @@ -4,26 +4,22 @@ #' @export bgm_fit.package_bgms_compare <- function(fit, type, data, group_indicator, iter, save, not_cont, progress, ...){ - if(packageVersion("bgms") < "0.1.4"){ - stop("The package requires at least 'bgms' version of 0.1.4.", - call. = FALSE) - } - + if(type == "binary") { type <- "ordinal" } if(!is.data.frame(data)){ group_indicator <- NULL } - - if(packageVersion("bgms") < "0.1.6.0"){ + + if(is.null(group_indicator)){ bgms_fit <- do.call( - bgmCompare, c(list(x = data[[1]], y = data[[2]], iter = iter, save = TRUE, - variable_type = type, - display_progress = progress, + bgmCompare, c(list(x = data[[1]], y = data[[2]], iter = iter, + variable_type = type, + display_progress = progress, ...)) ) - + fit$model <- type fit$packagefit <- bgms_fit if(is.null(colnames(data[[1]]))){ @@ -31,45 +27,23 @@ bgm_fit.package_bgms_compare <- function(fit, type, data, group_indicator, iter, } else { fit$var_names <- colnames(data[[1]]) } - } - if(packageVersion("bgms") > "0.1.4.2"){ - if(is.null(group_indicator)){ - bgms_fit <- do.call( - bgmCompare, c(list(x = data[[1]], y = data[[2]], iter = iter, - variable_type = type, - display_progress = progress, - ...)) - ) - - fit$model <- type - fit$packagefit <- bgms_fit - if(is.null(colnames(data[[1]]))){ - fit$var_names <- paste0("V", 1:ncol(data[[1]])) - } else { - fit$var_names <- colnames(data[[1]]) - } - } - if(!is.null(group_indicator)){ - bgms_fit <- do.call( - bgmCompare, c(list(x = data, group_indicator = group_indicator, - iter = iter, - variable_type = type, - display_progress = progress, - ...)) - ) - - fit$model <- type - fit$packagefit <- bgms_fit - if(is.null(colnames(data))){ - fit$var_names <- paste0("V", 1:ncol(data)) - } else { - fit$var_names <- colnames(data) - } + } else { + bgms_fit <- do.call( + bgmCompare, c(list(x = data, group_indicator = group_indicator, + iter = iter, + variable_type = type, + display_progress = progress, + ...)) + ) + + fit$model <- type + fit$packagefit <- bgms_fit + if(is.null(colnames(data))){ + fit$var_names <- paste0("V", 1:ncol(data)) + } else { + fit$var_names <- colnames(data) } - } - - class(fit) <- c("package_bgms_compare", "easybgm") return(fit) @@ -90,183 +64,134 @@ bgm_extract.package_bgms_compare <- function(fit, type, save, group_indicator, } else if (any(class(fit) == "bgmCompare")){ varnames <- extract_arguments(fit)$data_columnnames if(is.null(varnames)){ - varnames <- paste0("V", 1:extract_arguments(fit)$no_variables) + varnames <- paste0("V", 1:extract_arguments(fit)$num_variables) } } - + ######-------------------- - ## Old package two group estimation + ## Two group estimation ######-------------------- - - if(packageVersion("bgms") < "0.1.6.0"){ - args <- fit$arguments - - if (args$pairwise_difference_prior[1] == "Bernoulli") { - edge.prior <- args$inclusion_probability_difference + if(is.null(group_indicator)){ + class(fit) <- c("bgmCompare") + + args <- extract_arguments(fit) + args$save <- TRUE + dots <- list(...) + if (args$difference_prior[1] == "Bernoulli") { + if("difference_probability" %in% dots){ + edge.prior <- args$difference_probability + args$inclusion_probability_difference <- edge.prior + } else { + edge.prior <- 0.5 + args$inclusion_probability_difference <- edge.prior + } } else { # if BB or SBM edge.prior <- args$beta_bernoulli_alpha / (args$beta_bernoulli_alpha + args$beta_bernoulli_beta) - + # otherwise it saves the wrong values (could be done more elegantly) args$inclusion_probability_difference <- edge.prior } - - + bgms_res <- list() - - - p <- args$no_variables - pars <- fit$pairwise_difference - bgms_res$parameters <- vector2matrix(colMeans(pars), p = p) + + p <- args$num_variables + # TODO: replace with extractor when bgms adds one for difference means + pars <- fit$posterior_summary_pairwise_differences$mean + bgms_res$parameters <- vector2matrix(pars, p = p) colnames(bgms_res$parameters) <- varnames - bgms_res$structure <- matrix(1, ncol = ncol(bgms_res$parameters), + bgms_res$structure <- matrix(1, ncol = ncol(bgms_res$parameters), nrow = nrow(bgms_res$parameters)) - bgms_res$inc_probs <- vector2matrix(colMeans(fit$pairwise_difference_indicator), p = p) + indicators <- extract_indicators(fit) + bgms_res$inc_probs <- vector2matrix(colMeans(indicators[, grep("\\(pairwise\\)", colnames(indicators))]), p = p) bgms_res$inc_BF <- (bgms_res$inc_probs/(1-bgms_res$inc_probs))/(edge.prior /(1 - edge.prior)) bgms_res$structure <- 1*(bgms_res$inc_probs > 0.5) - - bgms_res$parameters_g1 <- vector2matrix(colMeans(fit$interactions), p = p) + bgms_res$parameters/2 - bgms_res$parameters_g2 <- vector2matrix(colMeans(fit$interactions), p = p) - bgms_res$parameters/2 - - structures <- apply(fit$pairwise_difference_indicator, 1, paste0, collapse="") + + #Obtain structure information + bgms_res$group_estimates <- extract_group_params(fit)$pairwise_effects_groups + bgms_res$parameters_g1 <- vector2matrix(extract_group_params(fit)$pairwise_effects_groups[, 1], p = p) + bgms_res$parameters_g2 <- vector2matrix(extract_group_params(fit)$pairwise_effects_groups[, 2], p = p) + + structures <- apply(extract_indicators(fit), 1, paste0, collapse="") table_structures <- as.data.frame(table(structures)) - bgms_res$structure_probabilities <- table_structures[,2]/nrow(fit$pairwise_difference_indicator) + bgms_res$structure_probabilities <- table_structures[,2]/nrow(extract_indicators(fit)) bgms_res$graph_weights <- table_structures[,2] bgms_res$sample_graph <- as.character(table_structures[, 1]) - bgms_res$samples_posterior <- fit$pairwise_difference + bgms_res$samples_posterior <- extract_pairwise_interactions(fit) + + bgms_res$convergence_parameter <- extract_rhat(fit)$pairwise_differences } - + ######-------------------- - ## Newer package + ## Multi-group estimation ######-------------------- - if(packageVersion("bgms") > "0.1.4.2"){ - ######-------------------- - ## Two group estimation - ######-------------------- - if(is.null(group_indicator)){ - class(fit) <- c("bgmCompare") - - args <- extract_arguments(fit) - args$save <- TRUE - dots <- list(...) - if (args$difference_prior[1] == "Bernoulli") { - if("difference_probability" %in% dots){ - edge.prior <- args$difference_probability - args$inclusion_probability_difference <- edge.prior - } else { - edge.prior <- 0.5 - args$inclusion_probability_difference <- edge.prior - } - } else { # if BB or SBM - edge.prior <- args$beta_bernoulli_alpha / - (args$beta_bernoulli_alpha + args$beta_bernoulli_beta) - - # otherwise it saves the wrong values (could be done more elegantly) + if(!is.null(group_indicator)){ + class(fit) <- c("bgmCompare") + + args <- extract_arguments(fit) + args$save <- TRUE + dots <- list(...) + if (args$difference_prior[1] == "Bernoulli") { + if("difference_probability" %in% dots){ + edge.prior <- args$difference_probability args$inclusion_probability_difference <- edge.prior - } - - bgms_res <- list() - - - p <- args$num_variables - pars <- fit$posterior_summary_pairwise_differences$mean - bgms_res$parameters <- vector2matrix(pars, p = p) - colnames(bgms_res$parameters) <- varnames - bgms_res$structure <- matrix(1, ncol = ncol(bgms_res$parameters), - nrow = nrow(bgms_res$parameters)) - indicators <- extract_indicators(fit) - bgms_res$inc_probs <- vector2matrix(colMeans(indicators[, grep("\\(pairwise\\)", colnames(indicators))]), p = p) - bgms_res$inc_BF <- (bgms_res$inc_probs/(1-bgms_res$inc_probs))/(edge.prior /(1 - edge.prior)) - bgms_res$structure <- 1*(bgms_res$inc_probs > 0.5) - - #Obtain structure information - bgms_res$group_estimates <- extract_group_params(fit)$pairwise_effects_groups - bgms_res$parameters_g1 <- vector2matrix(extract_group_params(fit)$pairwise_effects_groups[, 1], p = p) - bgms_res$parameters_g2 <- vector2matrix(extract_group_params(fit)$pairwise_effects_groups[, 2], p = p) - - structures <- apply(extract_indicators(fit), 1, paste0, collapse="") - table_structures <- as.data.frame(table(structures)) - bgms_res$structure_probabilities <- table_structures[,2]/nrow(extract_indicators(fit)) - bgms_res$graph_weights <- table_structures[,2] - bgms_res$sample_graph <- as.character(table_structures[, 1]) - bgms_res$samples_posterior <- extract_pairwise_interactions(fit) - - bgms_res$convergence_parameter <- fit$posterior_summary_pairwise_differences$Rhat - } - - ######-------------------- - ## Multi-group estimation - ######-------------------- - if(!is.null(group_indicator)){ - class(fit) <- c("bgmCompare") - - args <- extract_arguments(fit) - args$save <- TRUE - dots <- list(...) - if (args$difference_prior[1] == "Bernoulli") { - if("difference_probability" %in% dots){ - edge.prior <- args$difference_probability - args$inclusion_probability_difference <- edge.prior - } else { - edge.prior <- 0.5 - args$inclusion_probability_difference <- edge.prior - } - } else { # if BB or SBM - edge.prior <- args$difference_selection_alpha / - (args$difference_selection_alpha + args$difference_selection_beta) - - # otherwise it saves the wrong values (could be done more elegantly) + } else { + edge.prior <- 0.5 args$inclusion_probability_difference <- edge.prior } - - bgms_res <- list() - - - p <- args$num_variables - #Compute average group difference - diffs <- fit$posterior_summary_pairwise_differences - edge_labels <- sub(" .*", "", diffs$parameter) - edges <- unique(edge_labels) - average_difference <- numeric(length(edges)) - for (i in seq_along(edges)) { - edge_name <- edges[i] - idx <- edge_labels == edge_name - vals <- diffs$mean[idx] - average_difference[i] <- mean(vals, na.rm = TRUE) - } - bgms_res$parameters <- vector2matrix(average_difference, p = p) - colnames(bgms_res$parameters) <- varnames - bgms_res$overall_estimate <- vector2matrix(fit$posterior_summary_pairwise_baseline$mean, p = p) #overall group estimate - bgms_res$structure <- matrix(1, ncol = ncol(bgms_res$parameters), - nrow = nrow(bgms_res$parameters)) - indicators <- extract_indicators(fit) - bgms_res$inc_probs <- vector2matrix(colMeans(indicators[, grep("\\(pairwise\\)", colnames(indicators))]), p = p) - bgms_res$inc_BF <- (bgms_res$inc_probs/(1-bgms_res$inc_probs))/(edge.prior /(1 - edge.prior)) - bgms_res$structure <- 1*(bgms_res$inc_probs > 0.5) - - structures <- apply(extract_indicators(fit), 1, paste0, collapse="") - table_structures <- as.data.frame(table(structures)) - bgms_res$structure_probabilities <- table_structures[,2]/nrow(extract_indicators(fit)) - bgms_res$graph_weights <- table_structures[,2] - bgms_res$sample_graph <- as.character(table_structures[, 1]) - bgms_res$samples_posterior <- extract_pairwise_interactions(fit) - bgms_res$convergence_parameter <- fit$posterior_summary_pairwise_baseline$Rhat - bgms_res$group_estimates <- extract_group_params(fit)$pairwise_effects_groups - bgms_res$multi_group <- "Multi-group" + } else { # if BB or SBM + edge.prior <- args$difference_selection_alpha / + (args$difference_selection_alpha + args$difference_selection_beta) + # otherwise it saves the wrong values (could be done more elegantly) + args$inclusion_probability_difference <- edge.prior } + + bgms_res <- list() + + p <- args$num_variables + # Compute average group difference + # TODO: replace with extractor when bgms adds one for difference summary + diffs <- fit$posterior_summary_pairwise_differences + edge_labels <- sub(" .*", "", diffs$parameter) + edges <- unique(edge_labels) + average_difference <- numeric(length(edges)) + for (i in seq_along(edges)) { + edge_name <- edges[i] + idx <- edge_labels == edge_name + vals <- diffs$mean[idx] + average_difference[i] <- mean(vals, na.rm = TRUE) + } + bgms_res$parameters <- vector2matrix(average_difference, p = p) + colnames(bgms_res$parameters) <- varnames + bgms_res$overall_estimate <- vector2matrix(extract_group_params(fit)$pairwise_effects_groups[, 1], p = p) + bgms_res$structure <- matrix(1, ncol = ncol(bgms_res$parameters), + nrow = nrow(bgms_res$parameters)) + indicators <- extract_indicators(fit) + bgms_res$inc_probs <- vector2matrix(colMeans(indicators[, grep("\\(pairwise\\)", colnames(indicators))]), p = p) + bgms_res$inc_BF <- (bgms_res$inc_probs/(1-bgms_res$inc_probs))/(edge.prior /(1 - edge.prior)) + bgms_res$structure <- 1*(bgms_res$inc_probs > 0.5) + + structures <- apply(extract_indicators(fit), 1, paste0, collapse="") + table_structures <- as.data.frame(table(structures)) + bgms_res$structure_probabilities <- table_structures[,2]/nrow(extract_indicators(fit)) + bgms_res$graph_weights <- table_structures[,2] + bgms_res$sample_graph <- as.character(table_structures[, 1]) + bgms_res$samples_posterior <- extract_pairwise_interactions(fit) + bgms_res$convergence_parameter <- extract_rhat(fit)$pairwise_baseline + bgms_res$group_estimates <- extract_group_params(fit)$pairwise_effects_groups + bgms_res$multi_group <- "Multi-group" + } - # Adapt column names of output colnames(bgms_res$inc_probs) <- colnames(bgms_res$parameters) - colnames(bgms_res$inc_BF) <- colnames(bgms_res$parameters) - + colnames(bgms_res$inc_BF) <- colnames(bgms_res$parameters) + bgms_res$model <- type bgms_res$fit_arguments <- args - bgms_res$edge.prior <- edge.prior # otherwise it stores a whole matrix - #bgms_res$n <- c(args$no_cases_gr1, args$no_cases_gr2) - + bgms_res$edge.prior <- edge.prior # otherwise it stores a whole matrix + output <- bgms_res class(output) <- c("package_bgms_compare", "easybgm_compare", "easybgm") return(output) diff --git a/R/plottingfunctions.bgmCompare.R b/R/plottingfunctions.bgmCompare.R index d92160d..58bc29e 100644 --- a/R/plottingfunctions.bgmCompare.R +++ b/R/plottingfunctions.bgmCompare.R @@ -1,18 +1,8 @@ #' @export plot_structure_probabilities.bgmCompare <- function(output, as_BF = FALSE, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if (packageVersion("bgms") < "0.1.6.0" ) { - if(!(fit_args$save)){ - stop("Please run your bgmCompare function with save = T.") - } - } - res <- bgm_extract.package_bgms_compare(fit = output, save = TRUE, type = NULL, not_cont = NULL, data = NULL, group_indicator = NULL, @@ -82,18 +72,8 @@ plot_structure_probabilities.bgmCompare <- function(output, as_BF = FALSE, ...) plot_complexity_probabilities.bgmCompare <- function(output, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if (packageVersion("bgms") < "0.1.6.0") { - if(!(fit_args$save)){ - stop("Please run your bgmCompare function with save = T.") - } - } - res <- bgm_extract.package_bgms_compare(fit = output, save = TRUE, type = NULL, not_cont = NULL, data = NULL, group_indicator = NULL, @@ -150,21 +130,11 @@ plot_complexity_probabilities.bgmCompare <- function(output, ...) { #' @export plot_edgeevidence.bgmCompare <- function(output, evidence_thresh = 10, split = FALSE, show = "all", ...) { - - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - + warning("Note, the plot indicates the edge evidence for the pairwise difference between the groups.") - + fit_args <- bgms::extract_arguments(output) - if (packageVersion("bgms") < "0.1.6.0") { - if(!(fit_args$save)){ - stop("Please run your bgmCompare function with save = T.") - } - } - res <- bgm_extract.package_bgms_compare(fit = output, save = TRUE, type = NULL, not_cont = NULL, data = NULL, group_indicator = NULL, @@ -305,20 +275,10 @@ plot_edgeevidence.bgmCompare <- function(output, evidence_thresh = 10, split = F plot_network.bgmCompare <- function(output, exc_prob = .5, evidence_thresh = 10, dashed = TRUE, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - warning("Note, the plot indicates the strength of the pairwise difference in edge parameters between the groups.") - + fit_args <- bgms::extract_arguments(output) - if (packageVersion("bgms") < "0.1.6.0") { - if(!(fit_args$save)){ - stop("Please run your bgmCompare function with save = T.") - } - } - res <- bgm_extract.package_bgms_compare(fit = output, save = TRUE, type = NULL, not_cont = NULL, data = NULL, @@ -381,19 +341,11 @@ plot_network.bgmCompare <- function(output, exc_prob = .5, evidence_thresh = 10, plot_structure.bgmCompare <- function(output, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } warning("Note, the plot indicates the structure of the pairwise difference between the groups.") fit_args <- bgms::extract_arguments(output) - if (packageVersion("bgms") < "0.1.6.0") { - if(!(fit_args$save)){ - stop("Please run your bgmCompare function with save = T.") - } - } res <- bgm_extract.package_bgms_compare(fit = output, save = TRUE, type = NULL, not_cont = NULL, data = NULL, @@ -437,25 +389,13 @@ plot_structure.bgmCompare <- function(output, ...) { plot_parameterHDI.bgmCompare <- function(output, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - if (packageVersion("bgms") > "0.1.4.2") { - warning("Note, the plot indicates the posterior highest density interval of the overall group edges.") - } else { - warning("Note, the plot indicates the posterior highest density interval for subgroup differences.") - } + warning("Note, the plot indicates the posterior highest density interval of the overall group edges.") fit_args <- bgms::extract_arguments(output) - if (packageVersion("bgms") < "0.1.6.0") { - if(!(fit_args$save)){ - stop("Please run your bgmCompare function with save = T.") - } - } res <- bgm_extract.package_bgms_compare(fit = output, save = TRUE, diff --git a/R/plottingfunctions.bgms.R b/R/plottingfunctions.bgms.R index f7a12aa..7283125 100644 --- a/R/plottingfunctions.bgms.R +++ b/R/plottingfunctions.bgms.R @@ -1,15 +1,8 @@ #' @export plot_structure_probabilities.bgms <- function(output, as_BF = FALSE, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE # Give error if save is false if(fit_args$save == FALSE){ @@ -86,14 +79,8 @@ plot_structure_probabilities.bgms <- function(output, as_BF = FALSE, ...) { plot_complexity_probabilities.bgms <- function(output, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE # Give error if save is false if(fit_args$save == FALSE){ @@ -170,14 +157,8 @@ plot_complexity_probabilities.bgms <- function(output, ...) { plot_edgeevidence.bgms <- function(output, evidence_thresh = 10, split = FALSE, show = "all", ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE res <- bgm_extract.package_bgms(fit = output, save = fit_args$save, centrality = FALSE, @@ -319,14 +300,8 @@ plot_edgeevidence.bgms <- function(output, evidence_thresh = 10, split = FALSE, plot_network.bgms <- function(output, exc_prob = .5, evidence_thresh = 10, dashed = TRUE, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE @@ -390,14 +365,8 @@ plot_network.bgms <- function(output, exc_prob = .5, evidence_thresh = 10, dashe plot_structure.bgms <- function(output, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE res <- bgm_extract.package_bgms(fit = output, save = fit_args$save, centrality = FALSE, @@ -442,14 +411,8 @@ plot_structure.bgms <- function(output, ...) { plot_parameterHDI.bgms <- function(output, ...) { - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE if(!fit_args$save){ stop("Samples of the posterior distribution required. When estimating the model with bgm, set \"save = TRUE\".") @@ -521,14 +484,8 @@ plot_parameterHDI.bgms <- function(output, ...) { plot_centrality.bgms <- function(output, group_names = NULL, ...){ - if(packageVersion("bgms") < "0.1.4"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } - fit_args <- bgms::extract_arguments(output) - if(packageVersion("bgms") > "0.1.4.2"){ - fit_args$save <- TRUE - } + fit_args$save <- TRUE if(!fit_args$save){ stop("Samples of the posterior distribution required. When estimating the model with bgm, set \"save = TRUE\".") diff --git a/R/plottingfunctions.easybgm.R b/R/plottingfunctions.easybgm.R index 368ce9e..41e7dcd 100644 --- a/R/plottingfunctions.easybgm.R +++ b/R/plottingfunctions.easybgm.R @@ -402,11 +402,7 @@ plot_parameterHDI.easybgm <- function(output, ...) { } if(any(class(output) == "easybgm_compare")){ - if (packageVersion("bgms") > "0.1.4.2") { - warning("Note, the plot indicates the posterior highest density interval of the overall group edges.") - } else { - warning("Note, the plot indicates the posterior highest density interval for subgroup differences.") - } + warning("Note, the plot indicates the posterior highest density interval of the overall group edges.") } def_args <- list( @@ -534,9 +530,6 @@ plot_centrality.list <- function(output, group_names = NULL, ...){ # Check for bgms package version if(any(class(output[[1]]) == "bgms")) { - if(packageVersion("bgms") < "0.1.3"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } res <- list() for(i in 1:length(output)) { @@ -648,9 +641,6 @@ plot_prior_sensitivity.list <- function(output, # Check for bgms package version if(any(class(output[[1]]) == "bgms")) { - if(packageVersion("bgms") < "0.1.3"){ - stop("Your version of the package bgms is not supported anymore. Please update.") - } res <- list() for(i in 1:length(output)) { diff --git a/R/summary.easybgm.R b/R/summary.easybgm.R index bdcf070..f1b4987 100644 --- a/R/summary.easybgm.R +++ b/R/summary.easybgm.R @@ -106,7 +106,7 @@ summary.easybgm <- function(object, evidence_thresh = 10, BF_uncertainty = FALSE ## ---- 2i. Create results data frame ---- ## ---- Create results data frame with convergence (newer bgms)---- - if("package_bgms" %in% class(object) && packageVersion("bgms") > "0.1.4.2"){ + if("package_bgms" %in% class(object)){ # if users want the BF uncertainty estimates if(BF_uncertainty){ results <- @@ -312,10 +312,26 @@ print.easybgm <- function(x, ...){ "\n EDGE SPECIFIC OVERVIEW", "\n") print(x$parameters, quote = FALSE, right = TRUE, row.names=F) + fit_obj <- x$fit_object + if("package_bgms" %in% class(x)){ + scales <- character(0) + if(!is.null(fit_obj$partial_correlations)) + scales <- c(scales, "partial correlations ($partial_correlations)") + if(!is.null(fit_obj$precision_matrix)) + scales <- c(scales, "precision matrix ($precision_matrix)") + if(!is.null(fit_obj$log_odds)) + scales <- c(scales, "log odds ($log_odds)") + if(length(scales) > 0){ + cat("\n The 'Estimate' column reports partial association parameters.", + "\n The rescaled equivalents of these parameters are available in the fitted object:", + "\n ", paste(scales, collapse = ", "), "\n") + } + } cat("\n Bayes Factors larger than", x$evidence_thresh, "were considered sufficient evidence for the classification", "\n Bayes factors were obtained using Bayesian model-averaging.", "\n ") - if("package_bgms" %in% class(x) && packageVersion("bgms") > "0.1.4.2" && isTRUE(x$BF_uncertainty)){ + # --- Note about available parameter scales (bgms only) --- + if("package_bgms" %in% class(x) && isTRUE(x$BF_uncertainty)){ cat( "\n Convergence diagnostics: The 'Convergence Estimate' is the R-hat (Gelman-Rubin) statistic, which measures how well MCMC chains have", "\n converged to the same target distribution for the edge weights. Values greater than about 1.01-1.05 are considered concerning,", @@ -327,7 +343,7 @@ print.easybgm <- function(x, ...){ "\n is not available.", "\n ---") } - if("package_bgms" %in% class(x) && packageVersion("bgms") > "0.1.4.2" && !isTRUE(x$BF_uncertainty)){ + if("package_bgms" %in% class(x) && !isTRUE(x$BF_uncertainty)){ cat("\n Convergence indicates the R-hat (Gelman-Rubin) statistic measuring how well MCMC chains have converged to", "\n the same target distribution. Values greater than about 1.01-1.05 are considered concerning, indicating", "\n potential lack of convergence for the estimates of the pairwise interactions.", @@ -361,5 +377,6 @@ print.easybgm <- function(x, ...){ "\n Number of possible structures:", x$possible_struc, "\n Posterior probability of most likely structure:", x$max_structure_prob, "\n---") + } } diff --git a/R/summary.easybgm_compare.R b/R/summary.easybgm_compare.R index f3c8cd7..7364739 100644 --- a/R/summary.easybgm_compare.R +++ b/R/summary.easybgm_compare.R @@ -50,7 +50,7 @@ summary.easybgm_compare <- function(object, evidence_thresh = 10, ...) { ## ---- 2d. Create results data frame ---- ## ---- Create results data frame with convergence (newer bgms)---- - if("package_bgms_compare" %in% class(object)&& packageVersion("bgms") > "0.1.4.2" ){ + if("package_bgms_compare" %in% class(object)){ if(is.null(object$multi_group)){ results <- data.frame( @@ -215,7 +215,7 @@ print.easybgm_compare <- function(x, ...){ cat("\n Bayes Factors larger than", x$evidence_thresh, "were considered sufficient evidence for the classification.", "\n Bayes factors were obtained using Bayesian model-averaging.", "\n ") - if("package_bgms_compare" %in% class(x) && packageVersion("bgms") > "0.1.4.2"){ + if("package_bgms_compare" %in% class(x)){ cat("\n Convergence indicates the R-hat (Gelman-Rubin) statistic measuring how well MCMC chains have converged to", "\n the same target distribution, and values greater than about 1.01-1.05 are considered concerning, ", "\n indicating potential lack of convergence. ", diff --git a/man/easybgm.Rd b/man/easybgm.Rd index e6079f0..a4107ab 100644 --- a/man/easybgm.Rd +++ b/man/easybgm.Rd @@ -18,163 +18,301 @@ easybgm( ) } \arguments{ -\item{data}{An n x p matrix or dataframe containing the variables for n independent observations on p variables.} +\item{data}{An n x p matrix or dataframe containing the variables for n +independent observations on p variables.} -\item{type}{What is the data type? Options: continuous, mixed, ordinal, binary, or blume-capel.} +\item{type}{Specifies the type of data. There are two ways to use this argument: -\item{package}{The R-package that should be used for fitting the network model; supports BGGM, BDgraph, and bgms. Optional argument; -default values are specified depending on the datatype.} - -\item{not_cont}{If data-type is mixed, a vector of length p, specifying the not-continuous -variables (1 = not continuous, 0 = continuous).} - -\item{iter}{number of iterations for the sampler. Default is set to 1e3 but the recommended number of samples depends on the underlying package and sampler used. For data fit with BDgraph and BGGM, the default is set to 1e4.} - -\item{save}{Logical. Should the posterior samples be obtained (default = FALSE)?} +\strong{1. A single string} (applies the same type to all variables): +\itemize{ +\item \code{"continuous"}: For continuous (Gaussian) data. Estimates a +Gaussian Graphical Model (GGM). +\item \code{"ordinal"}: For ordinal (Likert-type) data. Estimates an +Ordinal Markov Random Field (OMRF). +\item \code{"binary"}: For binary (0/1) data. Estimates an OMRF internally +(which reduces to the Ising model when all variables are binary). +\item \code{"blume-capel"}: For Blume-Capel ordinal data. Requires a +reference category via the \code{baseline_category} argument. + +\item \code{"mixed"}: For data with both continuous and discrete +variables. Requires the \code{not_cont} argument to indicate which +variables are not continuous (see below). Internally, this constructs a +per-variable type vector. This value should only be specified when +fitting a mixed model using \code{package = "BDgraph"} or \code{package = "BGGM"}. +See below on how to specify mixed models using bgms. +} -\item{centrality}{Logical. Should the centrality measures be extracted (default = FALSE)? Note, that it will significantly increase the computation time.} +\strong{2. A character vector of length p} (per-variable specification, +bgms only): Each element specifies the type of the corresponding column in +\code{data}. Valid values are \code{"ordinal"}, \code{"continuous"}, +\code{"blume-capel"}, and \code{"binary"} (treated as \code{"ordinal"} +internally). This provides full control over mixed-type data without needing +the \code{not_cont} argument. -\item{progress}{Logical. Should a progress bar be shown (default = TRUE)?} +For example: \code{type = c("ordinal", "ordinal", "continuous")} specifies +that the first two columns are ordinal and the third is continuous. -\item{posterior_method}{Determines how the posterior samples of the edge weight parameters are obtained for models fit with BDgraph. The argument can be either MAP for the maximum-a-posteriori or model-averaged. If MAP, samples are obtained for the edge weights only for the most likely structure. If model-averaged, samples are obtained for all plausible structures weighted by their posterior probability. Default is model-averaged.} +\emph{Note:} Per-variable type vectors are only supported with the +\code{bgms} package. Mixed models with other packages can be fit using +\code{type = "mixed"} and the \code{not_cont} argument (see above).} -\item{...}{Additional arguments that are handed to the fitting functions of the packages, e.g., informed prior specifications.} +\item{package}{The R-package used for fitting the network model. Optional; +if not specified, \code{bgms} is used as the default for all data types. +Supported options: +\itemize{ +\item \code{"bgms"} (Default): Supports the following data types: continuous, +ordinal, binary, and blume-capel or a combination of the three. +Requires bgms >= 0.1.6.4. +\item \code{"BDgraph"}: Supports continuous (fits a GGM), mixed (fits a +GCGM). For continuous data, missing values are not +allowed; use \code{na.omit()} on the data first. Binary data is always +routed to bgms regardless of the package argument. +\item \code{"BGGM"}: Supports continuous and mixed data. Binary data is +always routed to bgms regardless of the package argument. +}} + +\item{not_cont}{A binary vector of length p, required when +\code{type = "mixed"} and a single string is used. Each element indicates +whether the corresponding variable is not continuous +(\code{1} = not continuous/ordinal, \code{0} = continuous). This is used to +construct a per-variable type vector internally. Only neccesary when +using \code{package = "BDgraph"} or \code{package = "BGGM"}. + +For example, for 5 variables where the first two are ordinal and the rest +continuous: \code{not_cont = c(1, 1, 0, 0, 0)}. + +\emph{Note:} This argument is not needed when using a per-variable type +vector for the \code{type} argument when \code{package = "bgms"} (default).} + +\item{iter}{Number of iterations for the sampler. The default depends on the +package: +\itemize{ +\item \code{bgms}: 1e3 (1,000 iterations) +\item \code{BDgraph}: 1e4 (10,000 iterations) +\item \code{BGGM}: 1e4 (10,000 iterations) +} +The recommended number of iterations depends on the data, model complexity, +and desired precision. Check the convergence diagnostics in the output to +determine if more iterations are needed.} + +\item{save}{Logical. Should the posterior samples be obtained +(default = \code{FALSE})? If \code{TRUE}, the output includes a +\code{samples_posterior} matrix with the posterior samples for each edge +weight parameter. Setting \code{centrality = TRUE} automatically sets +\code{save = TRUE}.} + +\item{centrality}{Logical. Should the centrality measures be extracted +(default = \code{FALSE})? Note that this will significantly increase +computation time. Automatically sets \code{save = TRUE}.} + +\item{progress}{Logical. Should a progress bar be shown +(default = \code{TRUE})?} + +\item{posterior_method}{Determines how the posterior samples of the edge +weight parameters are obtained for models fit with \code{BDgraph}. Can be +either \code{"MAP"} (maximum-a-posteriori) or \code{"model-averaged"} +(default). If \code{"MAP"}, samples are obtained for the edge weights only +for the most likely structure. If \code{"model-averaged"}, samples are +obtained for all plausible structures weighted by their posterior +probability.} + +\item{...}{Additional arguments passed to the fitting functions of the +underlying packages (e.g., prior specifications). See the +\strong{Prior specification} section in Details for available options per +package.} } \value{ -The returned object of \code{easybgm} contains several elements: +An object of class \code{easybgm} containing the following elements: +\strong{Always returned:} \itemize{ - -\item \code{parameters} A p x p matrix containing partial associations. - -\item \code{inc_probs} A p x p matrix containing the posterior inclusion probabilities. - -\item \code{BF} A p x p matrix containing the posterior inclusion Bayes factors. - -\item \code{structure} Adjacency matrix of the median probability model (i.e., edges with a posterior probability larger 0.5). +\item \code{parameters}: A p x p matrix of posterior mean partial +association estimates. +\item \code{inc_probs}: A p x p matrix of posterior inclusion +probabilities. +\item \code{inc_BF}: A p x p matrix of posterior inclusion Bayes factors. +\item \code{structure}: A p x p adjacency matrix of the median probability +model (edges with posterior inclusion probability > 0.5). +\item \code{model}: A string indicating the model type (e.g., +\code{"continuous"}, \code{"ordinal"}, \code{"mixed"}). +\item \code{thresholds}: Threshold/intercept parameters (bgms only). The +format depends on the model type: a matrix for ordinal/binary models, +\code{NULL} for continuous models, or a list for mixed models. } -In addition, for \code{BDgraph} and \code{bgms}, the function returns: - +\strong{Returned for bgms and BDgraph:} \itemize{ +\item \code{structure_probabilities}: Posterior probabilities of all +visited graph structures (values between 0 and 1). +\item \code{graph_weights}: Number of times each graph structure was +visited. +\item \code{sample_graphs}: Identifiers for each visited graph structure. +} -\item \code{structure_probabilities} A vector containing the posterior probabilities of all visited structures, between 0 and 1. - -\item \code{graph_weights} A vector containing the number of times a particular structure was visited. - -\item \code{sample_graphs} A vector containing the indexes of a particular structure. - -For the \code{bgms} package, when \code{edge_prior = "Stochastic-Block"}, -the function will also return an object \code{sbm} which contains: +\strong{Returned for bgms only:} \itemize{ - -\item \code{posterior_num_blocks} A data frame with the estimated posterior -probability of the possible number of clusters. - -\item \code{posterior_mean_allocations} The posterior mean of the cluster assignments of the nodes. - -\item \code{posterior_mode_allocations} The posterior mode of the cluster assignments of the nodes. - -\item \code{posterior_mean_coclustering_matrix} A p x p matrix containing the estimated -pairwise proportions of cluster occurrence of every variable. This matrix -can be plotted to visually inspect the estimated number of clusters -and visually inspect nodes that tend to switch clusters. - -} +\item \code{convergence_parameter}: The Gelman-Rubin (R-hat) convergence +statistic for each edge weight parameter. Values close to 1 indicate good +convergence. +\item \code{MCSE_BF}: A matrix with the 95 percent Monte Carlo confidence +interval for each inclusion Bayes factor. } -If using version 0.1.6.1 or higher of the \code{bgms} package, the function also returns the -the Gelman-Rubin convergence statistic for each edge weight parameter. As well as the -95\% Monte Carlo confidence interval for the inclusion Bayes factor. +\strong{Returned when edge_prior = "Stochastic-Block" (bgms only):} +\itemize{ +\item \code{sbm}: A list containing Stochastic Block Model results, including +\code{posterior_num_blocks} (posterior probabilities for each number of +clusters), \code{posterior_mean_allocations} (posterior mean cluster +assignments), \code{posterior_mode_allocations} (posterior mode cluster +assignments), and \code{posterior_mean_coclustering_matrix} (a p x p matrix +of pairwise co-clustering proportions). +} -For all packages, when setting \code{save = TRUE} and \code{centrality = TRUE}, the function will return the following objects respectively: +\strong{Interpretable parameter scales (bgms only):} +In addition to the raw pairwise interaction parameters in +\code{parameters}, the following transformations are provided when the +model type supports them. They are \code{NULL} otherwise. \itemize{ +\item \code{partial_correlations}: A matrix of posterior mean partial +correlations. Available for continuous (GGM) models (full p x p matrix) +and for the continuous block of mixed models. \code{NULL} for ordinal +models. +\item \code{precision_matrix}: The posterior mean precision +(inverse covariance) matrix. Same availability as partial correlations. +\item \code{log_odds}: A matrix of posterior mean log adjacent-category +odds ratios. Available for ordinal/binary models (full p x p matrix) +and for the discrete block of mixed models. \code{NULL} for continuous +models. +} -\item \code{samples_posterior} A k x iter matrix containing the posterior samples for each parameter (i.e., k = (p/(p-1))/2) at each iteration (i.e., iter) of the sampler. +\strong{Returned when save = TRUE:} +\itemize{ +\item \code{samples_posterior}: A k x iter matrix of posterior samples for +each edge weight parameter (k = p*(p-1)/2 edges). +} -\item \code{centrality} A p x iter matrix containing the centrality of a node at each iteration of the sampler. +\strong{Returned when centrality = TRUE:} +\itemize{ +\item \code{centrality}: An iter x p matrix of centrality values for each +node at each iteration. } } \description{ -Easy estimation of a Bayesian analysis of networks to obtain conditional (in)dependence relations between variables in a network. +Easy estimation of a Bayesian graphical models to obtain +conditional (in)dependence relations between variables in a network. } \details{ -Users may oftentimes wish to deviate from the default, usually uninformative, prior specifications of the -packages to informed priors. This can be done by simply adding additional arguments to the \code{easybgm} function. -Depending on the package that is running the underlying network estimation, researcher can specify different prior -arguments. We give an overview of the prior arguments per package below. - -\strong{bgms}: - -\itemize{ - -\item \code{interaction_scale} the scale of the Cauchy distribution that is used as a -prior for the pairwise interaction parameters. The default is 2.5. - -\item \code{edge_prior} prior on the graph structure, which can be either "Bernoulli", "Beta-Bernoulli" or "Stochastic Block". The default is "Bernoulli". - -\item \code{inclusion_probability} prior edge inclusion probability for the "Bernoulli" distribution. The default is 0.5. - -\item \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta} the parameters of the "Beta-Bernoulli" or "Stochastic Block" priors. The default is 1 for both. - -\item \code{beta_bernoulli_alpha_between} and \code{beta_bernoulli_beta_between} the parameters of the "Stochastic Block" prior for edges between blocks. -This is currently only available in a developer version of bgms and will be available in version 0.1.6.2 or higher. +\strong{Data types and package support} + +The table below summarizes which data types are supported by each backend +package: + +\tabular{lccc}{ +\strong{Data type} \tab \strong{bgms} \tab \strong{BDgraph} \tab \strong{BGGM} \cr +continuous \tab Yes (default) \tab Yes \tab Yes \cr +ordinal \tab Yes (default) \tab Yes \tab No \cr +binary \tab Yes (always) \tab No \tab No \cr +mixed \tab Yes (default) \tab Yes \tab Yes \cr +blume-capel \tab Yes (default) \tab No \tab No \cr +} -\item \code{dirichlet_alpha} The shape of the Dirichlet prior on the node-to-block -allocation parameters for the Stochastic Block prior on the graph structure. +\strong{How \code{type} and \code{not_cont} relate} + +There are two ways to specify mixed-type data: +\enumerate{ +\item \strong{Using \code{type = "mixed"} with \code{not_cont}}: Set +\code{type = "mixed"} and provide a binary vector \code{not_cont} of +length p. Internally, this is translated to a per-variable type vector +where \code{not_cont == 1} maps to \code{"ordinal"} and +\code{not_cont == 0} maps to \code{"continuous"}. +\item \strong{Using a per-variable type vector} (bgms only): Directly +specify the type of each variable, e.g., +\code{type = c("ordinal", "continuous", "ordinal")}. This is more +flexible and does not require the \code{not_cont} argument. +} -\item \code{threshold_alpha} and \code{threshold_beta} the parameters of the beta-prime distribution for the threshold parameters. The defaults are both set to 1. +\strong{Prior specification} -\item \code{variable_type} What kind of variables are there in \code{x}? Can be a -single character string specifying the variable type of all \code{p} -variables at once or a vector of character strings of length \code{p} -specifying the type for each variable in \code{x} separately. Currently, bgm -supports \verb{ordinal'' and }blume-capel''. Binary variables are automatically -treated as ``ordinal’’. Defaults to \code{variable_type = "ordinal"}. +Users may wish to deviate from the default (uninformative) prior +specifications. This can be done by passing additional arguments via +\code{...} to the fitting function of the chosen package. We give an +overview of the available prior arguments per package below. +\emph{bgms}: +\itemize{ +\item \code{pairwise_scale}: Scale of the Cauchy prior on the pairwise +interaction parameters. Default is 2.5. +\item \code{edge_prior}: Prior on the graph structure: \code{"Bernoulli"} +(default), \code{"Beta-Bernoulli"}, or \code{"Stochastic-Block"}. +\item \code{inclusion_probability}: Prior edge inclusion probability for +the \code{"Bernoulli"} prior. Default is 0.5. This can also +be a symmetric pxp matrix of edge-specific inclusion probabilities. +\item \code{beta_bernoulli_alpha} and \code{beta_bernoulli_beta}: (Within) Shape +parameters of the \code{"Beta-Bernoulli"} or \code{"Stochastic-Block"} +priors. Both default to 1. +\item \code{beta_bernoulli_alpha_between} and +\code{beta_bernoulli_beta_between}: Shape parameters of the +\code{"Stochastic-Block"} prior for between-block edges. +\item \code{dirichlet_alpha}: Shape of the Dirichlet prior on node-to-block +allocations for the \code{"Stochastic-Block"} prior. +\item \code{threshold_alpha} and \code{threshold_beta}: Parameters of the +beta-prime prior on threshold parameters. Both default to 1. } -\strong{BDgraph}: - +\emph{BDgraph}: \itemize{ - -\item \code{df.prior} prior on the parameters (i.e., inverse covariance matrix), degrees of freedom of the prior G-Wishart distribution. The default is set to 3. - -\item \code{g.prior} prior probability of edge inclusion. This can be either a scalar, if it is the same for all edges, or a matrix, if it should be different among the edges. The default is set to 0.5. - +\item \code{df.prior}: Degrees of freedom of the prior G-Wishart +distribution on the precision matrix. Default is 3. +\item \code{g.prior}: Prior probability of edge inclusion. Can be a +scalar (same for all edges) or a matrix (edge-specific). +This can also be a symmetric pxp matrix of edge-specific inclusion probabilities. +Default is 0.5. } -\strong{BGGM}: +\emph{BGGM}: \itemize{ - -\item \code{prior_sd} the standard deviation of the prior distribution of the interaction parameters, approximately the scale of a beta distribution. The default is 0.25. +\item \code{prior_sd}: Standard deviation of the prior on interaction +parameters (approximately the scale of a beta distribution). Default is +0.25. } -We would always encourage researcher to conduct prior robustness checks. +We encourage researchers to conduct prior sensitivity checks. } \examples{ - library(easybgm) library(bgms) data <- na.omit(Wenchuan) -# Fitting the Wenchuan PTSD data - +# --- Continuous data (default: bgms) --- fit <- easybgm(data, type = "continuous", - iter = 100 # for demonstration only + iter = 100 # for demonstration only; increase for real analyses ) - summary(fit) \donttest{ -# To extract the posterior parameter distribution -# and centrality measures - -fit <- easybgm(data, type = "continuous", - iter = 100, # for demonstration only - centrality = TRUE, save = TRUE) +# --- Mixed data using per-variable type vector (bgms only) --- +dat3 <- data[, 1:3] +fit_vec <- easybgm(dat3, + type = c("ordinal", "ordinal", "continuous"), + iter = 100) + +# --- Extract posterior samples and centrality --- +fit_full <- easybgm(data, type = "continuous", + iter = 100, + centrality = TRUE, save = TRUE) + +# --- Using BDgraph for continuous data --- +fit_bd <- easybgm(data, type = "continuous", + package = "BDgraph", + iter = 100) + +# --- Using BGGM for continuous data --- +fit_bggm <- easybgm(data, type = "continuous", + package = "BGGM", + iter = 100) + } } diff --git a/man/easybgm_compare.Rd b/man/easybgm_compare.Rd index b084534..a8712da 100644 --- a/man/easybgm_compare.Rd +++ b/man/easybgm_compare.Rd @@ -17,74 +17,151 @@ easybgm_compare( ) } \arguments{ -\item{data}{A list with two n x p matrices or dataframes containing the variables for n independent observations on -p variables for two groups. Note that the variables need to be the same in the two different dataframes. Alternatively, -when "bgms" version > 0.1.6 is installed, 'data' can also be a matrix of binary and ordinal responses from all groups. -If this is the case, the 'group_indicator' argument also needs to be specified.} +\item{data}{The data can be provided in two formats: -\item{type}{What is the data type? Options: continuous, mixed, ordinal, binary, or blume-capel.} +\strong{1. A list of two dataframes} (two-group comparison): Each element +is an n x p matrix or dataframe for one group. The variables (columns) must +be the same across both dataframes. This format is supported by both +\code{bgms} and \code{BGGM}. -\item{package}{The R-package that should be used for fitting the network model; supports BGGM and bgms. Optional argument; -default values are specified depending on the datatype.} +\strong{2. A single matrix or dataframe} (multi-group comparison): An n x p +matrix containing responses from all groups combined. Requires the +\code{group_indicator} argument to specify which rows belong to which +group. This format supports two or more groups and is only available with +the \code{bgms} package.} -\item{not_cont}{If data-type is mixed, a vector of length p, specifying the not-continuous -variables (1 = not continuous, 0 = continuous).} - -\item{group_indicator}{Optional integer vector of group memberships for the rows of the dataframe (multi-group comparison), -when data is a matrix instead of a list of two dataframes.} - -\item{iter}{number of iterations for the sampler. Default is 1e3 for data fit with bgms and to 1e4 for data with with BGGM.} +\item{type}{Specifies the data type. Currently supported types for group +comparison: +\itemize{ +\item \code{"ordinal"}: For ordinal (Likert-type) data. Default package: +bgms. +\item \code{"binary"}: For binary (0/1) data. Always uses bgms. +\item \code{"blume-capel"}: For Blume-Capel ordinal data. Requires +\code{baseline_category}. Always uses bgms. +\item \code{"continuous"}: Only supported with +\code{package = "BGGM"} (must be set explicitly). Not yet supported via +bgms. +\item \code{"mixed"}: Only supported with +\code{package = "BGGM"} (must be set explicitly). Requires +\code{not_cont}. Not yet supported via bgms for group comparison. +}} + +\item{package}{The R-package used for fitting the comparison model. Optional; +if not specified, \code{bgms} is used as the default for ordinal, binary, +and blume-capel data. Supported options: +\itemize{ +\item \code{"bgms"} (Default): Supports ordinal, binary, and blume-capel +data. Supports both two-group (list input) and multi-group (single +dataframe + \code{group_indicator}) comparisons. +\item \code{"BGGM"}: Supports continuous and mixed data. Only supports +two-group comparison (list input). Binary data is always routed to +bgms regardless. +}} + +\item{not_cont}{A binary vector of length p, required when +\code{type = "mixed"}. Each element indicates whether the corresponding +variable is not continuous (\code{1} = not continuous/ordinal, +\code{0} = continuous).} + +\item{group_indicator}{An integer vector of length n specifying group +membership for each row in \code{data}. Required when \code{data} is a +single matrix/dataframe (multi-group comparison). Supports two or more +groups (e.g., \code{rep(c(1, 2, 3), each = 50)} for three groups of 50 +observations each). Only available with the \code{bgms} package.} + +\item{iter}{Number of iterations for the sampler. The default depends on the +package: +\itemize{ +\item \code{bgms}: 1e4 (10,000 iterations) +\item \code{BGGM}: 1e4 (10,000 iterations) +} +The recommended number of iterations depends on the data and model +complexity. Check convergence diagnostics in the output.} -\item{save}{Logical. Should the posterior samples be obtained (default = TRUE)?} +\item{save}{Logical. Should the posterior samples be obtained +(default = \code{TRUE})? If \code{TRUE}, the output includes a +\code{samples_posterior} matrix with posterior samples for each difference +parameter.} -\item{progress}{Logical. Should a progress bar be shown (default = TRUE)?} +\item{progress}{Logical. Should a progress bar be shown +(default = \code{TRUE})?} -\item{...}{Additional arguments that are handed to the fitting functions of the packages, e.g., informed prior specifications.} +\item{...}{Additional arguments passed to the fitting functions of the +underlying packages (e.g., prior specifications). Consult the documentation +of \code{bgms} and \code{BGGM} for the specific options available.} } \value{ -The returned object of \code{easybgm} contains several elements: +An object of class \code{easybgm_compare} with the following +elements: +\strong{Always returned:} \itemize{ - -\item \code{parameters} A p x p matrix containing difference across partial associations. - -\item \code{inc_probs} A p x p matrix containing the posterior inclusion probabilities of subgroup differences. - -\item \code{inc_BF} A p x p matrix containing the posterior inclusion Bayes factors of subgroup differences. - -\item \code{structure} Adjacency matrix of the median probability model (i.e., edges with a posterior probability larger 0.5). +\item \code{parameters}: A p x p matrix of posterior mean differences in +partial associations across groups. +\item \code{inc_probs}: A p x p matrix of posterior inclusion probabilities +for group differences (i.e., the probability that an edge differs between +groups). +\item \code{inc_BF}: A p x p matrix of inclusion Bayes factors for group +differences. +\item \code{structure}: A p x p adjacency matrix of the median probability +model for differences (edges with posterior inclusion probability > 0.5). +\item \code{model}: A string indicating the data type used. } -In addition, for \code{bgms}, the function returns: - +\strong{Returned for bgms only:} \itemize{ - -\item \code{structure_probabilities} A vector containing the posterior probabilities of all visited structures, between 0 and 1. - -\item \code{graph_weights} A vector containing the number of times a particular structure was visited. - -\item \code{sample_graph} A vector containing the indexes of a particular structure. - -\item \code{convergence_parameter} A vector containing the R-hat (Gelman–Rubin) statistic for the difference parameter measuring how well MCMC chains have converged to the same target distribution. +\item \code{structure_probabilities}: Posterior probabilities of all +visited graph structures. +\item \code{graph_weights}: Number of times each structure was visited. +\item \code{sample_graph}: Identifiers for each visited structure. +\item \code{convergence_parameter}: The Gelman-Rubin (R-hat) convergence +statistic for each difference parameter. Values close to 1 indicate +good convergence. } -For both packages, when setting \code{save = TRUE}, the function will also return the following object: - +\strong{Returned when save = TRUE:} \itemize{ - -\item \code{samples_posterior} A k x iter matrix containing the posterior samples of parameter differences (i.e., k = (p/(p-1))/2) at each iteration (i.e., iter) of the sampler. +\item \code{samples_posterior}: A k x iter matrix of posterior samples for +each difference parameter (k = p*(p-1)/2 edges). } } \description{ -Easy comparison of networks using Bayesian inference to extract differences in conditional (in)dependence across groups. +Easy comparison of networks using Bayesian inference to extract +differences in conditional (in)dependence relations across groups. } \details{ -Users may oftentimes wish to deviate from the default, usually uninformative, prior specifications of the -packages to informed priors. This can be done by simply adding additional arguments to the \code{easybgm} function. -Depending on the package that is running the underlying network estimation, researcher can specify different prior -arguments. Please consult the original packages "bgms" and "BGGM" for the specific informed prior options. +\strong{Data types and package support for group comparison} + +\tabular{lcc}{ +\strong{Data type} \tab \strong{bgms} \tab \strong{BGGM} \cr +ordinal \tab Yes (default) \tab No \cr +binary \tab Yes (always) \tab No \cr +blume-capel \tab Yes (default) \tab No \cr +continuous \tab No \tab Yes (explicit)\cr +mixed \tab No \tab Yes (explicit)\cr +} + +For continuous or mixed data comparison, you must explicitly set +\code{package = "BGGM"}. If no package is specified for these types, the +function will return an informative error. + +\strong{Two-group vs. multi-group comparison} + +\itemize{ +\item \strong{Two-group}: Provide \code{data} as a list of two +dataframes. Supported by both \code{bgms} and \code{BGGM}. +\item \strong{Multi-group}: Provide \code{data} as a single +matrix/dataframe and specify \code{group_indicator}. Supports two or more +groups. Only available with \code{bgms}. +} + +\strong{Prior specification} -We always encourage researcher to conduct prior robustness checks. +Users may wish to adjust priors via the \code{...} argument. Consult the +documentation of \code{\link[bgms]{bgm}} and \code{\link[BGGM]{explore}} for +specific prior options. + +We always encourage researchers to conduct prior sensitivity checks. } \examples{ @@ -94,25 +171,22 @@ library(bgms) data <- na.omit(ADHD) +# --- Two-group comparison (list input) --- group1 <- data[1:10, 1:3] group2 <- data[11:20, 1:3] -# Fitting the Wenchuan PTSD data - -fit <- easybgm_compare(list(group1, group2), +fit <- easybgm_compare(list(group1, group2), type = "binary", save = TRUE, - iter = 50 # for demonstration only; more samples required, check the defaults for each sampler + iter = 50 # for demonstration only ) - summary(fit) -# For multigroup estimation -fit_multi <- easybgm_compare(data[1:200, 1:5], +# --- Multi-group comparison (single dataframe + group_indicator) --- +fit_multi <- easybgm_compare(data[1:200, 1:5], group_indicator = rep(c(1, 2, 3, 4), each = 50), - type = "binary", save = TRUE, - iter = 100 # for demonstration only; more samples required, check the defaults for each sampler + type = "binary", save = TRUE, + iter = 100 # for demonstration only ) - summary(fit_multi) } } diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index 7ed97ef..d859703 100644 Binary files a/tests/testthat/Rplots.pdf and b/tests/testthat/Rplots.pdf differ diff --git a/tests/testthat/test-easybgm.R b/tests/testthat/test-easybgm.R index 6ff5061..6c0a71d 100644 --- a/tests/testthat/test-easybgm.R +++ b/tests/testthat/test-easybgm.R @@ -24,11 +24,15 @@ test_that("easybgm returns expected structure across valid type–package combos list(type = "mixed", pkg = "BDgraph", sv = F, cnt = F), list(type = "continuous", pkg = "BDgraph", sv = F, cnt = F), ### bgms - list(type = "binary", pkg = "bgms", sv = F, cnt = F), - list(type = "binary", pkg = "bgms", sv = T, cnt = T), - list(type = "binary", pkg = "bgms", sv = F, cnt = T), - list(type = "blume-capel", pkg = "bgms", sv = T, cnt = T), - list(type = "binary", pkg = "bgms", sv = T, cnt = T, sbm = "Stochastic-Block") + list(type = "binary", pkg = "bgms", sv = F, cnt = F), + list(type = "binary", pkg = "bgms", sv = T, cnt = T), + list(type = "binary", pkg = "bgms", sv = F, cnt = T), + list(type = "blume-capel", pkg = "bgms", sv = T, cnt = T), + list(type = "binary", pkg = "bgms", sv = T, cnt = T, sbm = "Stochastic-Block"), + list(type = "continuous", pkg = "bgms", sv = F, cnt = F), + list(type = "continuous", pkg = "bgms", sv = T, cnt = T), + list(type = "mixed", pkg = "bgms", sv = T, cnt = T), + list(type = c("ordinal", "ordinal", "continuous", "continuous", "ordinal"), pkg = "bgms", sv = F, cnt = F) ) for (cmb in combos) { @@ -38,9 +42,9 @@ test_that("easybgm returns expected structure across valid type–package combos cnt <- cmb$cnt if(!is.null(cmb$sbm)) {sbm <- cmb$sbm} - not_cont <- if (t == "mixed") c(TRUE, TRUE, rep(FALSE, p - 2)) else NULL - - if(t == "blume-capel"){ + not_cont <- if (length(t) == 1 && t == "mixed") c(TRUE, TRUE, rep(FALSE, p - 2)) else NULL + + if(length(t) == 1 && t == "blume-capel"){ suppressWarnings({ res <- easybgm( data = dat, @@ -187,6 +191,52 @@ test_that("plotting functions work across valid type–package combos", { ##### NETWORK COMPARISON + +test_that("easybgm_compare errors for continuous/mixed without BGGM", { + data("Wenchuan", package = "bgms") + dat <- na.omit(Wenchuan)[1:20, 1:5] + group_dat <- list(dat[1:10, ], dat[11:20, ]) + + expect_error( + easybgm_compare(group_dat, type = "continuous"), + "only supports ordinal and binary" + ) + expect_error( + easybgm_compare(group_dat, type = "mixed", not_cont = c(1, 1, 0, 0, 0)), + "only supports ordinal and binary" + ) + expect_error( + easybgm_compare(group_dat, type = "continuous", package = "bgms"), + "only supports ordinal and binary" + ) +}) + +test_that("easybgm errors for BDgraph continuous with missing data", { + data("Wenchuan", package = "bgms") + dat_with_na <- Wenchuan[1:20, 1:5] # Wenchuan has NAs + + expect_error( + easybgm(dat_with_na, type = "continuous", package = "BDgraph", + iter = 10, progress = FALSE), + "missing values" + ) +}) + +test_that("easybgm defaults to bgms for all data types", { + data("Wenchuan", package = "bgms") + dat <- na.omit(Wenchuan)[1:20, 1:5] + + suppressWarnings({ + res <- easybgm(dat, type = "continuous", iter = 10, progress = FALSE) + }) + expect_true("package_bgms" %in% class(res)) + + suppressWarnings({ + res2 <- easybgm(dat, type = "ordinal", iter = 10, progress = FALSE) + }) + expect_true("package_bgms" %in% class(res2)) +}) + test_that("easybgm_compare returns expected structure across valid type–package combos", { set.seed(123)