diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..540518b --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,7 @@ +## 2024-06-22 - R Matrix Subsetting Performance +**Learning:** In the `mirt` package, model data slots (e.g. `model@Data$data`) are often stored as matrices, not `data.frame`s. Using `data[[colname]]` on a matrix fails with `subscript out of bounds`, because `[[` with a character index matches element `names`, whereas a matrix keeps its column labels in `dimnames`. Using `data[, colname]` is correct and prevents runtime crashes. +**Action:** When extracting data for column-level checks (like calculating `length(unique(...))`), use matrix subsetting syntax `[, "colname"]` rather than list subsetting `[["colname"]]` when dealing with internal package slots. + +## 2024-06-22 - Replacing `grep` and `as.factor` overhead +**Learning:** Checking for column existence via `length(grep(paste0('^', name, '$'), colnames))` and counting categories via `length(levels(as.factor(col)))` creates major bottlenecks inside iterative loops in R. +**Action:** Replace `grep` operations with straightforward `%in%` matches (`name %in% colnames`). Replace category counts with `length(unique(na.omit(col)))` to skip expensive factor conversions and provide measurable speedups. diff --git a/DESCRIPTION b/DESCRIPTION index 4925812..b559411 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,8 @@ Authors@R: person(given = "Seongho", family = "Bae", role = c("aut", "cre"), Description: Automates fixed item parameter linking for test linking under the item response theory paradigm using mirt package estimates. 
License: GPL-3 | file LICENSE -Imports: mirt +Imports: mirt, + stats Suggests: testthat (>= 3.0.0) Encoding: UTF-8 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 6ab6ae5..29ecf68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,3 +2,4 @@ export(autoFIPC) import(mirt) +importFrom(stats,na.omit) diff --git a/R/aFIPC.R b/R/aFIPC.R index b6a9e6c..e8124bf 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -1,6 +1,7 @@ #' automated fixed item parameter linking #' #' @import mirt +#' @importFrom stats na.omit #' @param newformXData new form data X #' @param oldformYData old form (base form) data Y #' @param newformCommonItemNames Common item variable names in new form data @@ -22,7 +23,7 @@ #' @export #' #' @examples -#' \donotrun{ +#' \dontrun{ #' autoFIPC() ## FIXME #' } autoFIPC <- @@ -548,18 +549,8 @@ autoFIPC <- # IPD target item checking for (i in 1:length(oldformCommonItemNames)) { if ( - (length(grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )) == - 1) == - TRUE + newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) && + oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) ) { IPDItemCount <- IPDItemCount + 1 IPDItemNamesOldForm[IPDItemCount] <- @@ -700,30 +691,10 @@ autoFIPC <- for (i in 1:length(oldformCommonItemNames)) { if ( - (length(grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(levels(as.factor( - newFormModel@Data$data[, grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )] - ))) == - 
length(levels(as.factor( - oldFormModel@Data$data[, grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )] - )))) + newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) && + oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) && + length(unique(na.omit(newFormModel@Data$data[, newformCommonItemNames[i]]))) == + length(unique(na.omit(oldFormModel@Data$data[, oldformCommonItemNames[i]]))) ) { message( 'applying ',