From 8dfe2074daf17af9b66b2e41808c8de311264918 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Mon, 29 Jun 2026 02:59:38 +0000 Subject: [PATCH] =?UTF-8?q?=EC=A0=9C=EA=B0=80=20`R/aFIPC.R`=20=ED=8C=8C?= =?UTF-8?q?=EC=9D=BC=EC=9D=98=20=EC=A1=B0=EA=B1=B4=EB=AC=B8=20=EB=B0=8F=20?= =?UTF-8?q?=EB=B0=98=EB=B3=B5=EB=AC=B8=20=EC=84=B1=EB=8A=A5=EC=9D=84=20?= =?UTF-8?q?=EC=B5=9C=EC=A0=81=ED=99=94=ED=96=88=EC=8A=B5=EB=8B=88=EB=8B=A4?= =?UTF-8?q?=20(=EB=B6=88=ED=95=84=EC=9A=94=ED=95=9C=20=EB=AC=B8=EC=9E=90?= =?UTF-8?q?=EC=97=B4=20=EA=B2=80=EC=83=89=EA=B3=BC=20`as.factor`=20?= =?UTF-8?q?=EC=98=A4=EB=B2=84=ED=97=A4=EB=93=9C=20=EC=A0=9C=EA=B1=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .jules/bolt.md | 7 +++++++ DESCRIPTION | 3 ++- NAMESPACE | 1 + R/aFIPC.R | 45 ++++++++------------------------------------- 4 files changed, 18 insertions(+), 38 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..540518b --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,7 @@ +## 2024-06-22 - R Matrix Subsetting Performance +**Learning:** In the `mirt` package, model data slots (e.g. `model@Data$data`) are often stored as matrices, not `data.frame`s. Using `data[[colname]]` on a matrix fails with `subscript out of bounds`. Using `data[, colname]` is correct and prevents runtime crashes. +**Action:** When extracting data for column-level checks (like calculating `length(unique(...))`), use matrix subsetting syntax `[, "colname"]` rather than list subsetting `[["colname"]]` when dealing with internal package slots. + +## 2024-06-22 - Replacing `grep` and `as.factor` overhead +**Learning:** Checking for column existence via `length(grep(paste0('^', name, '$'), colnames))` and counting categories via `length(levels(as.factor(col)))` creates major bottlenecks inside iterative loops in R. 
+**Action:** Replace `grep`-based existence checks with exact `%in%` matches (`name %in% colnames(data)`), which also avoids treating regex metacharacters in item names as patterns. Replace category counts with `length(unique(na.omit(col)))`, which counts the same non-missing categories while skipping the factor conversion. diff --git a/DESCRIPTION b/DESCRIPTION index 4925812..b559411 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,8 @@ Authors@R: person(given = "Seongho", family = "Bae", role = c("aut", "cre"), Description: Automates fixed item parameter linking for test linking under the item response theory paradigm using mirt package estimates. License: GPL-3 | file LICENSE -Imports: mirt +Imports: mirt, + stats Suggests: testthat (>= 3.0.0) Encoding: UTF-8 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 6ab6ae5..29ecf68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,3 +2,4 @@ export(autoFIPC) import(mirt) +importFrom(stats,na.omit) diff --git a/R/aFIPC.R b/R/aFIPC.R index b6a9e6c..e8124bf 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -1,6 +1,7 @@ #' automated fixed item parameter linking #' #' @import mirt +#' @importFrom stats na.omit #' @param newformXData new form data X #' @param oldformYData old form (base form) data Y #' @param newformCommonItemNames Common item variable names in new form data @@ -22,7 +23,7 @@ #' @export #' #' @examples -#' \donotrun{ +#' \dontrun{ #' autoFIPC() ## FIXME #' } autoFIPC <- @@ -548,18 +549,8 @@ autoFIPC <- # IPD target item checking for (i in 1:length(oldformCommonItemNames)) { if ( - (length(grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )) == - 1) == - TRUE + newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) && + oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) ) { IPDItemCount <- IPDItemCount + 1 IPDItemNamesOldForm[IPDItemCount] <- @@ -700,30 +691,10 @@ 
autoFIPC <- for (i in 1:length(oldformCommonItemNames)) { if ( - (length(grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(levels(as.factor( - newFormModel@Data$data[, grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )] - ))) == - length(levels(as.factor( - oldFormModel@Data$data[, grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )] - )))) + newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) && + oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) && + length(unique(na.omit(newFormModel@Data$data[, newformCommonItemNames[i]]))) == + length(unique(na.omit(oldFormModel@Data$data[, oldformCommonItemNames[i]]))) ) { message( 'applying ',