From 8dfe2074daf17af9b66b2e41808c8de311264918 Mon Sep 17 00:00:00 2001
From: seonghobae <8172694+seonghobae@users.noreply.github.com>
Date: Mon, 29 Jun 2026 02:59:38 +0000
Subject: [PATCH] =?UTF-8?q?=EC=A0=9C=EA=B0=80=20`R/aFIPC.R`=20=ED=8C=8C?=
 =?UTF-8?q?=EC=9D=BC=EC=9D=98=20=EC=A1=B0=EA=B1=B4=EB=AC=B8=20=EB=B0=8F=20?=
 =?UTF-8?q?=EB=B0=98=EB=B3=B5=EB=AC=B8=20=EC=84=B1=EB=8A=A5=EC=9D=84=20?=
 =?UTF-8?q?=EC=B5=9C=EC=A0=81=ED=99=94=ED=96=88=EC=8A=B5=EB=8B=88=EB=8B=A4?=
 =?UTF-8?q?=20(=EB=B6=88=ED=95=84=EC=9A=94=ED=95=9C=20=EB=AC=B8=EC=9E=90?=
 =?UTF-8?q?=EC=97=B4=20=EA=B2=80=EC=83=89=EA=B3=BC=20`as.factor`=20?=
 =?UTF-8?q?=EC=98=A4=EB=B2=84=ED=97=A4=EB=93=9C=20=EC=A0=9C=EA=B1=B0).?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .jules/bolt.md |  7 +++++++
 DESCRIPTION    |  3 ++-
 NAMESPACE      |  1 +
 R/aFIPC.R      | 45 ++++++++-------------------------------------
 4 files changed, 18 insertions(+), 38 deletions(-)
 create mode 100644 .jules/bolt.md

diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..540518b
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,7 @@
+## 2024-06-22 - R Matrix Subsetting Performance
+**Learning:** In the `mirt` package, model data slots (e.g. `model@Data$data`) are often stored as matrices, not `data.frame`s. List-style extraction with `data[[colname]]` fails on a matrix with `subscript out of bounds`, whereas `data[, colname]` works for both matrices and data frames and so avoids the runtime crash.
+**Action:** When extracting data for column-level checks (like calculating `length(unique(...))`), use matrix subsetting syntax `[, "colname"]` rather than list subsetting `[["colname"]]` when dealing with internal package slots.
+
+## 2024-06-22 - Replacing `grep` and `as.factor` overhead
+**Learning:** Checking for column existence via `length(grep(paste0('^', name, '$'), colnames))` and counting categories via `length(levels(as.factor(col)))` repeats avoidable work (building and matching a regular expression, constructing a factor) on every iteration, which adds up inside loops in R.
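+**Action:** Replace `grep` operations with straightforward `%in%` matches (`name %in% colnames`). Replace category counts with `length(unique(na.omit(col)))` to skip expensive factor conversions and provide measurable speedups. Caveat: `%in%` compares names literally (regex metacharacters in an item name are no longer interpreted) and is `TRUE` even when a column name occurs more than once, whereas the old `length(grep(...)) == 1` check required exactly one match; this is equivalent only when column names are unique.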
diff --git a/DESCRIPTION b/DESCRIPTION
index 4925812..b559411 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,7 +8,8 @@ Authors@R: person(given = "Seongho", family = "Bae", role = c("aut", "cre"),
 Description: Automates fixed item parameter linking for test linking under
     the item response theory paradigm using mirt package estimates.
 License: GPL-3 | file LICENSE
-Imports: mirt
+Imports: mirt,
+    stats
 Suggests: testthat (>= 3.0.0)
 Encoding: UTF-8
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 6ab6ae5..29ecf68 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,3 +2,4 @@
 
 export(autoFIPC)
 import(mirt)
+importFrom(stats,na.omit)
diff --git a/R/aFIPC.R b/R/aFIPC.R
index b6a9e6c..e8124bf 100644
--- a/R/aFIPC.R
+++ b/R/aFIPC.R
@@ -1,6 +1,7 @@
 #' automated fixed item parameter linking
 #'
 #' @import mirt
+#' @importFrom stats na.omit
 #' @param newformXData new form data X
 #' @param oldformYData old form (base form) data Y
 #' @param newformCommonItemNames Common item variable names in new form data
@@ -22,7 +23,7 @@
 #' @export
 #'
 #' @examples
-#' \donotrun{
+#' \dontrun{
 #' autoFIPC() ## FIXME
 #' }
 autoFIPC <-
@@ -548,18 +549,8 @@ autoFIPC <-
       # IPD target item checking
       for (i in 1:length(oldformCommonItemNames)) {
         if (
-          (length(grep(
-            paste0('^', newformCommonItemNames[i], '$'),
-            colnames(newformXDataK[colnames(newFormModel@Data$data)])
-          )) ==
-            1) ==
-            TRUE &&
-            (length(grep(
-              paste0('^', oldformCommonItemNames[i], '$'),
-              colnames(oldformYDataK[colnames(oldFormModel@Data$data)])
-            )) ==
-              1) ==
-              TRUE
+          newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) &&
+          oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data)
         ) {
           IPDItemCount <- IPDItemCount + 1
           IPDItemNamesOldForm[IPDItemCount] <-
@@ -700,30 +691,10 @@ autoFIPC <-
 
     for (i in 1:length(oldformCommonItemNames)) {
       if (
-        (length(grep(
-          paste0('^', newformCommonItemNames[i], '$'),
-          colnames(newformXDataK[colnames(newFormModel@Data$data)])
-        )) ==
-          1) ==
-          TRUE &&
-          (length(grep(
-            paste0('^', oldformCommonItemNames[i], '$'),
-            colnames(oldformYDataK[colnames(oldFormModel@Data$data)])
-          )) ==
-            1) ==
-            TRUE &&
-          (length(levels(as.factor(
-            newFormModel@Data$data[, grep(
-              paste0('^', newformCommonItemNames[i], '$'),
-              colnames(newformXDataK[colnames(newFormModel@Data$data)])
-            )]
-          ))) ==
-            length(levels(as.factor(
-              oldFormModel@Data$data[, grep(
-                paste0('^', oldformCommonItemNames[i], '$'),
-                colnames(oldformYDataK[colnames(oldFormModel@Data$data)])
-              )]
-            ))))
+        newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) &&
+        oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) &&
+        length(unique(na.omit(newFormModel@Data$data[, newformCommonItemNames[i]]))) ==
+        length(unique(na.omit(oldFormModel@Data$data[, oldformCommonItemNames[i]])))
       ) {
         message(
           'applying ',