From 55aa140e01e985db6408942c170fac011dfa92e6 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 30 Jun 2026 19:29:34 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20=EB=A3=A8=ED=94=84=20?= =?UTF-8?q?=EB=82=B4=20=EB=8D=B0=EC=9D=B4=ED=84=B0=ED=94=84=EB=A0=88?= =?UTF-8?q?=EC=9E=84=20subsetting=20=EB=B2=A1=ED=84=B0=ED=99=94=ED=95=98?= =?UTF-8?q?=EC=97=AC=20=EC=84=B1=EB=8A=A5=20=EC=B5=9C=EC=A0=81=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .jules/bolt.md | 4 ++++ R/aFIPC.R | 10 ++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index cfa2846..4bb352e 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -21,3 +21,7 @@ ## 2026-06-30 - Preserve NA handling when removing factor conversions **Learning:** `levels(as.factor(x))` excludes missing responses from the category count, so a faster replacement must not count `NA` as an extra response category. **Action:** Keep `na.omit(unique(x))` rather than plain `unique(x)` in response-category comparisons. + +## 2026-06-30 - Safe Vectorization of Data Frame Row Extraction +**Learning:** In R, replacing a slow `for` loop with vectorized data frame row extraction requires care. Using `as.character(df[1, cols])` directly on a data frame subset yields its internal structure list elements (e.g. integer representations for factors). To safely coerce row values to strings, the subset must first be flattened, using `as.character(unlist(df[1, cols]))`. +**Action:** Always wrap data frame slices with `unlist()` before calling type coercion functions like `as.character()` when refactoring O(n) loops to O(1) vectorized operations. 
diff --git a/R/aFIPC.R b/R/aFIPC.R index d0329f2..1ba30fa 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -689,15 +689,9 @@ autoFIPC <- print(CommonItemList_NOIPD) ActualoldFormCommonItem <- - vector(length = length(CommonItemList_NOIPD)) + as.character(unlist(IPDItemList[1, CommonItemList_NOIPD])) ActualnewFormCommonItem <- - vector(length = length(CommonItemList_NOIPD)) - for (i in 1:length(CommonItemList_NOIPD)) { - ActualoldFormCommonItem[i] <- - as.character(IPDItemList[CommonItemList_NOIPD][1, i]) - ActualnewFormCommonItem[i] <- - as.character(IPDItemList[CommonItemList_NOIPD][2, i]) - } + as.character(unlist(IPDItemList[2, CommonItemList_NOIPD])) message('ActualoldFormCommonItem: ', ActualoldFormCommonItem) message('ActualnewFormCommonItem: ', ActualnewFormCommonItem) From ec1641cb4713d39a79b19598345026bc1c0eeabd Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Thu, 2 Jul 2026 22:23:52 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20=EB=A3=A8=ED=94=84=20?= =?UTF-8?q?=EB=82=B4=20=EB=8D=B0=EC=9D=B4=ED=84=B0=ED=94=84=EB=A0=88?= =?UTF-8?q?=EC=9E=84=20=EC=B0=B8=EC=A1=B0=20=EB=B2=A1=ED=84=B0=ED=99=94=20?= =?UTF-8?q?=EB=B0=8F=20=ED=8C=A9=ED=84=B0=20=ED=83=80=EC=9E=85=20=EC=97=90?= =?UTF-8?q?=EC=A7=80=20=EC=BC=80=EC=9D=B4=EC=8A=A4=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/r.yml | 4 ++-- .jules/bolt.md | 4 ++-- R/aFIPC.R | 7 ++++-- tests/testthat/test-factor-handling.R | 31 +++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 tests/testthat/test-factor-handling.R diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index f49a465..a858191 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -30,13 +30,13 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd - name: Set up R - uses: 
r-lib/actions/setup-r@d3c5be51b12e724e68f33216ca3c148b66d5f0b6 + uses: r-lib/actions/setup-r@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: r-version: release use-public-rspm: true - name: Set up R package dependencies - uses: r-lib/actions/setup-r-dependencies@d3c5be51b12e724e68f33216ca3c148b66d5f0b6 + uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: extra-packages: any::rcmdcheck needs: check diff --git a/.jules/bolt.md b/.jules/bolt.md index 4bb352e..9afe6a4 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -23,5 +23,5 @@ **Action:** Keep `na.omit(unique(x))` rather than plain `unique(x)` in response-category comparisons. ## 2026-06-30 - Safe Vectorization of Data Frame Row Extraction -**Learning:** In R, replacing a slow `for` loop with vectorized data frame row extraction requires care. Using `as.character(df[1, cols])` directly on a data frame subset yields its internal structure list elements (e.g. integer representations for factors). To safely coerce row values to strings, the subset must first be flattened, using `as.character(unlist(df[1, cols]))`. -**Action:** Always wrap data frame slices with `unlist()` before calling type coercion functions like `as.character()` when refactoring O(n) loops to O(1) vectorized operations. +**Learning:** In R, replacing a slow `for` loop with vectorized data frame row extraction requires care. Calling `unlist()` on a data frame row that mixes factor and non-factor columns silently reduces the factors to their underlying integer codes, leading to incorrect string representations. A safer way to extract the elements as characters while preserving factor labels (and avoiding introduced names) is to convert column by column with `vapply()` on the single-row slice, such as `vapply(df[1, cols], as.character, character(1), USE.NAMES = FALSE)`. +**Action:** Always use `vapply(..., as.character, character(1))` instead of `unlist()` when vectorizing type coercion functions over data frame subsets containing potential factor columns. 
diff --git a/R/aFIPC.R b/R/aFIPC.R index 1ba30fa..7b7da22 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -688,10 +688,13 @@ autoFIPC <- print(modIPD_DIF) print(CommonItemList_NOIPD) + # Replace the element-by-element loop with vapply: each selected column of + # the row is converted with as.character (so factor labels, not integer codes, + # are kept) without re-subsetting IPDItemList on every iteration. ActualoldFormCommonItem <- - as.character(unlist(IPDItemList[1, CommonItemList_NOIPD])) + vapply(IPDItemList[1, CommonItemList_NOIPD], as.character, character(1), USE.NAMES = FALSE) ActualnewFormCommonItem <- - as.character(unlist(IPDItemList[2, CommonItemList_NOIPD])) + vapply(IPDItemList[2, CommonItemList_NOIPD], as.character, character(1), USE.NAMES = FALSE) message('ActualoldFormCommonItem: ', ActualoldFormCommonItem) message('ActualnewFormCommonItem: ', ActualnewFormCommonItem) diff --git a/tests/testthat/test-factor-handling.R b/tests/testthat/test-factor-handling.R new file mode 100644 index 0000000..6cb772c --- /dev/null +++ b/tests/testthat/test-factor-handling.R @@ -0,0 +1,31 @@ +test_that("factor columns are handled correctly during IPD vectorization", { + skip_if_not_installed("mirt") + + # Create a scenario where IPD runs and dataframe has factor or character types + set.seed(42) + old_item_names <- paste0("Item", 1:4) + new_item_names <- paste0("Item", 1:4) + + dat_old <- mirt::simdata(a = matrix(runif(4, 0.8, 2)), d = matrix(rnorm(4)), N = 100, itemtype = "2PL") + dat_new <- mirt::simdata(a = matrix(runif(4, 0.8, 2)), d = matrix(rnorm(4)), N = 100, itemtype = "2PL") + colnames(dat_old) <- old_item_names + colnames(dat_new) <- new_item_names + + old_mod <- mirt::mirt(dat_old, 1, itemtype = "2PL", SE = FALSE, verbose = FALSE) + new_mod <- mirt::mirt(dat_new, 1, itemtype = "2PL", SE = FALSE, verbose = FALSE) + + # Run autoFIPC with checkIPD = TRUE to trigger the IPD logic + res <- aFIPC::autoFIPC( + newformXData = new_mod, + oldformYData = old_mod, + 
newformCommonItemNames = paste0("Item", 1:4), + oldformCommonItemNames = paste0("Item", 1:4), + itemtype = "2PL", + checkIPD = TRUE, + confirmCommonItems = TRUE + ) + + # Smoke check only: autoFIPC ran to completion and returned a list with a LinkedModel; factor-label values are not asserted here + expect_type(res, "list") + expect_true(!is.null(res$LinkedModel)) +})