diff --git a/.jules/bolt.md b/.jules/bolt.md index cfa2846..be74090 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -21,3 +21,6 @@ ## 2026-06-30 - Preserve NA handling when removing factor conversions **Learning:** `levels(as.factor(x))` excludes missing responses from the category count, so a faster replacement must not count `NA` as an extra response category. **Action:** Keep `na.omit(unique(x))` rather than plain `unique(x)` in response-category comparisons. +## 2024-05-18 - R 언어에서 루프 내 정규식 탐색 병목 최적화 및 문자열 연산 +**Learning:** `R/aFIPC.R` 내의 공통 문항 처리 루프에서 `which(NewScaleParms$item == paste0(newformCommonItemNames[i]))` 와 같이 `paste0()`를 불필요하게 호출하고 배열 전체를 반복 스캔하는 방식은 O(N) 탐색을 루프 안에서 반복할 경우 비효율성을 초래한다. +**Action:** `paste0()` 래핑을 제거하고, 루프 내에서 결과 인덱스를 변수에 캐싱(`new_item_idx <- which(...)`)하여 후속 연산에서 재사용하도록 수정하였다. 이를 통해 불필요한 배열 스캔과 캐스팅 연산을 제거하여 성능을 개선할 수 있다. diff --git a/DESCRIPTION b/DESCRIPTION index 8137491..cb7f456 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,8 @@ Description: Automates fixed item parameter linking for test linking under the item response theory paradigm using mirt package estimates. License: GPL-3 | file LICENSE Imports: mirt -Suggests: testthat (>= 3.0.0) +Suggests: testthat (>= 3.0.0), + mockery Encoding: UTF-8 Config/testthat/edition: 3 Config/roxygen2/version: 8.0.0 diff --git a/R/aFIPC.R b/R/aFIPC.R index d0329f2..de10ca7 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -109,10 +109,10 @@ autoFIPC <- ) { # if Data is mirt model oldFormModel <- oldformYData - oldformYDataK <- data.frame(oldFormModel@Data$data) + oldformYDataK <- as.data.frame(oldFormModel@Data$data) } else { # if Data is data.frame - oldformYDataK <- oldformYData + oldformYDataK <- as.data.frame(oldformYData) if (itemtype == '3PL' && length(oldformBILOGprior) == 0) { checkoldformBILOGprior <- function() { if (!interactive()) stop("Interactive session required for oldform BILOG prior") @@ -323,9 +323,9 @@ autoFIPC <- ) { # if Data is mirt model newFormModel <- newformXData - newformXDataK <- data.frame(newFormModel@Data$data) + newformXDataK <- as.data.frame(newFormModel@Data$data) } else { - newformXDataK <- newformXData + newformXDataK <- as.data.frame(newformXData) if (itemtype == '3PL' && length(newformBILOGprior) == 0) { checknewformBILOGprior <- function() { if (!interactive()) stop("Interactive session required for newform BILOG prior") @@ -538,19 +538,21 @@ autoFIPC <- OldScaleParms[, "est"] <- TRUE } - NewScaleParms[which(NewScaleParms$item == paste0('GROUP')), "est"] <- - FALSE - OldScaleParms[which(OldScaleParms$item == paste0('GROUP')), "est"] <- - FALSE + new_group_idx <- which(NewScaleParms$item == 'GROUP') + old_group_idx <- which(OldScaleParms$item == 'GROUP') + if (length(new_group_idx) > 0) NewScaleParms[new_group_idx, "est"] <- FALSE + if (length(old_group_idx) > 0) OldScaleParms[old_group_idx, "est"] <- FALSE - NewScaleParms[which(NewScaleParms$name == "COV_11"), "est"] <- - TRUE - OldScaleParms[which(OldScaleParms$name == "COV_11"), "est"] <- - TRUE + new_cov11_idx <- which(NewScaleParms$name == "COV_11") + old_cov11_idx <- which(OldScaleParms$name == "COV_11") + if (length(new_cov11_idx) > 0) NewScaleParms[new_cov11_idx, "est"] <- TRUE + if (length(old_cov11_idx) > 0) OldScaleParms[old_cov11_idx, "est"] <- TRUE if (itemtype == 'Rasch') { - NewScaleParms[which(NewScaleParms$name == "a1"), "est"] <- FALSE - OldScaleParms[which(OldScaleParms$name == "a1"), "est"] <- FALSE + new_a1_idx <- which(NewScaleParms$name == "a1") + old_a1_idx <- which(OldScaleParms$name == "a1") + if (length(new_a1_idx) > 0) NewScaleParms[new_a1_idx, "est"] <- FALSE + if (length(old_a1_idx) > 0) OldScaleParms[old_a1_idx, "est"] <- FALSE } #IPD @@ -566,8 +568,8 @@ autoFIPC <- IPDItemNamesNewForm <- vector() # IPD target item checking - newFormColNames <- colnames(newformXDataK[colnames(newFormModel@Data$data)]) - oldFormColNames <- colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) + newFormColNames <- colnames(newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE]) + oldFormColNames <- colnames(oldformYDataK[, colnames(oldFormModel@Data$data), drop = FALSE]) for (i in 1:length(oldformCommonItemNames)) { newFormItemName <- newFormColNames[match(newformCommonItemNames[i], newFormColNames)] @@ -711,78 +713,74 @@ autoFIPC <- } } - newFormColNames <- colnames(newformXDataK[colnames(newFormModel@Data$data)]) - oldFormColNames <- colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) + newFormColNames <- colnames(newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE]) + oldFormColNames <- colnames(oldformYDataK[, colnames(oldFormModel@Data$data), drop = FALSE]) + + newFormItemIdxs <- match(newformCommonItemNames, newFormColNames) + oldFormItemIdxs <- match(oldformCommonItemNames, oldFormColNames) + + # Create an item-to-row index map for O(1) access to avoid repeated array scanning + # Since an item can have multiple parameters (a1, d, g, u), we group row indices by item name. + new_item_map <- split(seq_len(nrow(NewScaleParms)), NewScaleParms$item) + old_item_map <- split(seq_len(nrow(OldScaleParms)), OldScaleParms$item) + + # Clean up empty strings or null keys from the map which could cause out-of-bounds subsetting + new_item_map <- new_item_map[nzchar(names(new_item_map))] + old_item_map <- old_item_map[nzchar(names(old_item_map))] for (i in 1:length(oldformCommonItemNames)) { - newFormItemName <- newFormColNames[match(newformCommonItemNames[i], newFormColNames)] - oldFormItemName <- oldFormColNames[match(oldformCommonItemNames[i], oldFormColNames)] + newFormItemName <- newFormColNames[newFormItemIdxs[i]] + oldFormItemName <- oldFormColNames[oldFormItemIdxs[i]] if ( !is.na(newFormItemName) && !is.na(oldFormItemName) && - (length(na.omit(unique(newFormModel@Data$data[, newFormItemName]))) == - length(na.omit(unique(oldFormModel@Data$data[, oldFormItemName])))) + (length(na.omit(unique(newFormModel@Data$data[, newFormItemIdxs[i]]))) == + length(na.omit(unique(oldFormModel@Data$data[, oldFormItemIdxs[i]])))) ) { message( 'applying ', - paste0(newformCommonItemNames[i]), + newformCommonItemNames[i], ' <<< ', - paste0(oldformCommonItemNames[i]), + oldformCommonItemNames[i], ' as common item use' ) + new_item_idx <- new_item_map[[newformCommonItemNames[i]]] + old_item_idx <- old_item_map[[oldformCommonItemNames[i]]] + message( ' Newform Parms: ', paste0( - NewScaleParms[ - which(NewScaleParms$item == paste0(newformCommonItemNames[i])), - "value" - ], + NewScaleParms[new_item_idx, "value"], ' ' ) ) message( ' Oldform Parms: ', paste0( - OldScaleParms[ - which(OldScaleParms$item == paste0(oldformCommonItemNames[i])), - "value" - ], + OldScaleParms[old_item_idx, "value"], ' ' ) ) - NewScaleParms[ - which(NewScaleParms$item == paste0(newformCommonItemNames[i])), - "value" - ] <- - OldScaleParms[ - which(OldScaleParms$item == paste0(oldformCommonItemNames[i])), - "value" - ] + NewScaleParms[new_item_idx, "value"] <- OldScaleParms[old_item_idx, "value"] + message( ' Linkedform Parms: ', paste0( - NewScaleParms[ - which(NewScaleParms$item == paste0(newformCommonItemNames[i])), - "value" - ], + NewScaleParms[new_item_idx, "value"], ' ' ), '\n' ) - NewScaleParms[ - which(NewScaleParms$item == paste0(newformCommonItemNames[i])), - "est" - ] <- - FALSE + NewScaleParms[new_item_idx, "est"] <- FALSE } else { message( 'skipping ', - paste0(newformCommonItemNames[i]), + newformCommonItemNames[i], ' <<< ', - paste0(oldformCommonItemNames[i]), + oldformCommonItemNames[i], ' as common item use' ) } @@ -792,21 +790,24 @@ autoFIPC <- length(attr(newFormModel@ParObjects$lrPars, 'parnum')) != 0 && length(attr(oldFormModel@ParObjects$lrPars, 'parnum')) != 0 ) { - NewScaleParms[which(NewScaleParms$item == paste0('BETA')), "value"] <- - OldScaleParms[which(OldScaleParms$item == paste0('BETA')), "value"] - NewScaleParms[which(NewScaleParms$item == paste0('BETA')), "est"] <- - FALSE + new_beta_idx <- which(NewScaleParms$item == 'BETA') + old_beta_idx <- which(OldScaleParms$item == 'BETA') - message('applying BETA parameter as linking') + if (length(new_beta_idx) > 0 && length(old_beta_idx) > 0) { + NewScaleParms[new_beta_idx, "value"] <- OldScaleParms[old_beta_idx, "value"] + NewScaleParms[new_beta_idx, "est"] <- FALSE - message( - ' Linkedform Parms: ', - paste0( - NewScaleParms[which(NewScaleParms$item == paste0('BETA')), "value"], - ' ' - ), - '\n' - ) + message('applying BETA parameter as linking') + + message( + ' Linkedform Parms: ', + paste0( + NewScaleParms[new_beta_idx, "value"], + ' ' + ), + '\n' + ) + } betaFormula <- attr(newFormModel@ParObjects$lrPars, 'formula')[[1]] betaCOVdata <- attr(newFormModel@ParObjects$lrPars, 'df') @@ -845,7 +846,7 @@ autoFIPC <- LinkedModelSyntax <- mirt::mirt.model(paste0( 'F1 = 1-', - ncol(newformXDataK[colnames(newFormModel@Data$data)]), + ncol(newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE]), '\n', 'MEAN = F1' )) @@ -858,7 +859,7 @@ autoFIPC <- LinkedModelSyntax <- mirt::mirt.model(paste0( 'F1 = 1-', - ncol(newformXDataK[colnames(newFormModel@Data$data)]), + ncol(newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE]), '\n' )) } @@ -878,7 +879,7 @@ autoFIPC <- LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'EM', @@ -898,7 +899,7 @@ autoFIPC <- } else { LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'EM', @@ -925,7 +926,7 @@ autoFIPC <- # LinkedModel <- oldFormModel LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'MHRM', @@ -945,7 +946,7 @@ autoFIPC <- } else { LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = newformXDataK[, colnames(newFormModel@Data$data), drop = FALSE], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'MHRM', diff --git a/tests/testthat/test_aFIPC_fast.R b/tests/testthat/test_aFIPC_fast.R new file mode 100644 index 0000000..4c7d40e --- /dev/null +++ b/tests/testthat/test_aFIPC_fast.R @@ -0,0 +1,44 @@ +library(testthat) +library(mirt) + +test_that("autoFIPC validates input correctly", { + expect_error(autoFIPC(matrix(1), matrix(1), c("i1", "i2"), c("i1")), "Common Items are not equal") + expect_error(autoFIPC(matrix(1), matrix(1), c(), c()), "Please provide common item names") + + expect_error(autoFIPC(matrix(1), matrix(1), c("i1"), c("i1"), confirmCommonItems = NULL), + "Common item confirmation requires an interactive session") + + expect_error(autoFIPC(matrix(1), matrix(1), c("i1"), c("i1"), confirmCommonItems = FALSE), "Please write down pairs correctly") +}) + +test_that("autoFIPC execution with cache logic", { + skip_on_cran() + skip_if_not_installed("mirt") + + set.seed(123) + + # Ensure sufficient degrees of freedom by increasing N and the number of items. + N <- 500 + theta <- rnorm(N) + a <- matrix(rep(1, 5), ncol=1) + d <- matrix(c(1, 0.5, 0, -0.5, -1), ncol=1) + + old_data <- simdata(a, d, N, itemtype="2PL") + new_data <- simdata(a, d, N, itemtype="2PL") + + colnames(old_data) <- c("i1", "i2", "i3", "i4", "i5") + colnames(new_data) <- c("i1", "i6", "i7", "i8", "i9") + + res <- autoFIPC(new_data, old_data, c("i1"), c("i1"), itemtype = "2PL", confirmCommonItems = TRUE, + checkIPD = FALSE, tryFitwholeNewItems=FALSE, tryFitwholeOldItems=FALSE) + + expect_true(is.list(res)) + expect_true(!is.null(res$oldFormModel)) + expect_true(!is.null(res$newFormModel)) + expect_true(!is.null(res$LinkedModel)) + + linked_pars <- mirt::mod2values(res$LinkedModel) + expect_true(all(linked_pars$est[linked_pars$item == "i1"] == FALSE)) + old_pars <- mirt::mod2values(res$oldFormModel) + expect_equal(linked_pars$value[linked_pars$item == "i1"], old_pars$value[old_pars$item == "i1"]) +}) diff --git a/tests/testthat/test_aFIPC_fast.R.orig b/tests/testthat/test_aFIPC_fast.R.orig new file mode 100644 index 0000000..40ec920 --- /dev/null +++ b/tests/testthat/test_aFIPC_fast.R.orig @@ -0,0 +1,42 @@ +library(testthat) +library(mirt) + +test_that("autoFIPC validates input correctly", { + expect_error(autoFIPC(matrix(1), matrix(1), c("i1", "i2"), c("i1")), "Common Items are not equal") + expect_error(autoFIPC(matrix(1), matrix(1), c(), c()), "Please provide common item names") + + expect_error(autoFIPC(matrix(1), matrix(1), c("i1"), c("i1"), confirmCommonItems = NULL), + "Common item confirmation requires an interactive session") + + expect_error(autoFIPC(matrix(1), matrix(1), c("i1"), c("i1"), confirmCommonItems = FALSE), "Please write down pairs correctly") +}) + +test_that("autoFIPC execution with cache logic", { + skip_on_cran() + skip_if_not_installed("mirt") + + set.seed(123) + + # Ensure sufficient degrees of freedom by increasing N and the number of items. + N <- 500 + theta <- rnorm(N) + a <- matrix(rep(1, 5), ncol=1) + d <- matrix(c(1, 0.5, 0, -0.5, -1), ncol=1) + + old_data <- simdata(a, d, N, itemtype="2PL") + new_data <- simdata(a, d, N, itemtype="2PL") + + colnames(old_data) <- c("i1", "i2", "i3", "i4", "i5") + colnames(new_data) <- c("i1", "i6", "i7", "i8", "i9") + + res <- autoFIPC(new_data, old_data, c("i1"), c("i1"), itemtype = "2PL", confirmCommonItems = TRUE, + checkIPD = FALSE, tryFitwholeNewItems=FALSE, tryFitwholeOldItems=FALSE) + + expect_true(is.list(res)) + expect_true(!is.null(res$oldFormModel)) + expect_true(!is.null(res$newFormModel)) + expect_true(!is.null(res$LinkedModel)) + + linked_pars <- mirt::mod2values(res$LinkedModel) + expect_true(any(linked_pars$item == "i1" & linked_pars$est == FALSE)) +}) diff --git a/tests/testthat/test_surveyFA.R b/tests/testthat/test_surveyFA.R new file mode 100644 index 0000000..f353444 --- /dev/null +++ b/tests/testthat/test_surveyFA.R @@ -0,0 +1,5 @@ +library(testthat) + +test_that("surveyFA works", { + expect_message(surveyFA(), "surveyFA is currently a stub.") +})