From c2917de2466463464965b5d47822b6198c9086ff Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Fri, 18 Nov 2022 13:03:45 +0100 Subject: [PATCH 01/10] lparal = T --- p_parameters/08_SCRI_parameters.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p_parameters/08_SCRI_parameters.R b/p_parameters/08_SCRI_parameters.R index e2dbd0ef..a53a61fb 100644 --- a/p_parameters/08_SCRI_parameters.R +++ b/p_parameters/08_SCRI_parameters.R @@ -61,7 +61,7 @@ SCRI_variables_vocabulary <- data.table(vac4eu = c("E_GOUT_COV", "C_MYOCARD_AESI col_list <- c("red", "green3", "orange", "deepskyblue", "magenta2", "cyan2", "chocolate1", "gray" ) # parallel - lparal = F # if T ==> library(parallel) is started in function 'scri' + lparal = T # if T ==> library(parallel) is started in function 'scri' n_cores = NA leventplot = T From e5c7206f59d6ceef2fd9a5931178c78d49f6fce0 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Fri, 18 Nov 2022 13:20:46 +0100 Subject: [PATCH 02/10] gc() added --- p_steps/08_T5_10_scri.R | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/p_steps/08_T5_10_scri.R b/p_steps/08_T5_10_scri.R index 46628a33..85097e0c 100644 --- a/p_steps/08_T5_10_scri.R +++ b/p_steps/08_T5_10_scri.R @@ -41,7 +41,6 @@ for (subpop in subpopulations_non_empty) { dir.create(sdr_models0, showWarnings = FALSE, recursive = TRUE) - cat(paste0('\n\t"',subpop,'":\n\n')) @@ -109,9 +108,17 @@ for (subpop in subpopulations_non_empty) { # check: tb<-table(data_vax$study_entry_days < data_vax$study_exit_days ) - if(length(tb)>1)stop(paste("There are ",tb["FALSE"],"rows with 'study_entry_date' >= 'study_exit_date' !")) - data_vax <- data_vax[data_vax$study_entry_days < data_vax$study_exit_days,] + if(length(tb)>1){ + warning(paste("There are ",tb["FALSE"],"rows with 'study_entry_date' >= 'study_exit_date' !")) + print('"study_entry_days" < "study_exit_days":') + table(tb) + data_vax_excluded <- data_vax[data_vax$study_entry_days >= data_vax$study_exit_days,] + save(data_vax_excluded,file=paste0(sdr0,"excluded_rows.RData")) + sink(paste0(sdr0,"excluded_rows.txt")); print(data_vax_excluded);sink() + + data_vax <- data_vax[data_vax$study_entry_days < data_vax$study_exit_days,] + } cond <- !is.na(data_vax$study_entry_days) & ( is.na(data_vax$vax_days) | ( !is.na(data_vax$vax_days) & data_vax$study_entry_days <= data_vax$vax_days ) ) if(any(!cond)) stop(paste( sum(cond), "rows with 'study_entry_days' > 'vax_days'")) @@ -299,8 +306,9 @@ for (subpop in subpopulations_non_empty) { res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=T, event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) - } - } + } # end for strata_value + gc() + } # end for strata_var } # end lmain @@ -407,6 +415,7 @@ for (subpop in subpopulations_non_empty) { } # end covid 'for' }# end codid variable + gc() } # end lcovid @@ -572,6 +581,7 @@ for (subpop in subpopulations_non_empty) { extra_options_dist$extra_name <- "" + gc() ########################################################################################### # @@ -777,8 +787,10 @@ for (subpop in subpopulations_non_empty) { extra_options_dist$extra_name <- "" - } - } + gc() + + } # end for strata_value + } # end for strata_var } # end of ldist From 1b3ebf241fb6815aa8a1db65f5406145bb351ff7 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Fri, 18 Nov 2022 13:22:09 +0100 Subject: [PATCH 03/10] gc() added --- p_steps/08_T5_10_scri.R | 1 + 1 file changed, 1 insertion(+) diff --git a/p_steps/08_T5_10_scri.R b/p_steps/08_T5_10_scri.R index 85097e0c..85957dbe 100644 --- a/p_steps/08_T5_10_scri.R +++ b/p_steps/08_T5_10_scri.R @@ -748,6 +748,7 @@ for (subpop in subpopulations_non_empty) { extra_options_dist$extra_name <- "" + gc() ########################################################################################### # From 15b8e96da05c94d3b6f6e05d0e472be0bc221e52 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Fri, 18 Nov 2022 18:03:46 +0100 Subject: [PATCH 04/10] less time intervals --- p_parameters/08_SCRI_parameters.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/p_parameters/08_SCRI_parameters.R b/p_parameters/08_SCRI_parameters.R index a53a61fb..e3faf70e 100644 --- a/p_parameters/08_SCRI_parameters.R +++ b/p_parameters/08_SCRI_parameters.R @@ -34,8 +34,8 @@ SCRI_variables_vocabulary <- data.table(vac4eu = c("E_GOUT_COV", "C_MYOCARD_AESI # # specify the length and the start moment of the calendar time intervals: # - time_interval_width <- c( 21, 30, 30, 45) - time_interval_starts <- c( -8,-10, -1, -5) + time_interval_width <- c( 30, 30 ) + time_interval_starts <- c(-10, -1 ) ############################# From 1a4f8c9bcb94aef26353e15d7cae7e256ba7db58 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Fri, 18 Nov 2022 18:10:28 +0100 Subject: [PATCH 05/10] less stratified analysis --- p_steps/08_T5_10_scri.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/p_steps/08_T5_10_scri.R b/p_steps/08_T5_10_scri.R index 85957dbe..3b4e5697 100644 --- a/p_steps/08_T5_10_scri.R +++ b/p_steps/08_T5_10_scri.R @@ -294,18 +294,13 @@ for (subpop in subpopulations_non_empty) { # in models with calendar time intervals use only events from the current stratum: res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=F, event_info=event_info, extra_parameters = extra_options, add_to_itself=F ) - # in models with calendar time intervals use events from all strata: - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=T, - event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) - + ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=F, event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=T, - event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) - + } # end for strata_value gc() } # end for strata_var From bfc76c764b473c291bc35803fa95e6592c2fc141 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Mon, 21 Nov 2022 09:18:37 +0100 Subject: [PATCH 06/10] faster --- p_macro/scri_tools.R | 410 ++++++++++++++++++++++++++++--------------- 1 file changed, 266 insertions(+), 144 deletions(-) diff --git a/p_macro/scri_tools.R b/p_macro/scri_tools.R index d014d3b1..b65ad708 100644 --- a/p_macro/scri_tools.R +++ b/p_macro/scri_tools.R @@ -43,7 +43,7 @@ scri_fit <- function( formula="", delete_median_cols = T, lprint = T, test = F, save_data = F -){ +){ #if(missing(formula )) stop("'formula' is missing.") if(missing(rws )) stop("'rws' is missing.") @@ -108,7 +108,7 @@ scri_fit <- function( formula="", data_rws <- data_rws$data_rws } else sep_vars <- c() - + ####### @@ -187,12 +187,15 @@ scri_fit <- function( formula="", Poisson_formula <- as.formula(paste(event,"~", paste( var_names, collapse=" + "), "+", "strata(",id,")", "+", "offset(log(interval))")) + #cat(paste("before ",Poisson_formula,":",Sys.time(),"\n")) + if(!no_formula){ suppressWarnings( mod <- try( clogit(formula = Poisson_formula, data = data_rws, control=coxph.control(iter.max=1000) ), silent = T ) ) } else mod <- NA + #cat(paste("after ",Poisson_formula,":",Sys.time(),"\n")) if(class(mod)[[1]]== "try-error" | no_formula){ @@ -288,11 +291,13 @@ scri_fit <- function( formula="", Poisson_formula <- as.formula(paste(event,"~", paste( var_names, collapse=" + "), "+", "strata(",id,")", "+", "offset(log(interval))")) + #cat(paste("before ",Poisson_formula,":",Sys.time(),"\n")) if(!no_formula){ suppressWarnings( mod <- try( clogit(formula = Poisson_formula, data = data_rws, control=coxph.control(iter.max=1000) ), silent = T ) ) } + #cat(paste("after ",Poisson_formula,":",Sys.time(),"\n")) if(class(mod)[[1]]== "try-error" | no_formula) { @@ -411,7 +416,7 @@ create_rws <- function( rws, # list of risk/control win # create and calculate variable from formula from list 'rws' and dataset 'data': 'lab' (or another name), 'rw_start' and 'rw_end': calc_wind <- function(x, data, obj){ - + cond_rws <- rep(T, nrow(data)) if(!is.null(x$cond)) cond_rws <- eval(x$cond,data) else { @@ -424,7 +429,7 @@ create_rws <- function( rws, # list of risk/control win data <- data[cond_rws & !is.na(data[, x$t0]),] - if(nrow(data)==0) return(NULL) + if(nrow(data)==0) return("no data") data$rws_cat <- x$name @@ -612,7 +617,7 @@ create_rws <- function( rws, # list of risk/control win else obj$data_parameters$next_vax_time <- "next_vax_time" } } - + if(!any(names(data)==obj$data_parameters$next_vax_time)){ data <- data[order(data[,obj$data_parameters$id],data[,obj$data_parameters$vax_time]),] data[,obj$data_parameters$next_vax_time] <- c(data[-1,obj$data_parameters$vax_time], NA) @@ -796,7 +801,7 @@ create_rws <- function( rws, # list of risk/control win if(length(x$vax_dep)>1) for(i in 2:length(x$vax_dep)) data_rws[,"tmp_var"] <- paste( data_rws[,"tmp_var"], "&", data_rws[,paste0(x$name,"_",x$vax_dep[i])]) #data_rws[ data_rws[,paste0(x$name,"_lab")] == paste0("no ",x$name), "tmp_var"] <- NA - + vax_dep_cat <- unique(data_rws[ !(data_rws[,paste0(x$name,"_lab")] %in% c(paste0("no ",x$name),no_strata_name) ),"tmp_var"]) if(length(vax_dep_cat)>0) for(ibr in 1:length(vax_dep_cat) ){ @@ -1093,7 +1098,7 @@ summary_tab <- function( var_names, # var_names <- c("lab", "cal_time_cat") res_tab_model$all_cat <- gsub( paste0(":",ivar), ":", res_tab_model$all_cat) } names_matched <- match(c("exp","low","upp","Pr(","se("),substring(names(res_tab_model),1,3)) - + if(any(!is.na(names_matched))) names(res_tab_model)[names_matched[!is.na(names_matched)]] <- c("RR","lci","uci","pval","se_coef")[!is.na(names_matched)] #names(res_tab_model)[match(c("exp","low","upp","Pr("),substring(names(res_tab_model),1,3))]] <- c("RR","2.5%","97.5%","pval") @@ -1136,7 +1141,7 @@ summary_tab <- function( var_names, # var_names <- c("lab", "cal_time_cat") res_tab_new$all_cat_without_space <- gsub(" ","",res_tab_new$all_cat) res_tab$all_cat_without_space <- gsub(" ","",res_tab$all_cat) res_tab_new <- merge.data.frame(res_tab_new,res_tab, by=c("all_cat_without_space"), all=T, sort=F ) - + names(res_tab_new)[substring(names(res_tab_new),nchar(names(res_tab_new))-1,nchar(names(res_tab_new)))==".x"] <- substring(names(res_tab_new),1,nchar(names(res_tab_new))-2)[substring(names(res_tab_new),nchar(names(res_tab_new))-1,nchar(names(res_tab_new)))==".x"] for(ivar in c(names(res_tab)[ !(names(res_tab) %in% c("all_cat_without_space",model_res_names))]) ){ # c( "event","all_cat","n_events","atrisk_days","atrisk_ids","days_pp","relative_rate","relative_perc")){ #for(ivar in c( "event","all_cat","n_events","atrisk_days","atrisk_ids","days_pp","relative_rate","relative_perc")){ @@ -1148,7 +1153,7 @@ summary_tab <- function( var_names, # var_names <- c("lab", "cal_time_cat") res_tab_new[,paste0(ivar,".y")] <- NULL } if( "model.y" %in% names(res_tab_new) & !("RR.y" %in% names(res_tab_new))) res_tab_new <- res_tab_new[,names(res_tab_new)!="model.y"] - + if(!missing(mod)){ model_res_names <- c("RR","lci","uci","pval","coef","se_coef","model") #model_res_names <- c("i","RR","2.5%","97.5%","pval","coef","se(coef)","model") @@ -1158,12 +1163,12 @@ summary_tab <- function( var_names, # var_names <- c("lab", "cal_time_cat") names(res_tab_new)[var_numbers] <- model_res_names[!is.na(var_numbers)] if( any( names(res_tab_new)=="RR" ) & any(names(res_tab_new)=="RR.y" ) ){ - # duplicated sets of columns with different values + # duplicated sets of columns with different values if(any( cond<-!is.na(res_tab_new$model.y) & substring(res_tab_new$all_cat,1,1)=="[" )){ - #if(sum( cond<- !is.na( res_tab_new[,"pval"] ) & !is.na( res_tab_new[,paste0("pval.y")]) & res_tab_new[,paste0("pval")]!=res_tab_new[,paste0("pval.y")] )>0){ + #if(sum( cond<- !is.na( res_tab_new[,"pval"] ) & !is.na( res_tab_new[,paste0("pval.y")]) & res_tab_new[,paste0("pval")]!=res_tab_new[,paste0("pval.y")] )>0){ res_tab_new_dupl <- res_tab_new[cond, , drop=F] res_tab_new_dupl[, model_res_names ] <- NULL - + match_res <- match(paste0(model_res_names,".y"),names(res_tab_new_dupl)) names(res_tab_new_dupl)[match_res[!is.na(match_res)]] <- model_res_names[!is.na(match_res)] if(any(names(res_tab_new_dupl)=="i.y")){ @@ -1172,7 +1177,7 @@ summary_tab <- function( var_names, # var_names <- c("lab", "cal_time_cat") } res_tab_new_dupl <- res_tab_new_dupl[!duplicated(res_tab_new_dupl[,names(res_tab_new_dupl)!="i"]),] } - + if(sum( (cond <- is.na( res_tab_new[,"RR"] ) & !is.na( res_tab_new[,paste0("RR.y")])) )>0) res_tab_new[cond, model_res_names ] <- res_tab_new[cond, paste0(model_res_names,".y") ] @@ -1196,7 +1201,7 @@ summary_tab <- function( var_names, # var_names <- c("lab", "cal_time_cat") if(any(cond<-!is.na(match(names(res_tab_new),c("i.y","model.y"))))) res_tab_new[,cond] <- NULL res_tab_new$model <- 1 } - + # order table on 'all_cat' res_tab_new$tablab_order <- match( res_tab_new$all_cat, levels(factor_ref(res_tab_new$all_cat[ substring(res_tab_new$all_cat,1,1)!="[" ], lab_orders=lab_orders)) ) res_tab_new$tablab_order[is.na(res_tab_new$tablab_order)] <- 100000 @@ -1435,7 +1440,7 @@ plot_res <- function(res, main="", if(all(is.na(x[cond,"model"]))) return(0) tapply( x[cond,"i"], x[cond,"model"], length ) } ))) - + if(missing(ylim)){ ymax <- unlist(lapply(res,function(x) { if(all(dimnames(x)[[2]]!="RR")) return(NA) @@ -1466,7 +1471,7 @@ plot_res <- function(res, main="", # function for colors: col_alpha <- function(col,alpha=0) rgb(t(col2rgb(col)/255),alpha=alpha) - + ########### # # plot 1: all coefficients: @@ -1522,7 +1527,7 @@ plot_res <- function(res, main="", if(!correct_max_time_adj){ # CI's for adjusted RR's: if(CI & sum(c("RR","lci","uci") %in% dimnames(res[[imod]])[[2]])==3) - if(any(!is.na( res[[imod]][cond_after_ncoef,][!is.na(res[[imod]]$RR[ cond_after_ncoef ]), c("lci","uci")] ))) + if(any(!is.na( res[[imod]][cond_after_ncoef,][!is.na(res[[imod]]$RR[ cond_after_ncoef ]), c("lci","uci")] ))) matlines( rbind( ncoef + 1:(nrow(res[[imod]])-ncoef), ncoef + 1:(nrow(res[[imod]])-ncoef) )[,!is.na(res[[imod]]$RR[ cond_after_ncoef ])], t(res[[imod]][cond_after_ncoef,][ !is.na(res[[imod]]$RR[ cond_after_ncoef ]) ,c("lci","uci")]), lty=1, lwd=1, col=col_alpha(col[imod],0.15), type="o", pch="-", cex=2 ) @@ -1594,9 +1599,10 @@ plot_res <- function(res, main="", # plot 2 en 3 (if ymax>30): coefficients without coefficients for time adjusting: # for(i in 2:3) { # plot 2 and plot 3 - - if(i==2){ + + if(i==2){ ymax <- unlist(lapply(res,function(x) { + if(is.null(x)) return(NA) if(all(dimnames(x)[[2]]!="RR") | nrow(x)==0 ) return(NA) x_tmp <- x$RR[1:ncoef][x$RR[1:ncoef]<1000] if(any(!is.na(x_tmp))) max(x_tmp*1.2,na.rm=T) else NA @@ -1609,7 +1615,7 @@ plot_res <- function(res, main="", if(i==3 & max(ylim)<=30) next if(i==3 & max(ylim)>30 ) ylim=c(0,20) - + plot( c(0,ncoef), ylim, type="n", main=main, xlab="effect number", ylab=ifelse(i==2, "RR", "RR under 20") ) grid();abline(h=1, col="darkgray",lty=1) @@ -1617,7 +1623,8 @@ plot_res <- function(res, main="", # CI's for unadjusted and adjusted RR's: if(CI) - for(imod in 1:length(res)){ + for(imod in 1:length(res)){ + if(is.null(res[[imod]])) next if(sum(c("RR","lci","uci") %in% dimnames(res[[imod]])[[2]])!=3 | nrow(res[[imod]])==0) next if(any(!is.na( t(res[[imod]][1:ncoef,][ !is.na(res[[imod]]$RR[1:ncoef]) ,c("lci","uci")]) ))) matlines( rbind( (1:ncoef+x_deltas[imod]),(1:ncoef+x_deltas[imod]))[,!is.na(res[[imod]]$RR[1:ncoef])], @@ -1626,6 +1633,7 @@ plot_res <- function(res, main="", } # RR's: for(imod in 1:length(res)){ + if(is.null(res[[imod]])) next if(sum(c("RR","lci","uci") %in% dimnames(res[[imod]])[[2]])!=3 | nrow(res[[imod]])==0) next lines( 1:ncoef+x_deltas[imod],res[[imod]]$RR[1:ncoef], type="o", col=col[imod],lwd=ifelse(imod==1,2,1)); if(imod==1) text( 1:ncoef,res[[1]]$RR, labels=as.character(res[[1]]$i), pos=3, col=col[1], cex= ifelse(ncoef<=50,1,0.7) ) @@ -1764,6 +1772,7 @@ scri <- function(vax_def, censored_vars=c(), # The rule 'rw_observed_percentage' does not work for variables 'censored_vars'. # (for example, "death_days" ==> 'id' is included in the corresponding risk window till death date.) event_in_rw=T, # if event in rw ==> this rw should not be deleted even if not completely observed + lplots = T, leventplot = T, max_n_points=NA, eventplot_file_separate=F, lplot = T, CI = T, lforest=T, forest_nrows=50 ,forest_cex=0.5, forest_cex_head=0.5, @@ -1783,11 +1792,24 @@ scri <- function(vax_def, sdr_tabs, sdr_models, cut_points_name = "", col = c("red", "green3", "orange", "deepskyblue", "magenta2", "cyan2", "chocolate1", "gray" ), + performance=F, ... ){ - + if(missing(data)) stop("Dataset 'data' is missing.") + start_sys_time <- Sys.time() + + # stratum: + # if only for subset (i.e., use_all_events==F) ==> delete all other rows + if(strata_var!="" & !use_all_events) { + data <- data[ data[,strata_var]== strata_value & !is.na(data[,strata_var]), ] + strata_var <- "" + } + + if(nrow(data)==0) return("no data") + + if(!missing(vax_def)) if(class(vax_def)=="scri_parameters"){ # vax variables: @@ -1814,6 +1836,8 @@ scri <- function(vax_def, if( missing(extra_name ) & "extra_name" %in% names(extra_parameters) ) extra_name <- extra_parameters[["extra_name" ]] + if( missing(lplots ) & "lplots" %in% names(extra_parameters) ) lplots <- extra_parameters[["lplots" ]] + if( missing(leventplot ) & "leventplot" %in% names(extra_parameters) ) leventplot <- extra_parameters[["leventplot" ]] if( missing(max_n_points ) & "max_n_points" %in% names(extra_parameters) ) max_n_points <- extra_parameters[["max_n_points" ]] if( missing(eventplot_file_separate) & "eventplot_file_separate" %in% names(extra_parameters) ) eventplot_file_separate <- extra_parameters[["eventplot_file_separate" ]] @@ -1843,8 +1867,9 @@ scri <- function(vax_def, if( missing(time_seq ) & "time_seq" %in% names(extra_parameters) ) time_seq <- extra_parameters[["time_seq" ]] if( missing(sdr_tabs ) & "sdr_tabs" %in% names(extra_parameters) ) sdr_tabs <- extra_parameters[["sdr_tabs" ]] - if( missing(sdr_models ) & "sdr_models" %in% names(extra_parameters) ) sdr_models <- extra_parameters[["sdr_models" ]] + if( missing(sdr_models ) & "sdr_models" %in% names(extra_parameters) ) sdr_models <- extra_parameters[["sdr_models" ]] + if( missing(performance ) & "performance" %in% names(extra_parameters) ) performance <- extra_parameters[["performance" ]] } if(!missing(event_info)){ @@ -1855,10 +1880,10 @@ scri <- function(vax_def, if(any(!is.na(data[,event_date])) & all( is.na(data[,event_time]))) data[,event_time] <- as.numeric(difftime(data[,event_date],as.Date("2020-08-31"),units="days")) if(all( is.na(data[,event_date])) & any(!is.na(data[,event_time]))) data[,event_date] <- as.Date("2020-08-31") + data[,event_time] - - data[,event] <- as.numeric(!is.na(data[,event_date])) } + data[,event] <- as.numeric(!is.na(data[,event_date])) + if(!any(data[,event]==1)) return("no data") ####################################################### # @@ -1906,65 +1931,48 @@ scri <- function(vax_def, if(any(names(vax_def$rws$vax_dep)=="before")) output_name <- c( output_name, paste0( gsub("_","",vax_def$rws$vax_dep["before"]), collapse = "_") ) if(any(names(vax_def$rws$vax_dep)=="after" )) output_name <- c( output_name, paste0( gsub("_","",vax_def$rws$vax_dep["after"] ), collapse = "_") ) } else output_name <- c( output_name, "nosplit" ) - if(strata_var!="" &!is.na(strata_value)) output_name <- c( output_name, strata_value ) + if(!is.na(strata_value)) output_name <- c( output_name, strata_value ) output_name <- paste0(output_name, collapse="_") output_name <- trimws(output_name, "left", "_") + cat(output_name) + output_file_name <- gsub("[:;,.-]","_",output_name) - + id <- vax_def$data_parameters$id vax_time <- vax_def$data_parameters$vax_time vax_date <- vax_def$data_parameters$vax_date vax_name <- vax_def$data_parameters$vax_name vax_dep <- vax_def$rws$vax_dep - - # add information about the first dose in separate variables: - data[,id] <- as.factor(data[,id]) - data <- data[order(data[,id],data[,vax_time]),] - # select the first id row: - data_v1 <- data[!duplicated(data[,id]),c(id,"vax_n",vax_time,vax_date,vax_name,vax_dep)] - names(data_v1)[-(1:2)] <- paste0(names(data_v1)[-(1:2)],"_v1") - if( any(data_v1[,paste0(vax_def$rws$cond_var,"_v1")]!=vax_def$rws$cond_value[1] & data_v1[,"vax_n"]>0) ) { - print("The first dose names:") - print(table1(data_v1[,paste0(vax_def$rws$cond_var,"_v1")])) - stop("Not everybode has the same name of the first dose.") - } - data_v1[,"vax_n"] <- NULL - data <- merge.data.frame(data,data_v1,by=id,all.x=T, ) - - if(!(event %in% names(data))) data[,event] <- as.numeric(!is.na(data[,event_date])) + +#browser() +#cat(paste("before image plot:",Sys.time(),"\n")) + # create leventplot (image) plots: - if(leventplot){ + if(leventplot & lplots){ if(any( !( names(list(...)) %in% names(formals(pdf)) ) )) stop(paste0("Argument[s] '", paste(names(list(...))[ !( names(list(...)) %in% names(formals(pdf)) )], collapse="', '" ), "' is not an argumnent of function 'pdf' from library 'qpdf'.")) + gc() pdf(file=paste0(sdr_tabs,"eventplot_tmp.pdf"), width=width, ... ) - + try(image_plots( vax_def=vax_def, data=data, event_info=event_info, strata_var=strata_var, strata_value=strata_value, use_all_events=use_all_events, tit=output_name, max_n_points=max_n_points ),silent=T) - + dev.off() - + #cat(paste("after image plot:",Sys.time(),"\n")) + } data <- data[data[,event]==1 & !is.na(data[,vax_def$data_parameters$vax_time]),] - # stratum: - # if only for subset (i.e., use_all_events==F) ==> delete all other rows - if(strata_var!="" & !use_all_events) { - data <- data[ data[,strata_var]== strata_value & !is.na(data[,strata_var]), ] - strata_var <- "" - } - - - if( length(time_seq)>1 & length(time_seq_ref)==1 ) time_seq_ref <- rep(time_seq_ref,length(time_seq)) if(any(ls()=="res")) rm(res) @@ -1973,6 +1981,8 @@ scri <- function(vax_def, names(res) <- c("no_adj", names(time_seq) ) if(strata_var!="" & use_all_events) names(res) <- paste0(names(res), "_all_events") + + #cat(paste("before scri_fit:",Sys.time(),"\n")) res[[1]] <- scri_fit(formula = formula, vax_def=vax_def, @@ -1995,6 +2005,7 @@ scri <- function(vax_def, event_in_rw = event_in_rw, lprint=lprint, save_data = save_data) + #cat(paste("after scri_fit:",Sys.time(),"\n")) ############################# # @@ -2051,6 +2062,7 @@ scri <- function(vax_def, res[ names(res_paral)[ names_overlap[!is.na(names_overlap)] ] ] <- res_paral[!is.na(match(names(res_paral),names(res)))] } # end of parallel + #cat(paste("after parallel scri_fit:",Sys.time(),"\n")) if(length(time_seq)>0 & !paral) @@ -2079,6 +2091,7 @@ scri <- function(vax_def, save_data = save_data)#[[1]] } + #cat(paste("after non-parallel scri_fit:",Sys.time(),"\n")) # for output file with 'tabs' @@ -2180,7 +2193,7 @@ scri <- function(vax_def, class(ret) <- "scri_output" - if(lplot | lforest){ + if((lplot | lforest) & lplots){ pdf(file=paste0(sdr_tabs,"tmp.pdf"), width=width, ...) if(!is.null(tabs[[1]])){ @@ -2212,6 +2225,9 @@ scri <- function(vax_def, if(!is.null(dev.list())) dev.off() } # end plots + #cat(paste("end :",Sys.time(),"\n")) + if(performance) cat(paste0(": duration = ",round(Sys.time()-start_sys_time),"s (till ",Sys.time(),")\n")) + else cat("\n") ret @@ -2238,7 +2254,7 @@ scri <- function(vax_def, image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata_value=NA, use_all_events=T, max_n_points=NA){ - + id <- vax_def$data_parameters$id vax_name <- vax_def$data_parameters$vax_name vax_date <- vax_def$data_parameters$vax_date @@ -2281,6 +2297,7 @@ image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata if(any(class(data_with_deaths[,iname]) %in% c("POSIXct","POSIXt","Date"))) data_with_deaths$death_date <- data_with_deaths[,iname] if(any(class(data_with_deaths[,iname]) %in% c("numeric","integer"))) data_with_deaths$death_days <- data_with_deaths[,iname] } + total_deaths <- length(unique(data_with_deaths[,id])) # the number of vaccinated and unvaccinated id's: n_unvaccinated <- length(unique(data[ is.na(data[,vax_date]),id])) @@ -2319,7 +2336,7 @@ image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata data$idep_value <- "" tit_dep <- "" } - + # per 'vax_number' or 'vax_name' for(ivax in 1:length(all_vax_names)){ @@ -2336,9 +2353,14 @@ image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata names(data_ivax)[2:4] <- paste0(c("vax_time","vax_date","vax_name"),"_ivax") data_ivax$idep_ivax_id <- rep(T,nrow(data_ivax)) data <- merge(data[, !(names(data) %in% names(data_ivax)[-1]) ], data_ivax, by=id, all.x=T ) + data$ivax_cond <- data[,"vax_name_ivax"]==all_vax_names[ivax] data_plot <- data[data$idep_ivax_id & !is.na(data$idep_ivax_id),] + limits_x <- range(data_plot$time_x, na.rm=T) + limits_y1 <- range(data_plot$time_y1, na.rm=T) + limits_y2 <- range(data_plot$time_y2, na.rm=T) + # create temporary variables with the current dose info in 'data_with_deaths' dataset: if(nrow(data_with_deaths)>0){ data_ivax <- data_with_deaths[data_with_deaths[,vax_name]==all_vax_names[ivax],c(id,vax_time,vax_date,vax_name, "death_days")] @@ -2350,91 +2372,152 @@ image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata data_ivax$ivax_id <- rep(T,nrow(data_ivax)) data_ivax$death_days <- NULL data_with_deaths <- merge(data_with_deaths[, !(names(data_with_deaths) %in% names(data_ivax)[-1]) ], data_ivax, by=id, all.x=T ) + data_with_deaths$ivax_cond <- data_with_deaths[,"vax_name_ivax"]==all_vax_names[ivax] & !is.na(data_with_deaths[,"vax_name_ivax"]) + + limits_x <- range(data_plot$time_x, data_with_deaths$time_x, na.rm=T) + limits_y1 <- range(data_plot$time_y1, data_with_deaths$time_y1, na.rm=T) + limits_y2 <- range(data_plot$time_y2, data_with_deaths$time_y2, na.rm=T) + } - limits_x <- range(data_plot$time_x, data_with_deaths$time_x, na.rm=T); limits_x <- limits_x + c(-1,1)*0.02*diff(limits_x) - limits_y1 <- range(data_plot$time_y1, data_with_deaths$time_y1, na.rm=T); limits_y1 <- limits_y1 + c(-1,1)*0.06*diff(limits_y1) - limits_y2 <- range(data_plot$time_y2, data_with_deaths$time_y2, na.rm=T); limits_y2 <- limits_y2 + c(-1,1)*0.06*diff(limits_y2) - - tit_ivax <- paste0(tit_dep, ifelse(tit_dep=="",""," & "), trimws(all_vax_names[ivax])) + limits_x <- limits_x + c(-1,1)*0.02*diff(limits_x) + limits_y1 <- limits_y1 + c(-1,1)*0.06*diff(limits_y1) + limits_y2 <- limits_y2 + c(-1,1)*0.06*diff(limits_y2) + + tit_ivax <- tit_ivax_events <- tit_ivax_deaths <- paste0(tit_dep, ifelse(tit_dep=="",""," & "), trimws(all_vax_names[ivax])) + ########### # sample points if they are more than 'max_n_points' + # data_plot$cond_sampled_ids <- rep(T,nrow(data_plot)) - if(!is.na(max_n_points)) - if( max_n_points < sum( data_plot$ivax_cond & data_plot$time_x>=0 ) ) { - ivax_ids <- data_plot[data_plot$ivax_cond & data_plot$time_x>=0,id] - data_plot_all_ids <- data_plot[,id] - sampled_ids <- sample(data_plot_all_ids, max_n_points * (length(data_plot_all_ids)/length(ivax_ids))) - data_plot$cond_sampled_ids <- data_plot[,id] %in% sampled_ids - tit_ivax <- paste0(tit_ivax, ifelse(tit_ivax=="",""," & "),"\n[", length(sampled_ids)," points from ",length(data_plot_all_ids),"]") + if(nrow(data_with_deaths)>0) + data_with_deaths$cond_sampled_ids <- rep(T,nrow(data_with_deaths)) + + if(!is.na(max_n_points)){ + # for events: + ivax_ids <- unique(data_plot[data_plot$ivax_cond & data_plot$time_x>=0,id]) + if( max_n_points < length(ivax_ids) ) { + ivax_all_ids <- data_plot[data_plot$ivax_cond,id] + sampled_ids <- sample(ivax_all_ids, max_n_points/length(ivax_ids) * length(ivax_all_ids)) + data_plot$cond_sampled_ids <- data_plot[,id] %in% sampled_ids & data_plot$ivax_cond + tit_ivax_events <- paste0(tit_ivax, ifelse(tit_ivax=="",""," & "),"\n[", round(100*max_n_points/length(ivax_ids),1),"% of all ",length(ivax_all_ids)," events after ",all_vax_names[ivax],"]") } - if(tit!="") tit_ivax <- paste0(tit,"\n",tit_ivax) - + # for deaths: + if(nrow(data_with_deaths)>0){ + ivax_ids <- unique(data_with_deaths[data_with_deaths$ivax_cond & data_with_deaths$time_x>=0,id]) + if( max_n_points < length(ivax_ids) ) { + ivax_all_ids <- unique(data_with_deaths[data_with_deaths$ivax_cond,id]) + sampled_ids <- sample(ivax_all_ids, max_n_points/length(ivax_ids) * length(ivax_all_ids)) + data_with_deaths$cond_sampled_ids <- data_with_deaths[,id] %in% sampled_ids & data_with_deaths$ivax_cond + tit_ivax_deaths <- paste0(tit_ivax, ifelse(tit_ivax=="",""," & "),"\n[", round(100*max_n_points/length(ivax_ids),1),"% of all ",length(ivax_all_ids)," deaths after ",all_vax_names[ivax],"]") + } + } + } + # + ##################### + cex=0.8 - - for(iplot_type in c(1,2,4)){ - ###################### the first and second types of plot: ######################## - # - xlab_extra <- "" - if(iplot_type%in%c(1,2)) { data_plot$time_y_date <- data_plot$time_y1; limits<-c(limits_x,limits_y1); ylab <- paste0("date of ", event) } - if(iplot_type%in%c(3,4)) { data_plot$time_y_date <- data_plot$time_y2; limits<-c(limits_x,limits_y2); ylab <- paste0('date of "', trimws(all_vax_names[ivax]),'"') } - - if(iplot_type%in%c(2,4)) { - limits[1:2] <- c( min( -90, data_plot$time_x[ data_plot[,paste0(vax_time,"_v1")]-90 <= data_plot[,event_time]]), 90 ); - xlab_extra <- paste0("\n[between vax1 - 90days and ", trimws(all_vax_names[ivax])," + 60 days]") - } - - data_plot$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_plot$time_y_date/365.25 - - if(!any( (cond <- data_plot$cond_sampled_ids & data_plot$ivax_cond & ( data_plot$time_x>=0 | data_plot[,event_time] < data_plot[,paste0(vax_time,"_v1")] ) ) )) next - - distr_2d <- with(data_plot[cond,], kde2d(x=time_x, y=time_y, h=20, n=100, lims=limits ) ) - distr_2d$y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + distr_2d$y/365.25 - - if(length(unique(distr_2d$y))>1) - image(distr_2d, ylab=ylab, xlab=paste0(event,': number of days from "',trimws(all_vax_names)[ivax],'"', xlab_extra),axes=F ) - #image(distr_2d, ylab=ylab, xlab=paste0(event,': number of days from "',trimws(all_vax_names)[ivax],'"', xlab_extra),col=hcl.colors(100, "YlOrRd", rev = TRUE),axes=F ) - else with(data_plot[data_plot$cond_sampled_ids,], - plot(time_x,time_y, type="n", xlim=c(min(0,time_x-abs(0.02*time_x),na.rm=T),max(60,time_x*1.02,na.rm=T)), axes=F, - xlab=paste0(event,': number of days from "',trimws(all_vax_names)[ivax],'"', xlab_extra), ylab=ylab ) ) - - axis(1); axis(2, at=at_date, labels=at_date_lab, las=1); box(col="yellow3") - #points_legend("event_vax1_num", "event_num", data, place=place, cex=cex ) - title(paste0("id's with ",event," & ",tit_ivax)) - #title(paste0("id's with ",event," & ",tit_ivax,'\nrelative to "', all_vax_names[ivax],'"')) - - ################ - # ablines: - abline(v=c(0,7,14,21,28, 60)); abline(v=0,lwd=2) - if(ivax==1) abline(v=c(-30,-90)) - abline(h=c(2021 + 0.25*((-1):10)), col="pink") - abline(h=c(2021,2022), col="pink",lwd=2) - - # add points: - points(data_plot$time_x[data_plot$cond_sampled_ids], data_plot$time_y[data_plot$cond_sampled_ids], cex=cex ) - for(ivax2 in 1:length(all_vax_names)){ - cond_ivax2 <- data_plot$cond_sampled_ids & data_plot[,vax_name]==all_vax_names[ivax2] - with(data_plot[cond_ivax2 & data_plot[,vax_time]<=data_plot[,event_time],], points(time_x, time_y, cex=cex, col=vax_names_col[ivax2] ) ) + for(ievent_death in c("events","deaths")) + for(iplot_type in c(1,2,4)){ + ###################### the first and second types of plot: ######################## + # + xlab_extra <- "" + if(iplot_type%in%c(1,2)) { data_plot$time_y_date <- data_plot$time_y1; limits<-c(limits_x,limits_y1); ylab <- "" } + if(iplot_type%in%c(3,4)) { data_plot$time_y_date <- data_plot$time_y2; limits<-c(limits_x,limits_y2); ylab <- paste0('date of "', trimws(all_vax_names[ivax]),'"') } + data_plot$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_plot$time_y_date / 365.25 + + if( nrow(data_with_deaths)>0){ + if(iplot_type%in%c(1,2)) { data_with_deaths$time_y_date <- data_with_deaths$time_y1 } + if(iplot_type%in%c(3,4)) { data_with_deaths$time_y_date <- data_with_deaths$time_y2 } + data_with_deaths$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_with_deaths$time_y_date / 365.25 + } + + if(ievent_death=="events" & iplot_type%in%c(2,4)){ + limits[1:2] <- c( min( -90, data_plot$time_x[ data_plot[,paste0(vax_time,"_v1")]-90 <= data_plot[,event_time]]), 90 ) + xlab_extra <- paste0("\n[between vax1 - 90days and ", trimws(all_vax_names[ivax])," + 60 days]") + } + if(ievent_death=="deaths") { + limits[1] <- -2 + if(iplot_type%in%c(2,4)) { limits[2] <- min(180,limits[2]); xlab_extra <- paste0("\n[after ", trimws(all_vax_names[ivax])," + 180 days]") } + } - # deaths: - if(nrow(data_with_deaths)>0){ data$vaxdep_all==vaxdep_values[idep] - if( length(vaxdep_values)==1 & vaxdep_values[1]=="all" ) data_with_deaths$vaxdep_all_cond <- rep(T,nrow(data_with_deaths)) - else data_with_deaths$vaxdep_all_cond <- data_with_deaths$ivax_id - #data_with_deaths$vaxdep_all_cond <- data_with_deaths$vaxdep_all_cond & data_with_deaths$ivax_id + + if(ievent_death == "events"){ + cond <- data_plot$cond_sampled_ids & data_plot$ivax_cond & ( data_plot$time_x>=0 | data_plot[,event_time] < data_plot[,paste0(vax_time,"_v1")] ) + if(!any(cond)) next + distr_2d <- with(data_plot[cond,], kde2d(x=time_x, y=time_y, h=20, n=100, lims=limits ) ) + distr_2d$y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + distr_2d$y/365.25 + } + if(ievent_death == "deaths" & nrow(data_with_deaths)>0){ + cond <- data_with_deaths$cond_sampled_ids & data_with_deaths$ivax_cond & data_with_deaths$time_x>=0 + if(!any(cond)) next + distr_2d <- with(data_with_deaths[cond,], kde2d(x=time_x, y=time_y, h=20, n=100, lims=limits ) ) + distr_2d$y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + distr_2d$y/365.25 + } + + image_start <- function(data_for_plot){ + if(length(unique(distr_2d$y))>1) + image(distr_2d, ylab=ylab, xlab=paste0(event,': number of days from "',trimws(all_vax_names)[ivax],'"', xlab_extra),axes=F ) + #image(distr_2d, ylab=ylab, xlab=paste0(event,': number of days from "',trimws(all_vax_names)[ivax],'"', xlab_extra),col=hcl.colors(100, "YlOrRd", rev = TRUE),axes=F ) + else with(data_for_plot[data_for_plot$cond_sampled_ids,], + plot(time_x,time_y, type="n", xlim=c(min(0,time_x-abs(0.02*time_x),na.rm=T),max(60,time_x*1.02,na.rm=T)), axes=F, + xlab=paste0(event,': number of days from "',trimws(all_vax_names)[ivax],'"', xlab_extra), ylab=ylab ) ) - if(!any(data_with_deaths$vaxdep_all_cond)) next + axis(1); axis(2, at=at_date, labels=at_date_lab, las=1); box(col="yellow3") + #points_legend("event_vax1_num", "event_num", data, place=place, cex=cex ) + #title(paste0("id's with ",event," & ",tit_ivax,'\nrelative to "', all_vax_names[ivax],'"')) - data_tmp <- data_with_deaths[data_with_deaths$vaxdep_all_cond,] - if(iplot_type %in% c(1,2)) data_tmp$time_y_date <- data_tmp$time_y1 - if(iplot_type %in% c(3,4)) data_tmp$time_y_date <- data_tmp$time_y2 - data_tmp$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_tmp$time_y_date/365.25 - with(data_tmp[data_tmp[,vax_time]<=data_tmp$death_days & data_tmp[,vax_name]==all_vax_names[ivax2],], - points(time_x, time_y, cex=1.1*cex, col=vax_names_col[ivax2], pch=3 ) ) + ################ + # ablines: + abline(v=c(0,7,14,21,28, 60)); abline(v=0,lwd=2) + if(ivax==1) abline(v=c(-30,-90)) + abline(h=c(2021 + 0.25*((-1):10)), col="pink") + abline(h=c(2021,2022), col="pink",lwd=2) + } # end function 'image_start' + + # add points: + if(ievent_death=="events"){ + if( iplot_type%in%c(1,2) ) ylab <- paste0("date of ", event) + image_start(data_plot[data_plot$cond_sampled_ids,]) + if(tit!="") tit_plot <- paste0(tit,"\n",tit_ivax_events) + else tit_plot <- tit_ivax_events + title(paste0("id's with ",event," & ",tit_plot)) + if(any(data_plot$cond_sampled_ids)){ + points(data_plot$time_x[data_plot$cond_sampled_ids], data_plot$time_y[data_plot$cond_sampled_ids], cex=cex ) + for(ivax2 in 1:length(all_vax_names)){ + cond_ivax2 <- data_plot$cond_sampled_ids & data_plot[,vax_name]==all_vax_names[ivax2] + with(data_plot[cond_ivax2 & data_plot[,vax_time]<=data_plot[,event_time],], points(time_x, time_y, cex=cex, col=vax_names_col[ivax2] ) ) + } + } } - } - - } # for iplot_type + if(ievent_death=="deaths"){ + if( iplot_type%in%c(1,2) ) ylab <- "date of death" + image_start(data_with_deaths[data_with_deaths$cond_sampled_ids,]) + if(tit!="") tit_plot <- paste0(tit,"\n",tit_ivax_deaths) + else tit_plot <- tit_ivax_deaths + title(paste0("id's with deaths & ",tit_plot)) + for(ivax2 in ivax:length(all_vax_names)){ + # deaths: + if(nrow(data_with_deaths)>0){ + if( length(vaxdep_values)==1 & vaxdep_values[1]=="all" ) data_with_deaths$vaxdep_all_cond <- rep(T,nrow(data_with_deaths)) + else data_with_deaths$vaxdep_all_cond <- data_with_deaths$ivax_cond + #data_with_deaths$vaxdep_all_cond <- data_with_deaths$vaxdep_all_cond & data_with_deaths$ivax_id + + if(!any(data_with_deaths$vaxdep_all_cond & data_with_deaths$cond_sampled_ids)) next + + data_tmp <- data_with_deaths[data_with_deaths$vaxdep_all_cond & data_with_deaths$cond_sampled_ids,] + if(nrow(data_tmp)>0){ + if(iplot_type %in% c(1,2)) data_tmp$time_y_date <- data_tmp$time_y1 + if(iplot_type %in% c(3,4)) data_tmp$time_y_date <- data_tmp$time_y2 + data_tmp$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_tmp$time_y_date/365.25 + with(data_tmp[data_tmp[,vax_time]<=data_tmp$death_days & data_tmp[,vax_name]==all_vax_names[ivax2],], + points(time_x, time_y, cex=cex, col=vax_names_col[ivax2], pch=3 ) ) + } + } + } + } # end if ievent_death=="deaths" + } # for iplot_type } # for ivax } # for idep @@ -2442,6 +2525,22 @@ image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata else if(strata_value!="") strata_value <- paste0(strata_value,"; ") if(nrow(data_unvax_events)>0){ + ########### + # sample points if they are more than 'max_n_points' + # + tit_plot <- "" + data_unvax_events$cond_sampled_ids <- rep(T,nrow(data_unvax_events)) + if(!is.na(max_n_points)){ + # for events: + data_unvax_events_all_ids <- unique(data_unvax_events[,id]) + if( max_n_points < length(data_unvax_events_all_ids) ) { + sampled_ids <- sample(data_unvax_events_all_ids, max_n_points ) + data_unvax_events$cond_sampled_ids <- data_unvax_events[,id] %in% sampled_ids + tit_ivax_events <- paste0("[",round(100*max_n_points/length(data_unvax_events_all_ids),1),"% of all unvac.events (",length(data_unvax_events_all_ids),")]") + if(tit!="") tit_plot <- paste0(tit,"\n",tit_ivax_events) + } + } + data_unvax_events <- data_unvax_events[data_unvax_events$cond_sampled_ids, ] data_unvax_events$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_unvax_events[,event_time]/365.25 plot(1:nrow(data_unvax_events), data_unvax_events$time_y, type="n", axes=F, xlab="just observation numbers", ylab=paste0("date of ", event ) ) axis(1); axis(2, at=at_date, labels=at_date_lab, las=1); box(col="yellow3") @@ -2449,17 +2548,40 @@ image_plots <- function(vax_def, event_info, data, tit="", strata_var="", strata abline(h=c(2021,2022), col="pink",lwd=2) points(1:nrow(data_unvax_events), data_unvax_events$time_y, cex=cex, col=vax_names_col[1], pch=1 ) } else { plot(1,1,axes=F,type="n",xlab="",ylab=""); text(1,1,paste0("no ",event), cex=1.5) } - title(paste0(strata_value,"unvaccinated persons (n_unvax=",n_unvaccinated,", n_vax=",n_vaccinated,")\ndate of ", event)) + title(paste0(strata_value,"unvaccinated persons (n_unvax=",n_unvaccinated,", n_vax=",n_vaccinated,")\n",tit_plot)) + + no_plot_yet <- T; tit_plot <- "" + if(nrow(data_with_deaths)>0){ + if(any(is.na(data_with_deaths[,vax_time]) & !is.na(data_with_deaths$death_days) )){ + ########### + # sample points if they are more than 'max_n_points' + # + data_with_deaths$cond_sampled_ids <- rep(T,nrow(data_with_deaths)) + if(!is.na(max_n_points)){ + # for deaths: + data_with_deaths$ivax_cond <- is.na(data_with_deaths[,vax_time]) & !is.na(data_with_deaths$death_days) + ivax_ids <- length(unique(data_with_deaths[data_with_deaths$ivax_cond,id])) + if( max_n_points < ivax_ids ) { + ivax_total_ids <- unique(data_with_deaths[,id]) + sampled_ids <- sample(ivax_total_ids, max_n_points/ivax_ids * total_deaths ) + data_with_deaths$cond_sampled_ids <- data_with_deaths[,id] %in% sampled_ids + tit_ivax_deaths <- paste0("[",round(100*max_n_points/ivax_ids,1),"% of total unvac.deaths (",total_deaths,")]") + if(tit!="") tit_plot <- paste0(tit,"\n",tit_ivax_deaths) + } + } + data_with_deaths <- data_with_deaths[ data_with_deaths$cond_sampled_ids, ] + data_with_deaths$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_with_deaths$death_days/365.25 + plot( 1:sum(is.na(data_with_deaths[,vax_time])), data_with_deaths[is.na(data_with_deaths[,vax_time]),"time_y"], type="n", axes=F, xlab="just observation numbers", ylab=paste0("date of death") ) + axis(1); axis(2, at=at_date, labels=at_date_lab, las=1); box(col="yellow3") + abline(h=c(2021 + 0.25*((-1):10)), col="pink") + abline(h=c(2021,2022), col="pink",lwd=2) + points( 1:sum(is.na(data_with_deaths[,vax_time])), data_with_deaths[is.na(data_with_deaths[,vax_time]),"time_y"], cex=cex, col=vax_names_col[1], pch=3 ) + no_plot_yet <- F + } + } + if(no_plot_yet){ plot(1,1,axes=F,type="n",xlab="",ylab=""); text(1,1,"no deaths",cex=1.5) } + title(paste0(strata_value,"unvaccinated persons (n_unvax=",n_unvaccinated,", n_vax=",n_vaccinated,")\n", tit_plot)) - if(any(is.na(data_with_deaths[,vax_time]) & !is.na(data_with_deaths$death_days) )){ - data_with_deaths$time_y <- (1970 + as.numeric(as.Date("2020-09-01"))/365.25) + data_with_deaths$death_days/365.25 - plot( 1:sum(is.na(data_with_deaths[,vax_time])), data_with_deaths[is.na(data_with_deaths[,vax_time]),"time_y"], type="n", axes=F, xlab="just observation numbers", ylab=paste0("date of death") ) - axis(1); axis(2, at=at_date, labels=at_date_lab, las=1); box(col="yellow3") - abline(h=c(2021 + 0.25*((-1):10)), col="pink") - abline(h=c(2021,2022), col="pink",lwd=2) - points( 1:sum(is.na(data_with_deaths[,vax_time])), data_with_deaths[is.na(data_with_deaths[,vax_time]),"time_y"], cex=1.4*cex, col=vax_names_col[1], pch=3 ) - } else { plot(1,1,axes=F,type="n",xlab="",ylab=""); text(1,1,"no deaths",cex=1.5) } - title(paste0(strata_value,"unvaccinated persons (n_unvax=",n_unvaccinated,", n_vax=",n_vaccinated,")\ndate of death")) } # the end of function 'brand_images' @@ -2498,7 +2620,8 @@ forest_plots_tab <- function(tab_list, ndigits=2, nrows_forest_plot=40, cex=0.8, tt <- as.data.frame(lapply(tt,function(x,digits){if(mode(x)=="numeric")x<-round(x,digits=digits);x},digits=ndigits)) tt$group_start <- 1:nrow(tt) - tt$group_start[!grepl("reference",tt$all_cat) & c(F,substring(tt$all_cat[-1],1,3) == substring(tt$all_cat[-nrow(tt)],1,3))] <- NA + tt$group_start[!grepl("reference",tt$all_cat) & substring(tt$all_cat,1,1)!="[" & c(F,substring(tt$all_cat[-1],1,8) == substring(tt$all_cat[-nrow(tt)],1,8))] <- NA + #tt$group_start[!grepl("reference",tt$all_cat) & c(F,substring(tt$all_cat[-1],1,3) == substring(tt$all_cat[-nrow(tt)],1,3))] <- NA tt$group_start[ !c( T, tt$group_start[-1] - tt$group_start[-nrow(tt)] >1 ) ] <- NA tt$group <- 1 @@ -3570,4 +3693,3 @@ characteristics <- function(data, event, path_file_name, condition_value="", vax # the end of functions : # ############################### - From da50414b0adea49c2d2107006e4280df9c1d45b0 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Mon, 21 Nov 2022 09:20:05 +0100 Subject: [PATCH 07/10] faster --- p_steps/08_T5_10_scri.R | 350 +++++++++++++++------------------------- 1 file changed, 130 insertions(+), 220 deletions(-) diff --git a/p_steps/08_T5_10_scri.R b/p_steps/08_T5_10_scri.R index 3b4e5697..b78502ef 100644 --- a/p_steps/08_T5_10_scri.R +++ b/p_steps/08_T5_10_scri.R @@ -6,7 +6,7 @@ # runs on all datasets in g_output/scri # Requirements: # dependencies: preceding steps, package "survival" -# input: g_output/scri/* +# input: data_vax_SCRI # output: g_output/scri/* # # parameters: in 07_scri_inputs.R @@ -44,94 +44,18 @@ for (subpop in subpopulations_non_empty) { cat(paste0('\n\t"',subpop,'":\n\n')) - # Import Data ------------------------------------------------------------- - - - - # Load the D3_study_population_SCRI - load(paste0(dirtemp, "D3_study_population_SCRI", suffix[[subpop]], ".RData")) - scri_input <- as.data.frame(get(paste0("D3_study_population_SCRI", suffix[[subpop]]))) - rm(list = paste0("D3_study_population_SCRI", suffix[[subpop]])) + memory.limit(10^13) + #detach("package:data.table", unload = TRUE) - # scri_input <- D3_study_population_SCRI - - ################# - # create dataset 'data_vax' (with multiple rows per person) from 'scri_input' (with one row per person) - # - if(any(c("date_vax1","vax1_date") %in% names(scri_input))){ - date_var <- "date_vax"; type_var <- "type_vax" - if(any(c("vax1_date") %in% names(scri_input))) date_var <- "vax_date" - if(any(c("vax1_type") %in% names(scri_input))) type_var <- "vax_type" - scri_n_vax_var <- as.integer(substring(names(scri_input),9)[substring(names(scri_input),1,8)==date_var]) # ==> 1,2,3 (from "data_vax1", "date_vax2", ...) - - for(ivax in scri_n_vax_var){ - data_tmp <- scri_input[, !substring(names(scri_input),1,8) %in% c(type_var,date_var) | - ( substring(names(scri_input),1,8) %in% c(type_var,date_var) & names(scri_input) %in% paste0(c(type_var,date_var),ivax) ) ] - names(data_tmp)[names(data_tmp)==paste0(date_var,ivax)] <- "vax_date" - names(data_tmp)[names(data_tmp)==paste0(type_var,ivax)] <- "vax_brand" - data_tmp$vax_n <- ivax - data_tmp <- data_tmp[,c( names(data_tmp)[!names(data_tmp) %in% c("vax_brand","vax_date")], "vax_brand", "vax_date" )] - - if(ivax==1) data_vax <- data_tmp - else data_vax <- rbind.data.frame(data_vax, data_tmp) - } - data_tmp <- data_tmp[F,] - if("vax1_date" %in% names(scri_input)) data_tmp <- scri_input[is.na(scri_input[,"vax1_date"]),] - if("date_vax1" %in% names(scri_input)) data_tmp <- scri_input[is.na(scri_input[,"date_vax1"]),] - data_tmp$vax_n <- 0; data_tmp$vax_date <- rep(NA,nrow(data_tmp)); data_tmp$vax_brand <- rep(NA,nrow(data_tmp)) - if(nrow(data_tmp)>0) data_vax <- rbind.data.frame(data_vax, data_tmp[,names(data_vax)]) - data_vax <- data_vax[!(data_vax$vax_n>0 & is.na(data_vax$vax_date)),] - } else { - data_vax <- scri_input - names(data_vax)[names(data_vax) == "date_vax" ] <- "vax_date" - names(data_vax)[names(data_vax) %in% c("type_vax","vax_type")] <- "vax_brand" - } - # - ####### + # Import Data ------------------------------------------------------------- - data_vax[,"vax_days"] <- as.integer( difftime( data_vax[,"vax_date"], as.Date("2020-08-31"), units="days")) - # sort per id, vax_days - data_vax <- data_vax[order(data_vax[,id],data_vax[,"vax_days"]),] - - # dap: - if(!any(ls()=="dap")) dap <- ifelse( any(tolower(names(data_vax))=="dap"), data_vax[1,tolower(names(data_vax))=="dap"], "" ) - if(dap=="" & any(tolower(names(data_vax))=="datasource")) dap <- data_vax[1,tolower(names(data_vax))=="datasource"] - - # calculate the 'days'-variables: - for(idate_vars in substring(names(data_vax),6)[substring(names(data_vax),1,5)=="date_"]) - data_vax[,paste0(idate_vars,"_days")] <- as.integer( difftime( data_vax[,paste0("date_",idate_vars)], as.Date("2020-08-31"), units="days")) - for(idate_vars in substring(names(data_vax),1,nchar(names(data_vax))-5)[substring(names(data_vax),nchar(names(data_vax))-4,nchar(names(data_vax)))=="_date"]) - data_vax[,paste0(idate_vars,"_days")] <- as.integer( difftime( as.Date(data_vax[,paste0(idate_vars,"_date")]), as.Date("2020-08-31"), units="days")) - names(data_vax)[names(data_vax)=="of_death_days"] <- "death_days" - - - # check: - tb<-table(data_vax$study_entry_days < data_vax$study_exit_days ) - - if(length(tb)>1){ - warning(paste("There are ",tb["FALSE"],"rows with 'study_entry_date' >= 'study_exit_date' !")) - print('"study_entry_days" < "study_exit_days":') - table(tb) - data_vax_excluded <- data_vax[data_vax$study_entry_days >= data_vax$study_exit_days,] - save(data_vax_excluded,file=paste0(sdr0,"excluded_rows.RData")) - sink(paste0(sdr0,"excluded_rows.txt")); print(data_vax_excluded);sink() - - data_vax <- data_vax[data_vax$study_entry_days < data_vax$study_exit_days,] - } - - cond <- !is.na(data_vax$study_entry_days) & ( is.na(data_vax$vax_days) | ( !is.na(data_vax$vax_days) & data_vax$study_entry_days <= data_vax$vax_days ) ) - if(any(!cond)) stop(paste( sum(cond), "rows with 'study_entry_days' > 'vax_days'")) - data_vax <- data_vax[cond,] + # Load dataset 'data_vax' by loading file 'data_vax_SCRI.RData' + load(paste0(dirtemp, "data_vax_SCRI", suffix[[subpop]], ".RData")) + if("pat_n" %in% names(data_vax)) id <- "pat_n" - ############# SCRI models ############################ - # - # - old_width = options(width=300) - - # - ######################################################## + gc() ######################################## # @@ -144,33 +68,6 @@ for (subpop in subpopulations_non_empty) { ######################################## - - ########################################## - # - # create variables: - # dose number: 'vax_n' (1,2,...) - # 'vax_number' ("dose 1" ,"dose 2", "dose 3", ...) - # - - #### 'vax_n': - data_vax <- data_vax[order(data_vax[,id],data_vax[,"vax_days"]),] - ivax <- 1; - data_vax$vax_n <- data_vax[,"vax_days"] - data_vax$vax_n[!is.na(data_vax$vax_n)] <- ivax - data_vax$vax_n[ is.na(data_vax$vax_n)] <- 0 - while(T){ - cond_vax_i <- data_vax$vax_n == ivax & !is.na(data_vax$vax_n) - cond2<-duplicated(data_vax[cond_vax_i,id]) - if(!any(cond2)) break - ivax <- ivax + 1 - data_vax$vax_n[cond_vax_i][cond2] <- ivax - } - #### 'vax_number': - data_vax$vax_number <- paste0("dose ",data_vax$vax_n," ") - #### 'vax_brand_short': - data_vax$vax_brand_short <- format(substring(data_vax[,"vax_brand"],1,5)) - - ########################################################################################## # # these parameters are used for all analyses: @@ -182,10 +79,13 @@ for (subpop in subpopulations_non_empty) { lprint = F, plot_during_running = F, leventplot = leventplot, + max_n_points = max_n_points, # ?1000 lplot = lplot, CI_draw = CI_draw, lforest = lforest, - col = col_list + lplots = T, + col = col_list, + performance = T ) # default in function 'define_rws': (vax2 takes precedence over vax1) the risk window of dose 2 takes precedence over the risk window of dose 1 @@ -195,12 +95,11 @@ for (subpop in subpopulations_non_empty) { print(iae) if(!(paste0(iae,"_days") %in% names(data_vax))) { cat(paste0('\nevent "',iae,'" not found.\n\n')); next } - - #if(!any(names(data_vax)==iae)) - data_vax[,iae] <- as.integer(!is.na(data_vax[,paste0(iae,"_days")])) - + if(lmain){ + print("main part") + # SCCS output_directory for the event: EXPORT sdr <- paste0(sdr0, iae,"/") dir.create(sdr, showWarnings = FALSE, recursive = TRUE) @@ -229,23 +128,23 @@ for (subpop in subpopulations_non_empty) { # # vax_name="vax_number": dose1, dose2, dose3, ... # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) ################################################### # baseline tables - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) - characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr,"baseline.txt"), vax_name="vax_number", condition_value="", age="age_at_study_entry", lab_orders=vax_def$lab_orders ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + #characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr,"baseline.txt"), vax_name="vax_number", condition_value="", age="age_at_study_entry", lab_orders=vax_def$lab_orders ) ########### vax_number & no split ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181], >181 } - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) - res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=F ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=F, lplots=F ) ## cut_points_name="7d" { [-91;-30], [-29;-1], [0;0], [1;7], [8;14], [15;21], [22;28] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax ) - res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=T, lplots=T ) ########### vax_number & brand ( no distance): ##### @@ -253,12 +152,12 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=F) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=F, lplots=F) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=T, lplots=T ) @@ -267,17 +166,19 @@ for (subpop in subpopulations_non_empty) { # strata analyse with brand for: age30, age30_50, sex, sex_age30 # - data_vax$age30 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30, Inf)))); data_vax$age30[ is.na(data_vax$age_at_study_entry)] <- NA - data_vax$age30_50 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30,50,Inf)))); data_vax$age30_50[is.na(data_vax$age_at_study_entry)] <- NA - - if("o" %in% tolower(data_vax$sex)) data_vax$sex[tolower(data_vax$sex)=="o"]<-NA - data_vax$sexc <- paste0("sex:",data_vax$sex ); data_vax$sexc[ is.na(data_vax$sex) ] <- NA - data_vax$sex_age30 <- paste0("sex:",data_vax$sex, " ", data_vax$age30); data_vax$sex_age30[is.na(data_vax$sex) | is.na(data_vax$age30)] <- NA +# data_vax$age30 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30, Inf)))); data_vax$age30[ is.na(data_vax$age_at_study_entry)] <- NA +# data_vax$age30_50 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30,50,Inf)))); data_vax$age30_50[is.na(data_vax$age_at_study_entry)] <- NA +# +# if("o" %in% tolower(data_vax$sex)) data_vax$sex[tolower(data_vax$sex)=="o"]<-NA +# data_vax$sexc <- paste0("sex:",data_vax$sex ); data_vax$sexc[ is.na(data_vax$sex) ] <- NA +# data_vax$sex_age30 <- paste0("sex:",data_vax$sex, " ", data_vax$age30); data_vax$sex_age30[is.na(data_vax$sex) | is.na(data_vax$age30)] <- NA # strata variables: for( strata_var in c( "age30","age30_50", "sexc", "sex_age30") ){ + print(strata_var) + # values of the current strata variable strata_values <- unique(data_vax[,strata_var]); strata_values <- strata_values[!is.na(strata_values)] @@ -286,21 +187,36 @@ for (subpop in subpopulations_non_empty) { for(strata_value in strata_values){ + data_vax_strata <- data_vax[data_vax[,strata_var]==strata_value,] + + ########### vax_number & no split ##### + # + ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181], >181 } + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, use_all_events=F, + event_info=event_info, extra_parameters = extra_options, add_to_itself=F, lplots=F ) + + ## cut_points_name="7d" { [-91;-30], [-29;-1], [0;0], [1;7], [8;14], [15;21], [22;28] } + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, use_all_events=F, + event_info=event_info, extra_parameters = extra_options, add_to_itself=T, lplots=T ) + + ########### vax_number & brand ( no distance) per stratum ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) # in models with calendar time intervals use only events from the current stratum: - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=F, - event_info=event_info, extra_parameters = extra_options, add_to_itself=F ) - + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, use_all_events=F, + event_info=event_info, extra_parameters = extra_options, add_to_itself=F, lplots=F ) + ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, use_all_events=F, - event_info=event_info, extra_parameters = extra_options, add_to_itself=T ) - + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, use_all_events=F, + event_info=event_info, extra_parameters = extra_options, add_to_itself=T, lplots=T ) + } # end for strata_value gc() } # end for strata_var @@ -314,6 +230,8 @@ for (subpop in subpopulations_non_empty) { if(lcovid){ if(any(tolower(names(data_vax)) != "covid19_date") ){ + print("covid") + # SCCS output_subdirectory 'covid' in 'event' directory sdr_covid <- paste0(sdr, "covid","/") dir.create(sdr_covid, showWarnings = FALSE, recursive = TRUE) @@ -379,19 +297,19 @@ for (subpop in subpopulations_non_empty) { ################################################### # baseline tables - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) - characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr_covid,"baseline_",icovid,".txt"), vax_name="vax_number", condition_value=icovid, age="age_at_study_entry", lab_orders=vax_def$lab_orders ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + #characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr_covid,"baseline_",icovid,".txt"), vax_name="vax_number", condition_value=icovid, age="age_at_study_entry", lab_orders=vax_def$lab_orders ) ########### vax_number & no split ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181], >181 } - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, strata_var="covid_selection_name", strata_value=covid_value, use_all_events=F, - event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=F ) + event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax ) res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, strata_var="covid_selection_name", strata_value=covid_value, use_all_events=F, - event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=T ) + event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=T, lplots=T ) ########### vax_number & brand ( no distance): ##### @@ -400,12 +318,12 @@ for (subpop in subpopulations_non_empty) { vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var="covid_selection_name", strata_value=covid_value, use_all_events=F, - event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=F ) + event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var="covid_selection_name", strata_value=covid_value, use_all_events=F, - event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=T ) + event_info=event_info, extra_parameters = extra_options_covid, add_to_itself=T, lplots=T ) } # end covid 'for' @@ -425,28 +343,15 @@ for (subpop in subpopulations_non_empty) { # if(ldist){ + print("additional") ########################################## # # create additional variables: # - if( !all( c("dist","vax_name","type_with_prev","type_history","type_history_sorted") %in% names(data_vax)) ){ - #### 'dist' - data_vax$dist <- c(NA, diff(data_vax[,"vax_days"]) ) - data_vax$dist[-1][ data_vax[-1,id]!=data_vax[-nrow(data_vax),id] ] <- NA - #### 'dist_gt_60' - data_vax$dist_gt_60 <- c("<=60d"," >60d")[(data_vax$dist > 60) +1 ] - data_vax$dist_gt_60[is.na(data_vax$dist_gt_60)] <- "" - - #### 'vax_name' - data_vax$vax_name <- data_vax$vax_number - data_vax$vax_name[ data_vax$vax_name==paste0("dose ",1," ") ] <- "dose 1.1 " - data_vax$vax_name[ data_vax$vax_name==paste0("dose ",2," ") ] <- "dose 1.2 " - data_vax$vax_name[ data_vax$vax_name==paste0("dose ",3," ") ] <- "booster 1" - data_vax$vax_name[ data_vax$vax_name==paste0("dose ",4," ") ] <- "boost1or2" - data_vax$vax_name[ data_vax$vax_n==2 & data_vax$dist>60 ] <- "booster 1" + if( !all( c("type_with_prev","type_history","type_history_sorted") %in% names(data_vax)) ){ - # create variable with tow brands: from the previous and current doses : + # create variable with tow brands: from the previous and current doses : data_vax$type_with_prev <- format(data_vax[,"vax_brand_short"]) cond_prev_exists <- c( F, data_vax[-1,id] == data_vax[-nrow(data_vax),id] ) cond_next_exists <- c( data_vax[-nrow(data_vax),id] == data_vax[-1,id], F ) @@ -501,14 +406,14 @@ for (subpop in subpopulations_non_empty) { # # vax_name="vax_number": dose1, dose2, dose3, ... # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) extra_options_dist$extra_name <- vax_def0$data_parameters$vax_name ################################################### # baseline tables #vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( after="dist_gt_60" ) ) - #characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr_dist,"baseline_vax_number.txt"), vax_name="vax_number", condition_value=vax_def0$data_parameters$vax_name, age="age_at_study_entry", lab_orders=vax_def$lab_orders ) + ##characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr_dist,"baseline_vax_number.txt"), vax_name="vax_number", condition_value=vax_def0$data_parameters$vax_name, age="age_at_study_entry", lab_orders=vax_def$lab_orders ) ########### vax_number & dist ##### # @@ -522,12 +427,12 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) extra_options_dist$extra_name <- "" @@ -535,31 +440,35 @@ for (subpop in subpopulations_non_empty) { # # vax_name="vax_name": dose1.1, dose1.2, boost1, boost2, ... # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_name", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_name", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) extra_options_dist$extra_name <- vax_def0$data_parameters$vax_name ################################################### # baseline tables - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) - characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr_dist,"baseline_vax_name.txt"), vax_name="vax_name", condition_value=vax_def0$data_parameters$vax_name, age="age_at_study_entry", lab_orders=vax_def$lab_orders ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + #characteristics(data=data_vax, event=iae, path_file_name=paste0(sdr_dist,"baseline_vax_name.txt"), vax_name="vax_name", condition_value=vax_def0$data_parameters$vax_name, age="age_at_study_entry", lab_orders=vax_def$lab_orders ) ########### vax_name & no split ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181], >181 } - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) - res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) + + ## cut_points_name="7d" : + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) ########### vax_name & brand ( wihout distance ): ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) ########### vax_name & brand wih distance: ##### @@ -567,12 +476,12 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) extra_options_dist$extra_name <- "" @@ -583,8 +492,8 @@ for (subpop in subpopulations_non_empty) { # for combination of brands (historical) # # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) extra_options_dist$extra_name <- vax_def0$data_parameters$vax_name ########### vax_number & combination of previous and current brand : ##### @@ -593,7 +502,7 @@ for (subpop in subpopulations_non_empty) { vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,29), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="type_with_prev" )) # without formula: - res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) ) + res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=T ) ) if(class(res)[[1]]== "try-error") forest_plots_tab(res[[1]][[1]][[1]]) ########### vax_number & brand history sorted, or ignore order, i.e. you don't know which one was the first, which one was the second, ... ##### @@ -601,7 +510,7 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;28] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,29), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="type_history_sorted" )) - res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) ) + res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=T ) ) ########### vax_number & brand history ##### @@ -609,7 +518,7 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;28] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,29), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="type_history" )) - res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=F ) ) + res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, event_info=event_info, extra_parameters = extra_options, add_to_itself=F, lplots=T ) ) extra_options_dist$extra_name <- "" @@ -620,18 +529,10 @@ for (subpop in subpopulations_non_empty) { # # analyse for "age30","age30_50","sexc","sex_age30" # - if( !all( c("age30","age30_50","sexc","sex_age30") %in% names(data_vax)) ){ - data_vax$age30 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30, Inf)))); data_vax$age30[ is.na(data_vax$age_at_study_entry)] <- NA - data_vax$age30_50 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30,50,Inf)))); data_vax$age30_50[is.na(data_vax$age_at_study_entry)] <- NA - - if("o" %in% tolower(data_vax$sex)) data_vax$sex[tolower(data_vax$sex)=="o"]<-NA - data_vax$sexc <- paste0("sex:",data_vax$sex ); data_vax$sexc[ is.na(data_vax$sex) ] <- NA - data_vax$sex_age30 <- paste0("sex:",data_vax$sex, " ", data_vax$age30); data_vax$sex_age30[is.na(data_vax$sex) | is.na(data_vax$age30)] <- NA - } # strata variables: for( strata_var in c( "age30","age30_50", "sexc", "sex_age30") ){ - + # SCCS output_subdirectory 'distance_combi' in 'event' directory sdr_dist_stratum <- paste0(sdr0, iae, "/distance_combi/",ifelse(strata_var%in%c("age30","age30_50"), "age", strata_var), "/" ) dir.create(sdr_dist_stratum, showWarnings = FALSE, recursive = TRUE) @@ -656,7 +557,7 @@ for (subpop in subpopulations_non_empty) { for(strata_value in strata_values){ - + data_vax_strata <- data_vax[data_vax[,strata_var]==strata_value,] ########################################################################################### ################################### event ############################################### @@ -668,8 +569,8 @@ for (subpop in subpopulations_non_empty) { # # vax_name="vax_number": dose1, dose2, dose3, ... # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) extra_options_dist$extra_name <- vax_def0$data_parameters$vax_name ########### vax_number & dist ##### @@ -677,22 +578,27 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( after="dist_gt_60" ) ) - res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) + ## cut_points_name="7d" + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, + data=data_vax, vax_dep = c( after="dist_gt_60" ) ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) ########### vax_number & brand wih distance: ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) extra_options_dist$extra_name <- "" @@ -701,30 +607,35 @@ for (subpop in subpopulations_non_empty) { # # vax_name="vax_name": dose1.1, dose1.2, boost1, boost2, ... # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_name", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_name", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) extra_options_dist$extra_name <- vax_def0$data_parameters$vax_name ########### vax_name & no split ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181], >181 } - vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", data=data_vax ) - res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) + + ## cut_points_name="7d" + vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="^7d", no_last_interval_after=T, data=data_vax ) + res <- scri( formula = "~ lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) ########### vax_name & brand ( wihout distance ): ##### # ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61], [62;181] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62,182), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) ########### vax_name & brand wih distance: ##### @@ -732,14 +643,14 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;0], [1;28], [28;61] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,29,62), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=F ) ## cut_points_name="7d" vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,1,8,15,22,29), cut_points_name="7d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="vax_brand_short", after="dist_gt_60" )) - res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T ) + res <- scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=T, lplots=T ) extra_options_dist$extra_name <- "" @@ -750,8 +661,8 @@ for (subpop in subpopulations_non_empty) { # for combination of brands (historical) # # - vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", - id = "person_id", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) + vax_def0 <- scri_data_parameters( data = data_vax, vax_name = "vax_number", vax_time = "vax_days", vax_date = "vax_date", + id = "pat_n", start_obs = "study_entry_days", end_obs = "study_exit_days", censored_vars = "death_days" ) extra_options_dist$extra_name <- vax_def0$data_parameters$vax_name ########### vax_number & combination of previous and current brand : ##### @@ -760,8 +671,8 @@ for (subpop in subpopulations_non_empty) { vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,29), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="type_with_prev" )) # without formula: - res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) ) + res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=T ) ) if(class(res)[[1]]== "try-error") forest_plots_tab(res[[1]][[1]][[1]]) ########### vax_number & brand history sorted, or ignore order, i.e. you don't know which one was the first, which one was the second, ... ##### @@ -769,8 +680,8 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;28] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,29), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="type_history_sorted" )) - res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F ) ) + res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options_dist, add_to_itself=F, lplots=T ) ) ########### vax_number & brand history ##### @@ -778,8 +689,8 @@ for (subpop in subpopulations_non_empty) { ## cut_points_name="28d" : { [-91;-30], [-29;-1], [0;28] } vax_def <- define_rws(vax_def0, cut_points_before = c(-90,-29,0), cut_points_after = c(0,29), cut_points_name="28d", no_last_interval_after=T, data=data_vax, vax_dep = c( before="type_history" )) - res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax, strata_var=strata_var, strata_value=strata_value, - event_info=event_info, extra_parameters = extra_options, add_to_itself=F ) ) + res <- try( scri( formula = "~ brand:lab", vax_def = vax_def, data = data_vax_strata, strata_value=strata_value, + event_info=event_info, extra_parameters = extra_options, add_to_itself=F, lplots=T ) ) extra_options_dist$extra_name <- "" @@ -798,4 +709,3 @@ for (subpop in subpopulations_non_empty) { } # end of 'subpop' loop - From 05d3d0e81673c3b73dccd8f3f364ea42e1793ca3 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Mon, 21 Nov 2022 09:21:34 +0100 Subject: [PATCH 08/10] max_n_points --- p_parameters/08_SCRI_parameters.R | 1 + 1 file changed, 1 insertion(+) diff --git a/p_parameters/08_SCRI_parameters.R b/p_parameters/08_SCRI_parameters.R index e3faf70e..5a02c537 100644 --- a/p_parameters/08_SCRI_parameters.R +++ b/p_parameters/08_SCRI_parameters.R @@ -65,6 +65,7 @@ SCRI_variables_vocabulary <- data.table(vac4eu = c("E_GOUT_COV", "C_MYOCARD_AESI n_cores = NA leventplot = T + max_n_points = 1000 lplot = T CI_draw = T lforest = T From 536f8d579fa6a2f6972ea44e9e5399a216760d83 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Mon, 21 Nov 2022 09:26:37 +0100 Subject: [PATCH 09/10] this should be run before 08_T5_10_scri --- p_steps/08_T5_10_scri_data_preparation.R | 236 +++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 p_steps/08_T5_10_scri_data_preparation.R diff --git a/p_steps/08_T5_10_scri_data_preparation.R b/p_steps/08_T5_10_scri_data_preparation.R new file mode 100644 index 00000000..5799e8fd --- /dev/null +++ b/p_steps/08_T5_10_scri_data_preparation.R @@ -0,0 +1,236 @@ +# Program Information ---------------------------------------------------- +# +# Program: step_08_..._scri_data_preparation.R +# Author: Svetlana Belitser +# Description: calls functions which runs an SCRI on specified datasets +# runs on all datasets in g_output/scri +# Requirements: +# dependencies: preceding steps, package "survival", "MASS", "qpdf", "metafor"" +# input: D3_study_population_SCRI +# output: data_vax_SCRI +# excluded_rows.RData +# excluded_rows_2.RData +# +# parameters: in 08_parameters_scri_inputs.R +# +# + +# Housekeeping ----------------------------------------------------------- +# install and load packages + +if(!any(ls()=="thisdir")) thisdir <- getwd() +if(!any(ls()=="dirtemp")) dirtemp <- paste0(thisdir,"/g_intermediate/") +if(!any(ls()=="diroutput")) diroutput <- paste0(thisdir,"/g_output/") + +# ensure required folders are created +dir.create(file.path(paste0(dirtemp, "scri")), showWarnings = FALSE, recursive = TRUE) +dir.create(file.path(paste0(thisdir, "/log_files/scri")), showWarnings = FALSE, recursive = TRUE) +dir.create(file.path(paste0(diroutput, "scri")), showWarnings = FALSE, recursive = TRUE) + + +for (subpop in subpopulations_non_empty) { + + # scri output_directory for export: + sdr0 <- paste0(direxpsubpop[[subpop]], "scri/") + dir.create(file.path(sdr0), showWarnings = FALSE, recursive = TRUE) + + # scri output_directory for models not for export: + sdr_models0 <- paste0(diroutput, "scri/") + if(length(subpopulations_non_empty)>1) sdr_models0 <- paste0(diroutput, "scri/",subpop,"/") + dir.create(sdr_models0, showWarnings = FALSE, recursive = TRUE) + + + cat(paste0('\n\t"',subpop,'":\n\n')) + + + memory.limit(10^13) + #detach("package:data.table", unload = TRUE) + + # max_number_doses <- 4 + + + + # Import Data ------------------------------------------------------------- + + # Load the D3_study_population_SCRI + load(paste0(dirtemp, "D3_study_population_SCRI", suffix[[subpop]], ".RData")) + scri_input <- as.data.frame(get(paste0("D3_study_population_SCRI", suffix[[subpop]]))) + rm(list = paste0("D3_study_population_SCRI", suffix[[subpop]])) + + # scri_input <- D3_study_population_SCRI + ################# + # create dataset 'data_vax' (with multiple rows per person) from 'scri_input' (with one row per person) + # + + # change the id variable: + scri_input$pat_n <- 1:nrow(scri_input) + scri_input[,id] <- NULL + id <- "pat_n" + + scri_input[,names(scri_input) %in% ae_events] <- NULL + + if(any(c("date_vax1","vax1_date") %in% names(scri_input))){ + date_var <- "date_vax"; type_var <- "type_vax" + if(any(c("vax1_date") %in% names(scri_input))) date_var <- "vax_date" + if(any(c("vax1_type") %in% names(scri_input))) type_var <- "vax_type" + scri_n_vax_var <- as.integer(substring(names(scri_input),9)[substring(names(scri_input),1,8)==date_var][1:max_number_doses]) # ==> 1,2,3 (from "data_vax1", "date_vax2", ...) + scri_n_vax_var <- scri_n_vax_var[!is.na(scri_n_vax_var)] + + for(ivax in scri_n_vax_var){ + print(Sys.time()) + data_tmp <- scri_input[, !substring(names(scri_input),1,8) %in% c(type_var,date_var) | + ( substring(names(scri_input),1,8) %in% c(type_var,date_var) & names(scri_input) %in% paste0(c(type_var,date_var),ivax) ) ] + names(data_tmp)[names(data_tmp)==paste0(date_var,ivax)] <- "vax_date" + names(data_tmp)[names(data_tmp)==paste0(type_var,ivax)] <- "vax_brand" + data_tmp$vax_n <- ivax + data_tmp <- data_tmp[!is.na(data_tmp$vax_date),c( names(data_tmp)[!names(data_tmp) %in% c("vax_brand","vax_date")], "vax_brand", "vax_date" )] + + if(ivax==1) data_vax <- data_tmp + else data_vax <- rbind.data.frame(data_vax, data_tmp) + gc() + } + print(Sys.time()) + data_tmp <- data_tmp[F,] + if("vax1_date" %in% names(scri_input)) data_tmp <- scri_input[is.na(scri_input[,"vax1_date"]),] + if("date_vax1" %in% names(scri_input)) data_tmp <- scri_input[is.na(scri_input[,"date_vax1"]),] + data_tmp$vax_n <- 0; data_tmp$vax_date <- rep(NA,nrow(data_tmp)); data_tmp$vax_brand <- rep(NA,nrow(data_tmp)) + if(nrow(data_tmp)>0) {gc(); data_vax <- rbind.data.frame(data_vax, data_tmp[,names(data_vax)]) } + data_vax <- data_vax[!(data_vax$vax_n>0 & is.na(data_vax$vax_date)),] + } else { + data_vax <- scri_input + names(data_vax)[names(data_vax) == "date_vax" ] <- "vax_date" + names(data_vax)[names(data_vax) %in% c("type_vax","vax_type")] <- "vax_brand" + } + rm(scri_input) + gc() + # + ####### + Sys.time() + + data_vax[,"vax_days"] <- as.integer( difftime( data_vax[,"vax_date"], as.Date("2020-08-31"), units="days")) + # sort per id, vax_days + data_vax <- data_vax[order(data_vax$pat_n,data_vax[,"vax_days"]),] + #data_vax <- data_vax[order(data_vax[,id],data_vax[,"vax_days"]),] + + # dap: + if(!any(ls()=="dap")) dap <- ifelse( any(tolower(names(data_vax))=="dap"), data_vax[1,tolower(names(data_vax))=="dap"], "" ) + if(dap=="" & any(tolower(names(data_vax))=="datasource")) dap <- data_vax[1,tolower(names(data_vax))=="datasource"] + + # calculate the 'days'-variables: + for(idate_vars in substring(names(data_vax),6)[substring(names(data_vax),1,5)=="date_"]) + data_vax[,paste0(idate_vars,"_days")] <- as.integer( difftime( data_vax[,paste0("date_",idate_vars)], as.Date("2020-08-31"), units="days")) + for(idate_vars in substring(names(data_vax),1,nchar(names(data_vax))-5)[substring(names(data_vax),nchar(names(data_vax))-4,nchar(names(data_vax)))=="_date"]) + data_vax[,paste0(idate_vars,"_days")] <- as.integer( difftime( as.Date(data_vax[,paste0(idate_vars,"_date")]), as.Date("2020-08-31"), units="days")) + names(data_vax)[names(data_vax)=="of_death_days"] <- "death_days" + + + ############# SCRI models ############################ + # + # + old_width = options(width=300) + + # check: + tb<-table((cond<-data_vax$study_entry_days < data_vax$study_exit_days )) + + if(length(tb)>1){ + warning(paste("There are ",tb["FALSE"],"rows with 'study_entry_date' >= 'study_exit_date' !")) + cat('\n"study_entry_days" < "study_exit_days":\n') + print(table(tb)) + cat(paste0('\n"study_entry_days" == "study_exit_days": ', sum(data_vax$study_entry_days == data_vax$study_exit_days,na.rm=T ),' rows\n\n')) + data_vax_excluded <- data_vax[!cond,] + save(data_vax_excluded,file=paste0(sdr0,"excluded_rows.RData")) + sink(paste0(sdr0,"excluded_rows.txt")); old_sink = options (width=300, max.print=99999 );print(data_vax_excluded);options(old_sink);sink() + rm(data_vax_excluded) + data_vax <- data_vax[cond,] + } + + cond <- !is.na(data_vax$study_entry_days) & ( is.na(data_vax$vax_days) | ( !is.na(data_vax$vax_days) & data_vax$study_entry_days <= data_vax$vax_days ) ) + if(any(!cond)){ + warning(paste( sum(!cond), "rows with 'study_entry_days' > 'vax_days'")) + data_vax_excluded_2 <- data_vax[!cond,] + save(data_vax_excluded_2,file=paste0(sdr0,"excluded_rows_2.RData")) + sink(paste0(sdr0,"excluded_rows_2.txt")); old_sink = options (width=300, max.print=99999 );print(data_vax_excluded_2);options(old_sink);sink() + rm(data_vax_excluded_2) + data_vax <- data_vax[cond,] + } + # + ######################################################## + + ########################################## + # + # create variables: + # dose number: 'vax_n' (1,2,...) + # 'vax_number' ("dose 1" ,"dose 2", "dose 3", ...) + # + gc() + #### 'vax_n': + data_vax <- data_vax[order(data_vax[,id],data_vax[,"vax_days"]),] + ivax <- 1; + data_vax[,"vax_n"] <- data_vax[,"vax_days"] + data_vax[!is.na(data_vax[,"vax_n"]),"vax_n"] <- ivax + data_vax[ is.na(data_vax[,"vax_n"]),"vax_n"] <- 0 + while(T){ + cond_vax_i <- data_vax$vax_n == ivax & !is.na(data_vax$vax_n) + cond2<-duplicated(data_vax[cond_vax_i,id]) + if(!any(cond2)) break + ivax <- ivax + 1 + data_vax[cond_vax_i,"vax_n"][cond2] <- ivax + } + #### 'vax_number': + data_vax$vax_number <- paste0("dose ",data_vax[,"vax_n"]," ") + + #### 'dist' + data_vax$dist <- c(NA, diff(data_vax[,"vax_days"]) ) + data_vax[ c(F, data_vax[-1,id]!=data_vax[-nrow(data_vax),id] ), "dist" ] <- NA + #### 'dist_gt_60' + data_vax$dist_gt_60 <- c("<=60d"," >60d")[(data_vax$dist > 60) +1 ] + data_vax[is.na(data_vax$dist_gt_60),"dist_gt_60"] <- "" + + #### 'vax_name' + data_vax[ data_vax$vax_n==1, "vax_name" ] <- "dose 1.1 " + data_vax[ data_vax$vax_n==2, "vax_name" ] <- "dose 1.2 " + data_vax[ data_vax$vax_n==3, "vax_name" ] <- "booster 1" + data_vax[ data_vax$vax_n==4, "vax_name" ] <- "boost2or3" + + data_vax[ data_vax$vax_n==2 & data_vax$dist>60, "vax_name" ] <- "booster 1" + data_vax[ c(F, data_vax[-1,"vax_n"]==3 & data_vax[-nrow(data_vax),"vax_name"]=="booster 1"), "vax_name" ] <- "booster 2" + data_vax[ c(F, data_vax[-1,"vax_n"]==4 & data_vax[-nrow(data_vax),"vax_name"]=="booster 2"), "vax_name" ] <- "booster 3" + + #### create 'vax_brand_short': + data_vax$vax_brand_short <- format(substring(data_vax[,"vax_brand"],1,5)) + + #################### + # add information about the first dose in separate variables: + data_v1 <- data_vax[!duplicated(data_vax[,id]),c(id,"vax_days","vax_number","vax_name","vax_date","vax_brand","vax_brand_short")] + names(data_v1)[-1] <- paste0(names(data_v1)[-1],"_v1") + gc() + print(Sys.time()) + data_vax <- merge.data.frame(data_vax,data_v1,by=id,all.x=T ) + + print(Sys.time() ) + + ####################################################################### + # + # strata analyse with brand for: age30, age30_50, sex, sex_age30 + # + + data_vax$age30 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30, Inf)))); data_vax$age30[ is.na(data_vax$age_at_study_entry)] <- NA + data_vax$age30_50 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30,50,Inf)))); data_vax$age30_50[is.na(data_vax$age_at_study_entry)] <- NA + + if("o" %in% tolower(data_vax$sex)) data_vax$sex[tolower(data_vax$sex)=="o"]<-NA + data_vax$sexc <- paste0("sex:",data_vax$sex ); data_vax$sexc[ is.na(data_vax$sex) ] <- NA + data_vax$sex_age30 <- paste0("sex:",data_vax$sex, " ", data_vax$age30); data_vax$sex_age30[is.na(data_vax$sex) | is.na(data_vax$age30)] <- NA + + + # Save dataset 'data_vax' in file 'data_vax_SCRI.RData': + save(data_vax, file=paste0(dirtemp, "data_vax_SCRI", suffix[[subpop]], ".RData")) + + gc() + + ########## + # restore options: + options(old_width) + + +} # end of 'subpop' loop + From b1353949e8ad4b18859d16d245a18953132ad730 Mon Sep 17 00:00:00 2001 From: svetlanabelitser <93388849+svetlanabelitser@users.noreply.github.com> Date: Mon, 21 Nov 2022 09:27:22 +0100 Subject: [PATCH 10/10] Update 08_T5_10_scri.R --- p_steps/08_T5_10_scri.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/p_steps/08_T5_10_scri.R b/p_steps/08_T5_10_scri.R index b78502ef..47e06159 100644 --- a/p_steps/08_T5_10_scri.R +++ b/p_steps/08_T5_10_scri.R @@ -164,15 +164,7 @@ for (subpop in subpopulations_non_empty) { ####################################################################### # # strata analyse with brand for: age30, age30_50, sex, sex_age30 - # - -# data_vax$age30 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30, Inf)))); data_vax$age30[ is.na(data_vax$age_at_study_entry)] <- NA -# data_vax$age30_50 <- paste0("age",as.character(cut(data_vax$age_at_study_entry, c(-1,30,50,Inf)))); data_vax$age30_50[is.na(data_vax$age_at_study_entry)] <- NA -# -# if("o" %in% tolower(data_vax$sex)) data_vax$sex[tolower(data_vax$sex)=="o"]<-NA -# data_vax$sexc <- paste0("sex:",data_vax$sex ); data_vax$sexc[ is.na(data_vax$sex) ] <- NA -# data_vax$sex_age30 <- paste0("sex:",data_vax$sex, " ", data_vax$age30); data_vax$sex_age30[is.na(data_vax$sex) | is.na(data_vax$age30)] <- NA - + # # strata variables: for( strata_var in c( "age30","age30_50", "sexc", "sex_age30") ){