-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdevelopmental_target_count_heatmap_plotter.R
More file actions
108 lines (87 loc) · 6.19 KB
/
developmental_target_count_heatmap_plotter.R
File metadata and controls
108 lines (87 loc) · 6.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
library(data.table)
library(glmnet)
library(ggplot2)
library(gridExtra)
library(pheatmap)
library(viridis)
args <- commandArgs(trailingOnly = TRUE)
directory = args[1] # example: "/oak/stanford/groups/horence/Roozbeh/NOMAD_10x/runs/CCLE_all/"
############## reading in metadata and sample_conversion files #########################
#metadata = fread(metadata_file)
sample_name_id_conversion = fread(paste(directory,"/sample_name_to_id.mapping.txt",sep=""),header = FALSE)
names(sample_name_id_conversion) = c("sample_name","sample_id")
metadata = fread(paste(directory,"/metadata_modified.tsv",sep=""))
###############################################################
selected_anchors = fread(paste(directory,"result.after_correction.scores.tsv",sep=""),select = c("anchor", "effect_size_bin", "number_nonzero_samples", "most_freq_target_1", "cnt_most_freq_target_1", "most_freq_target_2", "cnt_most_freq_target_2","target_entropy"))
system(paste("mkdir ", directory,"/satc_sig_anchor",sep="")) # this is the directory for the supervised analysis files
##################################################################################
####### satc_dump for getting counts for the selected anchors ####################
write.table(selected_anchors$anchor,paste(directory, "/satc_sig_anchor/selected_anchors.txt",sep=""),sep="\t",row.names=FALSE,quote=FALSE,col.names = FALSE)
for (counter in 0:127){
system(paste("/oak/stanford/groups/horence/Roozbeh/software/R-NOMAD/bin/satc_dump --anchor_list ", paste(directory, "/satc_sig_anchor/selected_anchors.txt ", sep = ""), directory, "result_satc/bin", counter, ".satc ", directory, "/satc_sig_anchor/", "satcOut",counter,".tsv ", sep = ""))
}
# now below I read in satcout files generated by satc_dump
satcOut_files = list.files(path = paste(directory,"/satc_sig_anchor/",sep=""), pattern = "satcOut")
counts = data.table()
for (counter in 1:length(satcOut_files)) {
counts = rbind(counts, fread(paste(directory,"/satc_sig_anchor/",satcOut_files[counter], sep = "")))
print(counter)
}
setnames(counts,c("V1","V2","V3","V4"),c("sample_id","anchor","target","count"))
###################################################################################
#### now merge count file with metadata #################
metadata = merge(metadata,sample_name_id_conversion,all.x=T,all.y = F,by.x="sample_name",by.y="sample_name")
metadata[,sample_name:=gsub("-",".",sample_name),by=sample_name]
counts = merge(counts, metadata, all.x = TRUE, all.y = FALSE, by.x = "sample_id", by.y = "sample_id")
metadata[,sample_id:=NULL]
metadata[,Day:=gsub("D","",Day)]
metadata$Day = as.numeric(metadata$Day)
#########################################################
system(paste("mkdir ",directory,"/satc_sig_anchor/heatmap_plots/",sep=""))
for (counter in 1:nrow(selected_anchors)){
tryCatch({
anchor_interest = selected_anchors$anchor[counter]
effectsize_interest = selected_anchors$effect_size_bin[counter]
entropy_interest = selected_anchors$target_entropy[counter]
counts_anchor = counts[anchor==anchor_interest] # target counts for the selected anchor
counts_anchor[,target_count := sum(count),by=target] # compute total counts for each target
counts_anchor[,target_frac:=target_count/sum(counts_anchor$count)] # fraction of anchor read for each target
counts_anchor = counts_anchor[target_frac>0.001]
counts_anchor[,target:=paste(target,round(target_frac,digits =4),sep="_"),by=target]
counts_anchor$target = factor(counts_anchor$target,levels = counts_anchor[order(-target_frac),list(target,target_frac)][!duplicated(target)]$target) # sort targets based on their counts
counts_reshape = reshape(counts_anchor[,list(sample_name,target,count)], timevar="sample_name", idvar="target", direction="wide")
counts_reshape = counts_reshape[order(target)] # sort the rows of the reshaped datatable based on the marginal target counts
counts_reshape[is.na(counts_reshape)]=0
counts_reshape_df = data.frame(counts_reshape)
rownames(counts_reshape_df) = counts_reshape_df$target
counts_reshape_df = counts_reshape_df[,-1]
column_names = colnames(counts_reshape_df)
column_names = gsub("count.","",column_names)
colnames(counts_reshape_df) = column_names
sorting_dt = data.table(names(counts_reshape_df))
sorting_dt = merge(sorting_dt,metadata,all.x=T,all.y=F,by.x="V1",by.y="sample_name")
if (directory %like%"immune"){
sort_cols = c("Day","Treatment")
} else{
sort_cols = c("Day")
}
sorting_dt = sorting_dt[order(sorting_dt[, ..sort_cols])]
sorting_dt$Day = factor(sorting_dt$Day)
counts_reshape_df = counts_reshape_df[,sorting_dt$V1] ## sorting the columns of counts_reshape based on the way samples have been sorted based on Day and treatment
### now making the data frame needed for annotating columns
my_annotation_col = data.table(colnames(counts_reshape_df))
metadata$Day = factor(metadata$Day)
my_annotation_col = merge(my_annotation_col, metadata,all.x=T,all.y=F,by.x="V1",by.y="sample_name")
my_annotation_col$V1 = factor(my_annotation_col$V1,levels=colnames(counts_reshape_df))
setorder(my_annotation_col,V1)
test = my_annotation_col$V1
my_annotation_col[,V1:=NULL]
my_annotation_col = data.frame(my_annotation_col)
rownames(my_annotation_col) = test
g1=pheatmap(counts_reshape_df,cluster_rows = FALSE,cluster_cols = F, main = "Target Counts", show_colnames = FALSE,fontsize = 6, color = viridis(n = 256, alpha = 1, begin = 0, end = 1, option = "viridis"),annotation_col = my_annotation_col,show_rownames =T)
g2=pheatmap(t(t(counts_reshape_df) / colSums(counts_reshape_df)),cluster_rows = FALSE,cluster_cols = F, main = "Target Fractions", show_colnames = FALSE,fontsize = 6, color = viridis(n = 256, alpha = 1, begin = 0, end = 1, option = "viridis"),annotation_col = my_annotation_col,show_rownames =T)
pdf(file=paste(directory,"/satc_sig_anchor/heatmap_plots/","anchor_",anchor_interest,"_entropy_",entropy_interest,"_effectsize_",effectsize_interest,".pdf", sep = ""), width = 9, height = 6)
print(grid.arrange(grobs = list(g1[[4]], g2[[4]]),layout_matrix=rbind(c(1,1),c(2,2))))
dev.off()
},error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}