-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcellPhyPlotTree.R
More file actions
executable file
·104 lines (83 loc) · 2.67 KB
/
cellPhyPlotTree.R
File metadata and controls
executable file
·104 lines (83 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env Rscript
.getSourceDir <- function() {
cmdArgs <- commandArgs(trailingOnly=FALSE)
fileArg <- "--file="
match <- grep(fileArg, cmdArgs)
if (length(match) > 0) {
sourcedir <- dirname(gsub(fileArg, "", cmdArgs[match]))
}
return( sourcedir )
}
sourcedir <- .getSourceDir()
print(sourcedir)
# Load libraries
suppressPackageStartupMessages({
source(paste(sourcedir, "/cellPhyUtils.R",sep=""), chdir = T)
library(BSgenome.Hsapiens.NCBI.GRCh38)
library(VariantAnnotation)
ref_genome <- "BSgenome.Hsapiens.NCBI.GRCh38"
library(ref_genome, character.only = TRUE)
})
# get command arguments
args = commandArgs(trailingOnly = TRUE)
cellphydir = args[1]
vcf_file = args[2]
ptato_dir = args[3]
outgroup = args[4]
percent = args[5]
prefix=args[6]
# read in the PTATO VCFs
if (ptato_dir != "NONE") {
ptato_grl = GRangesList(xxxx) # HERE THE FILTERED PTATO VCFS SHOULD BE EXTRACTED FROM THE DIR AND PUT INTO ONE GRANGESLIST
} else {
ptato_grl = NA
}
# Set a percentage of cells that can be missed
if (percent != "NA"){
percent = as.numeric(percent)
} else{
percent = 0.4
}
# read in the SMuRF VCF
vcf = readVcf(vcf_file)
seqlevels(vcf, pruning.mode = 'tidy') = c(1:22, "X", "Y")
print(paste("This is outgroup: ", outgroup))
# If no VAF column is present create one
if (is.null(geno(vcf)$VAF)){
mydf <- data.frame(matrix(NA, nrow = nrow(vcf)))
# Calculate VAF per sample
for (sname in samples(header(vcf))) {
ad <- sapply(geno(vcf)$AD[,sname],"[[",2)
dp <- geno(vcf)$DP[,sname]
VAF <- ad/dp
mydf[[sname]] <- as.numeric(VAF)
}
# Add VAF to vcf
mydf <- mydf[-1]
geno(header(vcf))["VAF",] = list("1","Float","Varient Allele Frequency")
geno(vcf)$VAF <- mydf
}
# load the tree, all annotations (muts, boots), and filter end-branches
cell_phy_tree <- load_tree_with_info(
dir = cellphydir,
outgr = outgroup,
vcf = vcf,
ptato_grl = ptato_grl,
cellphy_rm_non1 = TRUE,
mutation_soure = 'percentage',
norm_pres_max = 0.95,
high_frac_min = 0.05,
min_frac_all = 0.1,
percent = percent,
prefix = prefix
)
# plot the tree
tree_plot = ggtree(cell_phy_tree, branch.length = 'branch_length') +
geom_nodelab(aes(label = n_boot), geom = 'label', color = 'grey50', fill = rgb(1,1,1,0.7), size = 2) +
geom_text(aes(x = branch, label = branch_length), color = 'black', nudge_y = -0.5, nudge_x = 1) +
geom_tiplab(size=3) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(10,100,10,10), 'points'))
ggsave(plot = tree_plot, filename = paste0(cellphydir, "/CPW_Tree_", as.character(percent),".pdf"),
width = 10, height = 7)
saveRDS(cell_phy_tree, file = paste0(cellphydir, "/TreeObject", as.character(percent),".RDS"))