forked from YPARK/gtex-fqtl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfigure5.factor.stat.R
executable file
·86 lines (60 loc) · 2.59 KB
/
figure5.factor.stat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env Rscript
argv <- commandArgs(trailingOnly = TRUE)
if(length(argv) < 4) q()
k.max <- as.integer(argv[1]) # e.g., k.max = 50
pip.cutoff <- as.numeric(argv[2]) # e.g., pip.cutoff = 95
stat1.file <- argv[3] # e.g., 'result/fig_stat1.pdf'
stat2.file <- argv[4] # e.g., 'result/fig_stat2.pdf'
if(k.max < 2) q()
library(ggplot2)
library(dplyr)
source('Util.R')
options(stringsAsFactors = FALSE)
merge.tabs <- function(...) {
do.call(rbind, lapply(..., read.table))
}
data.tab <- merge.tabs(glue('result/stat/chr', 1:22, '/', k.max, '/combined.txt.gz'))
colnames(data.tab) <- c('ensg', 'chr', 'tss',
'tis.idx', 'tis.names', 'tis.theta', 'tis.se', 'tis.lodds',
'snps', 'snp.theta', 'snp.se', 'snp.lodds', 'k', 'pip')
pip.cutoff <- pip.cutoff / 100
## Fig: histogram of #factors
count.tab <- data.tab %>%
filter(pip == pip.cutoff) %>%
group_by(ensg) %>%
summarize(num.factors = length(k))
s1.tab <- count.tab %>%
group_by(num.factors) %>%
summarize(count = length(num.factors))
summary(glm(data = s1.tab, formula = count ~ num.factors +1, family = Gamma(link = 'log')))
brk <- sort(unique(c(range(s1.tab$count), c(1, 10, 50, 100, 500, 1000))))
p1 <- ggplot(s1.tab, aes(x = num.factors, y = count)) +
theme_classic() +
geom_point() +
scale_x_continuous(breaks = seq(1,50)) +
scale_y_log10(breaks = brk) + xlab('number of factors in one gene') +
theme(axis.text = element_text(size=6))
pdf(file = stat1.file, width = 2.5, height = 3, useDingbats = FALSE)
print(p1)
dev.off()
## Fig: histogram of factor-factor covariance
s2.tab <- merge.tabs(glue('result/stat/chr', 1:22, '/', k.max, '/s2.txt.gz'))
colnames(s2.tab) <- c('k1', 'k2', 'z', 'num', 'denom',
's2', 'var', 'tr', 'p', 'pip')
s2.tab <- s2.tab %>% mutate(self = ifelse(k1 == k2, 'within', 'between'))
## Just take 95% quantiles and compare
s2.filter <- s2.tab %>% group_by(self) %>%
filter(num > quantile(num, 0.025), num < quantile(num, 0.975))
p2 <- ggplot(s2.filter, aes(y = num / sqrt(s2), x = self, color = self, fill = self)) +
theme_classic() +
geom_hline(yintercept = 0, color = 'gray60', lty = 'dashed') +
geom_violin() +
ylab('z-score') +
theme(legend.position = 'none',
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
axis.title.x = element_blank())
p2 <- p2 + scale_color_manual(values = c('orange', 'gray40')) +
scale_fill_manual(values = c('orange', 'gray40'))
pdf(file = stat2.file, width = 2, height = 3, useDingbats = FALSE)
print(p2)
dev.off()