-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathFASTSTEP3R
executable file
·94 lines (74 loc) · 3.65 KB
/
FASTSTEP3R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env Rscript
### FASTSTEP3R v1.0.1 (C) Max Planck Society for the Advancement of Science
###
### Code developed by Víctor Roces <https://github.com/RocesV/>
###
### This program is free software: you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation, either version 3 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
### GNU General Public License for more details.
library(optparse)
option_list = list(
make_option(c("-g", "--genelist"), type="character", default="tmp_gene_list",
help="Name of input tmp_genelist [default= %default]", metavar="file"),
make_option(c("-t", "--tmp"), type="character", default=NULL,
help="${TMP_PATH} directory to import and export results", metavar="path"),
make_option(c("-p", "--pattern"), type = "character", default="Diamond_F3R_",
help="Default pattern used to search ${DIAMONDOUT} splitted files in tmp directory [default= %default]", metavar="string"),
make_option(c("-c", "--cores"), type="integer", default=NULL,
help="Number of threads ", metavar="NUM")
);
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);
if (is.null(opt$tmp)){
message ("")
message ("FASTSTEP3R: STEP 3 gene queries data.table implementation for GenEra")
message ("Generate per gene .bout files")
message ("This script is meant to be used as a replace of the grep command in the Erassignment script at the cost of more RAM and simultaneous tmp files")
message ("")
print_help(opt_parser)
stop("The ${TMP_PATH} directory must be supplied", call.=FALSE)
}
if (is.null(opt$cores)){
message ("")
message ("FASTSTEP3R: STEP 3 gene queries data.table implementation for GenEra")
message ("Generate per gene .bout files")
message ("This script is meant to be used as a replace of the grep command in the Erassignment script at the cost of more RAM and simultaneous tmp files")
message ("")
print_help(opt_parser)
stop("The ${THREADS} number must be supplied", call.=FALSE)
}
library(data.table)
# set the number of threads
setDTthreads(opt$cores)
# Import and indexing inputs
cat(paste0("--------------------------------------------------"), "\n")
cat(paste0("Importing and indexing files needed... | ", date(), "\n"))
## Import tmp_gene_list
tmp.genelist <- fread(paste0(opt$tmp,"/tmp_gene_list"), header = F, sep = "\t")
## Import ${DIAMONDOUT} splitted files and indexing for fast queries
Diamond.results <- list()
Split.files <- paste0(opt$tmp,"/",list.files(opt$tmp)[grep(opt$pattern, list.files(opt$tmp))])
for(i in 1:length(Split.files)){
Diamond.results[[i]] <- fread(Split.files[i], header = F, sep = "\t")
setkey(Diamond.results[[i]], V1, verbose = FALSE)
}
# Filtering and writting
cat(paste0("--------------------------------------------------"), "\n")
cat(paste0("Filtering and exporting single gen hits ... | ", date(), "\n"))
for(i in tmp.genelist$V1){
hits <- list()
for(j in 1:length(Diamond.results)){
hits[[j]] <- Diamond.results[[j]][V1 == i]
}
fwrite(rbindlist(hits, use.names = FALSE), file = paste0(opt$tmp, "/tmp_", i, ".bout"), sep = "\t", row.names = F, col.names = F, quote = F)
}
# Print goodbye message
cat(paste0("ANALYSIS COMPLETE !!! | ", date(), "\n"))
cat(paste0("Feed all these tmp files to Erassignment for saving a lot of time in STEP 3"), "\n")
cat(paste0("Thanks for using FASTSTEP3R"), "\n")