forked from theodorejperkins/RECAP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRECAP_Re-Mix.sh
executable file
·253 lines (215 loc) · 7.59 KB
/
RECAP_Re-Mix.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/bin/bash
# ===============================================================
# RECAP Re-Mix
# RECAP is a wrapper algorithm that resamples ChIP-seq and control
# data to estimate and control for biases built into peak calling
# algorithms.
# The purpose of this script is to randomly re-mix ChIP-seq and
# control prior to peak calling analysis. A detailed explanation
# of the algorithm can be found under PUBLICATIONS.
#
# HISTORY:
# 03/11/2017 - v1.0.0 - First Creation
# 29/01/2018 - v1.0.1 - Minor commenting update
# 16/01/2019 - v1.0.2 - Better instructions
#
# CREDITS:
# RECAP was developed by Justin G. Chitpin and Theodore J. Perkins.
# Development of RECAP was carried out at the Ottawa Hospital
# Research Institute in the Perkins Lab.
#
# PUBLICATIONS:
# If you use RECAP, please cite the following paper:
# <INSERT PUBLICATION HERE>
#
# QUESTIONS:
# Please contact tperkins@ohri.ca
# ===============================================================
# ===============================================================
# Script version number
VERSION="1.0.2"
# Provide a variable with the location of this script.
SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Text display commands
bold=$(tput bold)
normal=$(tput sgr0)
# ===============================================================
# ===============================================================
# Set Flags
# Flags which can be overridden by user input.
# Default values are below
# ===============================================================
# source: https://www.gnu.org/software/coreutils/manual/html_node/Random-sources.html
function get_seeded_random() {
seed="$1"
openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \
</dev/zero 2>/dev/null
}
function mainRemix() {
####################### Begin Script Here #######################
#################################################################
start_time=`date +%s`
echo "##################################################"
echo "###### RECAP RE-MIX v$VERSION ######"
echo "##################################################"
echo ""
echo "Input directory (absolute path): $INPUT_DIR"
echo "Treatment library: $TREATMENT_NAME"
echo "Control library: $CONTROL_NAME"
echo "Output directory (absolute path): $OUTPUT_DIR"
echo "Re-mix method: $METHOD_NAME"
echo "Number of re-mixes: $BOOTSTRAP"
echo "Random seed: $SEED"
if [[ ! -d $INPUT_DIR ]]
then
echo -e "\nERROR: Input directory does not exist"
exit 1
fi
cd $INPUT_DIR
if [[ ! -e $TREATMENT_NAME || ! $TREATMENT_NAME == *.bed ]]
then
echo -e "\nERROR: Treatment bed file does not exist"
exit 1
elif [[ ! -e $CONTROL_NAME || ! $CONTROL_NAME == *.bed ]]
then
echo -e "\nERROR: Control bed file does not exist"
exit 1
elif [[ ! -d $OUTPUT_DIR ]]
then
echo -e "\nERROR: Output directory does not exist"
exit 1
elif [[ ! $BOOTSTRAP -gt 0 ]]
then
echo -e "\nERROR: Specify the number of re-mixes"
echo -e " Must be a natural number"
exit 1
fi
# Base names of treatment and control libraries
TREATMENT_NAME_BASE="${TREATMENT_NAME%.*}"
CONTROL_NAME_BASE="${CONTROL_NAME%.*}"
for (( BOOTSTRAP_COUNT=1; BOOTSTRAP_COUNT<=$BOOTSTRAP; BOOTSTRAP_COUNT++ ))
do
cd $INPUT_DIR
echo ""
echo "${bold}Starting Re-Mixing Procedure #$BOOTSTRAP_COUNT ${normal}"
echo "Creating directory for re-mix files:"
mkdir -p $OUTPUT_DIR/re-mix
echo "Check!"
echo "Concatenating treatment and control libraries"
cat $TREATMENT_NAME $CONTROL_NAME > $OUTPUT_DIR/re-mix/$TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp
echo "Check!"
# If method is unequal
if [ $METHOD = 0 ]
then
echo "Unequal mixing method selected"
FIRST_LINES=$( wc -l < $TREATMENT_NAME )
LAST_LINES=$( wc -l < $CONTROL_NAME )
cd $OUTPUT_DIR/re-mix
# If method is equal
elif [ $METHOD = 1 ]
then
echo "Equal mixing method selected"
cd $OUTPUT_DIR/re-mix
COMBINED_LINES=$(wc -l < $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp)
FIRST_LINES=$(( COMBINED_LINES / 2 ))
LAST_LINES=$(( COMBINED_LINES - FIRST_LINES ))
fi
echo "Re-mixing..."
shuf --random-source=<(get_seeded_random $SEED) -o $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp < $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp
echo "Check!"
echo "Creating re-mixed treatment library"
head -n $FIRST_LINES $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp > $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.bed
echo "Check!"
echo "Creating re-mixed control library"
tail -n $LAST_LINES $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp > $CONTROL_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.bed
echo "Check!"
echo "Deleting temporary files"
rm $TREATMENT_NAME_BASE.bootstrap_$BOOTSTRAP_COUNT.tmp
echo "Check!"
echo "Completed re-mix #$BOOTSTRAP_COUNT"
done
cd $INPUT_DIR
end_time=`date +%s`
echo RECAP RE-MIX execution time: `expr $end_time - $start_time`s.
#################################################################
######################## End Script Here ########################
}
#################### Begin Options and Usage ####################
# Print usage
usage() {
echo -n "
[Input directory] [Treatment file] [Control file]
[Output directory] [Re-mix method] [Bootstrap]
${bold}USAGE:${normal}
-i, --input Input file directory (absolute path)
-t, --treatment Treatment bed file
-c, --control Control bed file
-o, --output Output file directory (absolute path)
-m, --method Method of re-mixing (equal) or (unequal)
-b, --bootstrap Number of re-mixes
${bold}OPTIONS:${normal}
-h, --help Display this help and exit
"
}
# ===============================================================
# Iterate over options breaking --foo=bar into --foo bar
unset options
while (($#)); do
case $1 in
# If option is of type --foo=bar
--?*=*) options+=("${1%%=*}" "${1#*=}") ;;
# add --endopts for --
--) options+=(--endopts) ;;
# Otherwise, nothing special
*) options+=("$1") ;;
esac
shift
done
set -- "${options[@]}"
unset options
# ===============================================================
# ===============================================================
# Print help if no arguments or the incorrect number were passed.
[[ $# -eq 0 ]] && set -- "--help"
[[ $# -lt 6 ]] && set -- "--help"
# ===============================================================
# ===============================================================
# Read the options and set stuff
while [[ $1 = -?* ]]; do
case $1 in
-i|--input) shift; INPUT_DIR=${1} ;;
-t|--treatment) shift; TREATMENT_NAME=${1} ;;
-c|--control) shift; CONTROL_NAME=${1} ;;
-o|--output) shift; OUTPUT_DIR=${1} ;;
-m|--method) shift
if [ ${1} = 'equal' ]
then
METHOD=1
METHOD_NAME="Equal library sizes"
elif [ ${1} = 'unequal' ]
then
METHOD=0
METHOD_NAME="Default library sizes"
else
echo "ERROR: Invalid option"
exit 1
fi ;;
-b|--bootstrap) shift; BOOTSTRAP=${1} ;;
-s|--seed) shift; SEED=${1} ;;
-h|--help) usage >&2; exit 0 ;;
*) echo "ERROR: Bad argument ${1}" ; exit 1 ;;
esac
shift
done
# Store the remaining part as arguments.
args+=("$@")
# ===============================================================
##################### End Options and Usage #####################
# ===============================================================
# Set IFS to preferred implementation
IFS=$'\n\t'
# ===============================================================
# ===============================================================
# Run script
mainRemix
# ===============================================================