-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathGlobalFunction.sh
263 lines (239 loc) · 10.3 KB
/
GlobalFunction.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#!/usr/bin/env bash
###### trap_add <command> <trap_name> ######
###### e.g. trap_add 'echo "in trap SIGINT"' SIGINT ######
# Write all arguments, joined into a single line, to stdout.
log() {
  local message="$*"
  printf '%s\n' "${message}"
}
# Report the arguments as one "ERROR: ..." line on stderr (via log).
error() {
  local message="ERROR: $*"
  log "${message}" 1>&2
}
# Report the arguments through error() and terminate the shell with status 1.
fatal() {
  local -a details=("$@")
  error "${details[@]}"
  exit 1
}
# Append <command> to the handlers already installed for each named trap
# instead of replacing them.
# Arguments: $1    - command to append
#            $2... - trap names (e.g. SIGINT EXIT)
# Exits (through fatal) when called without arguments or when `trap` rejects
# the new handler.
trap_add() {
  # Keep the scratch values local so they do not leak into the caller's
  # namespace; the command substitution below still sees them.
  local trap_add_cmd trap_add_name
  trap_add_cmd=$1
  shift || fatal "trap_add usage error"
  for trap_add_name in "$@"; do
    trap -- "$(
      # helper fn to get existing trap command from output
      # of trap -p
      extract_trap_cmd() { printf '%s\n' "$3"; }
      # print existing trap command with newline
      # (`trap -p` prints: trap -- '<command>' <name>, so <command> is $3)
      eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
      # print the new trap command
      printf '%s;\n' "${trap_add_cmd}"
    )" "${trap_add_name}" || fatal "unable to add to trap ${trap_add_name}"
  done
}
# Give trap_add the trace attribute: per the bash manual, traced functions
# inherit the DEBUG and RETURN traps from the calling shell.
declare -f -t trap_add
###### color_echo <color> <message> ######
# Print <message> on stdout wrapped in the ANSI escape sequence for <color>.
# Recognised colors: red, green, yellow, blue, purple.
# Any other color prints the message without coloring instead of silently
# dropping it. Backslash escapes in <message> are interpreted (echo -e).
color_echo() {
  local color=$1
  local text=$2
  local code
  case "$color" in
    red) code=31 ;;
    green) code=32 ;;
    yellow) code=33 ;;
    blue) code=34 ;;
    purple) code=35 ;;
    *)
      # Unknown color: the message still matters, so show it uncolored.
      echo -e "$text"
      return 0
      ;;
  esac
  echo -e "\033[${code}m$text\033[0m"
}
###### processbar <current> <total> <label> ######
# Print one progress line: a carriage return, a 50-column bar of '=' filled
# in proportion to current/total, the "[current/total]" counter right-aligned
# in 10 columns, a space, the label and a newline.
# The fill is clamped to the bar width, and a total of 0 (or less) draws an
# empty bar instead of dividing by zero.
processbar() {
  local current=$1
  local total=$2
  local label=$3
  local maxlen=60
  local barlen=50
  local format="%-${barlen}s%$((maxlen - barlen))s %s"
  local perc="[$current/$total]"
  local progress=0
  local prog=""

  if ((total > 0)); then
    progress=$((current * barlen / total))
  fi
  # Keep the bar inside its field so the counter column never shifts.
  if ((progress < 0)); then
    progress=0
  elif ((progress > barlen)); then
    progress=$barlen
  fi

  # Build exactly $progress '=' characters without forking seq.
  printf -v prog '%*s' "$progress" ''
  prog=${prog// /=}

  printf "\r${format}\n" "$prog" "$perc" "$label"
}
# NOTE(review): not referenced in this part of the file; presumably a progress
# counter maintained by callers - confirm.
bar=0
###### check_logfile <sample> <tool> <logfile> <error_pattern> <complete_pattern> <mode> [status] ######
# Inspect a tool's logfile before ("precheck") or after ("postcheck") a run
# and report the outcome through color_echo.
# Arguments:
#   $1 sample            label used in messages
#   $2 tool              tool name used in messages
#   $3 logfile           path of the log to inspect
#   $4 error_pattern     PCRE (grep -P, case-insensitive) marking a problem
#   $5 complete_pattern  PCRE (grep -P, case-insensitive) marking completion
#   $6 mode              "precheck" or "postcheck"
#   $7 status            optional exit status of the tool (postcheck only)
# Returns:
#   precheck : 0 when the log carries the completion marker (tool is skipped),
#              1 otherwise (tool has to be run).
#   postcheck: 0 when the run is accepted - the completion line is then
#              appended to the logfile - and 1 on a non-zero status, a missing
#              log, or an error match without a completion marker.
#   Any other mode: 1, with a diagnostic on stderr.
# The two assignments below are the default patterns at file scope.
error_pattern="(error)|(fatal)|(corrupt)|(interrupt)|(EOFException)|(no such file or directory)"
complete_pattern="(NGSmodule finished the job)"
check_logfile() {
  local sample=$1
  local tool=$2
  local logfile=$3
  local error_pattern=$4
  local complete_pattern=$5
  local mode=$6
  local status=$7
  # NOTE(review): these two used to be assigned as globals; confirm that no
  # caller reads $error / $complete after the call.
  local error complete

  # Without this guard an unknown mode fell through every branch and
  # returned 0, which callers would take for success.
  if [[ $mode != "precheck" && $mode != "postcheck" ]]; then
    printf 'check_logfile: unknown mode "%s" (expected precheck or postcheck)\n' "$mode" >&2
    return 1
  fi

  if [[ $mode == "postcheck" && $status != "" && $status != 0 ]]; then
    color_echo "yellow" "[INFO] ${sample}: postcheck detected the non-zero exit status($status) for the ${tool}."
    return 1
  fi

  if [[ ! -f $logfile ]]; then
    if [[ $mode == "precheck" ]]; then
      color_echo "blue" "+++++ ${sample}: Start ${tool} [precheck] +++++"
    else
      color_echo "yellow" "[INFO] ${sample}: postcheck cannot find the log file for the tool ${tool}: $logfile."
    fi
    return 1
  fi

  # Distinct matched substrings, joined with '|' for the messages below.
  error=$(grep -ioP -e "${error_pattern}" -- "${logfile}" | sort -u | paste -sd "|" -)
  complete=$(grep -ioP -e "${complete_pattern}" -- "${logfile}" | sort -u | paste -sd "|" -)

  # A completion marker wins over any error match.
  if [[ -n $complete ]]; then
    if [[ $mode == "precheck" ]]; then
      color_echo "blue" "+++++ ${sample}: ${tool} skipped [precheck] +++++"
    else
      color_echo "blue" "+++++ ${sample}: ${tool} done [postcheck] +++++"
      echo -e "NGSmodule finished the job [${tool}]" >>"${logfile}"
    fi
    return 0
  fi

  if [[ -n $error ]]; then
    if [[ $mode == "precheck" ]]; then
      color_echo "yellow" "[INFO] ${sample}: precheck detected problems($error) in ${tool} logfile: ${logfile}. Restart ${tool}."
    else
      color_echo "yellow" "[INFO] ${sample}: postcheck detected problems($error) in ${tool} logfile: ${logfile}."
    fi
    return 1
  fi

  # Neither marker present: undecidable before a run, accepted after one.
  if [[ $mode == "precheck" ]]; then
    color_echo "yellow" "[INFO] ${sample}: precheck unable to determine ${tool} status. Restart ${tool}."
    return 1
  fi
  color_echo "blue" "+++++ ${sample}: ${tool} done with no problem [postcheck] +++++"
  echo -e "NGSmodule finished the job [${tool}]" >>"${logfile}"
  return 0
}
###### globalcheck_logfile "$dir" logfiles[@] "$force" "$error_pattern" "$complete_pattern" "$sample" ######
# Find the named logfiles anywhere below <dir> and delete the stale ones.
# Arguments:
#   $1 dir               directory searched recursively with find
#   $2 logfiles[@]       NAME[@] of the caller's array of log file names
#                        (each entry is used as a `find -name` pattern)
#   $3 force             "TRUE" deletes every log that was found
#   $4 error_pattern     PCRE (grep -P, case-insensitive)
#   $5 complete_pattern  PCRE (grep -P, case-insensitive)
#   $6 sample            label used in messages
# When force is not "TRUE", a log is deleted only if it matches the error
# pattern and does not match the complete pattern.
globalcheck_logfile() {
  local dir="${1}"
  # The copy uses a name callers are unlikely to use themselves: a local
  # called "logfiles" would shadow the caller's array in the documented call
  # form before ${!2} is expanded.
  local -a _gcl_names=("${!2}")
  local force="${3}"
  local error_pattern="${4}"
  local complete_pattern="${5}"
  local id="${6}"
  # NOTE(review): the old find_par / existlogs / line / log were globals;
  # confirm that no caller reads them after the call.
  local -a find_expr=() existlogs=()
  local name log

  # Build "-name A -o -name B ..." as an array so names containing spaces or
  # glob characters reach find unchanged.
  for name in "${_gcl_names[@]}"; do
    if ((${#find_expr[@]} > 0)); then
      find_expr+=(-o)
    fi
    find_expr+=(-name "$name")
  done
  if ((${#find_expr[@]} == 0)); then
    return 0
  fi

  while IFS= read -r -d '' log; do
    existlogs+=("$log")
  done < <(find "${dir}" \( "${find_expr[@]}" \) -print0)

  if ((${#existlogs[@]} == 0)); then
    return 0
  fi

  if [[ $force == "TRUE" ]]; then
    color_echo "yellow" "[INFO] ${id}: Force to perform a complete workflow."
    rm -f -- "${existlogs[@]}"
    return 0
  fi

  for log in "${existlogs[@]}"; do
    if grep -qiP -e "${error_pattern}" -- "${log}" &&
      ! grep -qiP -e "${complete_pattern}" -- "${log}"; then
      color_echo "yellow" "[INFO] ${id}: Detected uncompleted status from logfile: ${log}."
      rm -f -- "${log}"
    fi
  done
  return 0
}
# Helper for fqCheck_SE: succeed when the text in $1 (the last four lines of
# a FASTQ stream) is one well-formed record - header starting with '@',
# separator starting with '+', and a non-empty sequence whose length equals
# the quality string's length.
_fqCheck_SE_record_ok() {
  local header seq plus qual
  {
    IFS= read -r header
    IFS= read -r seq
    IFS= read -r plus
    IFS= read -r qual
  } <<<"$1"
  [[ ${header:0:1} == "@" && ${plus:0:1} == "+" && ${#seq} -gt 0 && ${#seq} -eq ${#qual} ]]
}
###### fqCheck_SE <sample> <fq1> <logfile> ######
# Sanity-check a gzip-compressed single-end FASTQ file.
# Arguments: $1 sample label, $2 path of the .gz FASTQ, $3 logfile.
# Checks, in order: gzip integrity (pigz -t), line count non-zero and
# divisible by 4, and the shape of the last record.
# The logfile is appended to when the gzip test fails; otherwise it is
# overwritten with the line/read counts followed by the verdict.
# Returns 0 when every check passes, 1 otherwise.
fqCheck_SE() {
  local sample="${1}"
  local fq1="${2}"
  local logfile="${3}"
  # NOTE(review): the fq1_* values used to be globals; confirm that no caller
  # reads them after the call.
  local fq1_nlines fq1_tail_line

  # NOTE(review): realpath presumably resolves symlinks because `pigz -t`
  # skips symbolic links - confirm.
  if ! pigz -t "$(realpath "${fq1}")" 2>/dev/null; then
    echo -e "ERROR! fq1:${fq1} is not a completed .gz file.\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq1 is not a completed .gz file."
    return 1
  fi

  fq1_nlines=$(unpigz -c "$fq1" | wc -l)
  fq1_tail_line=$(unpigz -c "$fq1" | tail -n4)
  echo -e "fq1_nlines:$fq1_nlines fq1_nreads:$((fq1_nlines / 4))" >"$logfile"

  if ((fq1_nlines == 0 || fq1_nlines % 4 != 0)); then
    echo -e "ERROR! fq1_nlines count is zero or not divisible by 4.\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq1_nlines is zero or not divisible by 4."
    return 1
  fi
  # Lengths are measured on the strings themselves: the previous
  # `echo | wc -c` always counted the trailing newline, so the
  # "empty sequence" test could never trigger.
  if ! _fqCheck_SE_record_ok "$fq1_tail_line"; then
    echo -e "ERROR! fq1_tail_line format is wrong:\n$fq1_tail_line\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq1_tail_line format is wrong."
    return 1
  fi

  echo -e "FastqCheck passed:\n$fq1.\n\n" >>"$logfile"
  return 0
}
# Helper for fqCheck_PE: succeed when the text in $1 (the last four lines of
# a FASTQ stream) is one well-formed record - header starting with '@',
# separator starting with '+', and a non-empty sequence whose length equals
# the quality string's length.
_fqCheck_PE_record_ok() {
  local header seq plus qual
  {
    IFS= read -r header
    IFS= read -r seq
    IFS= read -r plus
    IFS= read -r qual
  } <<<"$1"
  [[ ${header:0:1} == "@" && ${plus:0:1} == "+" && ${#seq} -gt 0 && ${#seq} -eq ${#qual} ]]
}
###### fqCheck_PE <sample> <fq1> <fq2> <logfile> ######
# Sanity-check a pair of gzip-compressed paired-end FASTQ files.
# Arguments: $1 sample label, $2 / $3 paths of the two .gz FASTQ files,
#            $4 logfile.
# Checks, in order: gzip integrity of each file (pigz -t), equal line counts,
# line counts non-zero and divisible by 4, and the shape of the last record
# of each file.
# The logfile is appended to when a gzip test fails; otherwise it is
# overwritten with the line/read counts followed by the verdict.
# Returns 0 when every check passes, 1 otherwise.
fqCheck_PE() {
  local sample="${1}"
  local fq1="${2}"
  local fq2="${3}"
  local logfile="${4}"
  # NOTE(review): the fq1_* / fq2_* values used to be globals; confirm that
  # no caller reads them after the call.
  local fq1_nlines fq1_tail_line fq2_nlines fq2_tail_line

  # NOTE(review): realpath presumably resolves symlinks because `pigz -t`
  # skips symbolic links - confirm.
  if ! pigz -t "$(realpath "${fq1}")" 2>/dev/null; then
    echo -e "ERROR! fq1:${fq1} is not a completed .gz file.\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq1 is not a completed .gz file."
    return 1
  fi
  if ! pigz -t "$(realpath "${fq2}")" 2>/dev/null; then
    echo -e "ERROR! fq2:${fq2} is not a completed .gz file.\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq2 is not a completed .gz file."
    return 1
  fi

  fq1_nlines=$(unpigz -c "$fq1" | wc -l)
  fq1_tail_line=$(unpigz -c "$fq1" | tail -n4)
  fq2_nlines=$(unpigz -c "$fq2" | wc -l)
  fq2_tail_line=$(unpigz -c "$fq2" | tail -n4)
  echo -e "fq1_nlines:$fq1_nlines fq1_nreads:$((fq1_nlines / 4))\nfq2_nlines:$fq2_nlines fq2_nreads:$((fq2_nlines / 4))" >"$logfile"

  if ((fq1_nlines != fq2_nlines)); then
    echo -e "ERROR! $sample has different numbers of reads between paired fastq.\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: has different numbers of reads between paired fastq."
    return 1
  fi
  if ((fq1_nlines == 0 || fq2_nlines == 0 || fq1_nlines % 4 != 0 || fq2_nlines % 4 != 0)); then
    echo -e "ERROR! fq1_nlines or fq2_nlines count is zero or not divisible by 4.\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq1_nlines or fq2_nlines count is zero or not divisible by 4."
    return 1
  fi
  # Lengths are measured on the strings themselves: the previous
  # `echo | wc -c` always counted the trailing newline, so the
  # "empty sequence" test could never trigger.
  if ! _fqCheck_PE_record_ok "$fq1_tail_line"; then
    echo -e "ERROR! fq1_tail_line format is wrong:\n$fq1_tail_line\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq1_tail_line format is wrong."
    return 1
  fi
  if ! _fqCheck_PE_record_ok "$fq2_tail_line"; then
    echo -e "ERROR! fq2_tail_line format is wrong:\n$fq2_tail_line\n" >>"$logfile"
    color_echo "yellow" "[INFO] $sample: fq2_tail_line format is wrong."
    return 1
  fi

  echo -e "FastqCheck passed:\n$fq1\n$fq2.\n\n" >>"$logfile"
  return 0
}
###### fifo $ntask_per_run ######
# Open file descriptor 1000 read/write on a named pipe and preload it with
# <ntask_per_run> empty lines.
# NOTE(review): callers presumably take one line per running task
# (read -u1000) and write one back when the task ends (echo >&1000) to cap
# concurrency - that usage is not visible here, confirm.
# Arguments: $1 ntask_per_run - number of lines to preload (positive integer)
# Globals:   tempfifo (written) - name of the pipe, "<pid>.fifo" in the
#            current directory; the path is unlinked again as soon as the
#            descriptor is open.
# Registers, via trap_add, a SIGINT/SIGTERM/EXIT handler that closes fd 1000
# and removes the pipe path.
# Returns 1, with a message on stderr, when the argument is not a positive
# integer or the pipe cannot be created or opened.
fifo() {
  local ntask_per_run=$1
  # A local counter: the previous global `i` overwrote the caller's `i`.
  local slot

  # A non-numeric or zero count used to preload nothing, leaving the pool
  # empty without any diagnostic.
  if [[ ! $ntask_per_run =~ ^[1-9][0-9]*$ ]]; then
    printf 'fifo: ntask_per_run must be a positive integer, got "%s"\n' "$ntask_per_run" >&2
    return 1
  fi

  tempfifo=$$.fifo
  trap_add "exec 1000>&-;exec 1000<&-;rm -f $tempfifo" SIGINT SIGTERM EXIT
  if ! mkfifo "$tempfifo"; then
    printf 'fifo: unable to create named pipe %s\n' "$tempfifo" >&2
    return 1
  fi
  if ! exec 1000<>"$tempfifo"; then
    printf 'fifo: unable to open %s on file descriptor 1000\n' "$tempfifo" >&2
    rm -f "$tempfifo"
    return 1
  fi
  # The open descriptor keeps the pipe alive; the path is no longer needed.
  rm -f "$tempfifo"
  for ((slot = 1; slot <= ntask_per_run; slot++)); do
    echo >&1000
  done
}