-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun-stat-flowd-tuple
134 lines (120 loc) · 3.41 KB
/
run-stat-flowd-tuple
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#
# Synopsis:
# Generate a tuple of stats in schema/$1/run/flowd.{pid,gyr}
# Description:
# For flowd, the stats sum the columns in the *.fdr records.
#
# <green> is the sum of all *.fdr records with both processes and
# queries in an OK state. <yellow> is currently 0 and <red> is the
# sum of flows with at least 1 query or process not OK.
#
# See the particular etc/*.flow file for what is OK or not OK.
#
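# State
#	UP	flowd verified running
#	DOWN	flowd enabled (etc/<schema>.flow exists) but no
#		run/flowd.{pid,gyr} exist
#	OFFL	flowd offline, run/*.{pid,gyr} do not exist
#	YETI	flowd seems to exist but run/flowd.pid is stale or
#		run/flowd.gyr is missing or empty
#	ZOMBI	flowd not running yet run/*.{pid,gyr} left behind,
#		or run/flowd.pid is empty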
# Usage:
# sbin/run-stat-flowd-tuple flowd
# sbin/run-stat-flowd-tuple flowd>wework
# Exit Status:
# 0 gyr tuple written
# 1 unexpected error, no tuple written
# Note:
# Desperately need to simplify and unify with the similar script in
# blobio/sbin/run-stat-flowd-tuple!
#
#  Staleness threshold handed to file-stale-mtime when testing run/flowd.pid
#  (presumably seconds -- TODO confirm against file-stale-mtime).
#  Hard-coded for now; need to derive from the heartbeat values in
#  etc/<schema>.flow.
STALE_AFTER=61
#
#  Write an error message, prefixed with the basename of this script, to
#  standard error and terminate the script with exit status 1.
#
#  Arguments:
#	$@	words of the message; joined with single spaces
#
die()
{
	#  "$0" is quoted so a script path containing whitespace still yields
	#  a correct basename; $* (not $@) because the words are being joined
	#  into a single string.
	echo "$(basename "$0"): ERROR: $*" >&2
	exit 1
}
#  exactly one argument, the schema, is required
test $# = 1 || die "wrong number of arguments: got $#, expected 1"
SCHEMA=$1			#  process name (flowd or flowd>tag)

#  all paths below are relative to $SETSPACE_ROOT/schema/$SCHEMA
test -n "$SETSPACE_ROOT" || die "env not defined: SETSPACE_ROOT"
cd "$SETSPACE_ROOT" || die "cd SETSPACE_ROOT failed: exit status=$?"

SCHEMA_DIR=schema/$SCHEMA
test -d "$SCHEMA_DIR" || die "no schema dir: $SCHEMA_DIR"
cd "$SCHEMA_DIR" || die "cd schema dir ($SCHEMA_DIR) failed: exit status=$?"

FLOW_PATH=etc/$SCHEMA.flow	#  path to <schema>/etc/<schema>.flow file
PID_PATH=run/flowd.pid		#  path to run/flowd.pid
GYR_PATH=run/flowd.gyr		#  path to run/flowd.gyr

#  defaults for values that are replaced further down when the
#  run/flowd.{pid,gyr} files are non-empty

PID_MTIME=null			#  most recent write to run/flowd.pid

#  last line of run/flowd.pid; can be overridden by value in "run/flowd.gyr"
BOOT_EPOCH=null
BOOT_GYR='null null null'

RECENT_EPOCH=null		#  epoch of most recent GYR sample
RECENT_GYR='null null null'
#
# The service is enabled if etc/<schema>.flow exists. The state
# (UP,DOWN,OFFL,ZOMBI,YETI) of the flowd process is determined by the
# existence, values in, or modify time of the files run/flowd.{pid,gyr}.
#
#
#  Set STATE from the existence, size and staleness of the files named by
#  FLOW_PATH, PID_PATH and GYR_PATH.
#
#  Globals read:	FLOW_PATH, PID_PATH, GYR_PATH, STALE_AFTER
#  Globals written:	STATE (and the scratch variables _P and STATUS)
#
#	OFFL	no flow file and neither run file exists
#	DOWN	flow file exists and neither run file exists
#	UP	pid file is non-empty and file-stale-mtime exits non-zero
#	YETI	file-stale-mtime exits zero yet pgrep still finds the process
#	ZOMBI	a run file exists without a flow file, or the gyr file
#		exists without a pid file, or the pid file is empty, or
#		file-stale-mtime exits zero and pgrep finds no process
#
#  Calls die when pgrep writes to standard error while exiting 1, or when
#  pgrep exits with any status other than 0 or 1.
#
set_flowd_state()
{
	#  service not enabled: leftover run files mean an unclean exit
	if [ ! -e "$FLOW_PATH" ]; then
		STATE=OFFL
		if [ -e "$PID_PATH" ] || [ -e "$GYR_PATH" ]; then
			STATE=ZOMBI
		fi
		return 0
	fi

	#  enabled but no pid file
	if [ ! -e "$PID_PATH" ]; then
		STATE=DOWN
		if [ -e "$GYR_PATH" ]; then	#  exit did not zap flowd.gyr
			STATE=ZOMBI
		fi
		return 0
	fi

	if [ ! -s "$PID_PATH" ]; then
		STATE=ZOMBI			#  empty pid file
		return 0
	fi

	#  NOTE(review): file-stale-mtime presumably exits 0 when the file
	#  has not been modified within STALE_AFTER -- confirm.
	if ! file-stale-mtime "$STALE_AFTER" "$PID_PATH"; then
		STATE=UP
		return 0
	fi

	#  Stale pid file: ask pgrep if the process still exists.
	#
	#  Capture only what pgrep writes to standard error.  The order of
	#  the redirections matters: stderr is first pointed at the command
	#  substitution, then stdout is discarded.  Previously only stdout
	#  was discarded, so _P was always empty and the pidfile syntax
	#  check below could never fire.
	_P=$(pgrep -F "$PID_PATH" 2>&1 >/dev/null)
	STATUS=$?
	case $STATUS in
	0)
		STATE=YETI
		;;
	1)
		#  pgrep grumbles about a syntax error
		#  in pidfile, yet still exits with
		#  status 1.
		if [ -n "$_P" ]; then
			die "pgrep failed: $_P"
		fi
		STATE=ZOMBI
		;;
	*)
		die "pgrep failed: exit status=$STATUS" "$_P"
		;;
	esac
}
set_flowd_state
#
#  Pull the boot epoch from a non-empty pid file.  A single -s test is
#  enough, since -s is only true for a file that exists; the earlier
#  duplicate read of the same file has been folded into this one.
#
#  PID_MTIME is gathered but is not part of the tuple written below.
#
if [ -s "$PID_PATH" ]; then
	PID_MTIME=$(file-stat-mtime "$PID_PATH")
	BOOT_EPOCH=$(tail -n 1 "$PID_PATH")
fi

#
#  A non-empty gyr file overrides the boot epoch taken from the pid file
#  and supplies the boot/recent samples.  A missing or empty gyr file
#  demotes an UP process to YETI.
#
if [ -s "$GYR_PATH" ]; then
	BOOT_EPOCH=$(grep '^boot ' "$GYR_PATH" | cut -f2)
	BOOT_GYR=$(grep '^boot ' "$GYR_PATH" | cut -f3-5)
	RECENT_EPOCH=$(grep '^recent ' "$GYR_PATH" | cut -f2)
	RECENT_GYR=$(grep '^recent ' "$GYR_PATH" | cut -f3-5)
elif [ "$STATE" = UP ]; then
	STATE=YETI
fi

#  write the tuple to standard output
cat <<END
$SCHEMA $STATE $BOOT_EPOCH $BOOT_GYR $RECENT_EPOCH $RECENT_GYR
END