-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathplot_pump.py
241 lines (188 loc) · 8.08 KB
/
plot_pump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import numpy as np
import pandas as pd
import pump_watcher as pw
import csv
from numpy import genfromtxt
from datetime import datetime
from datetime import timedelta
import argparse
import matplotlib
def count_pump_on(data):
    """
    Count how many times a PUMP_ON event occurs in the data.
    :param data: list of tuples of the form (datetime, id, event) — the event code is at index 2
    :return: number of occurrences of PUMP_ON
    """
    # int() guards against the event code arriving as a string or numpy scalar
    return sum(1 for record in data if int(record[2]) == pw.PUMP_ON)
def count_pump_off(data):
    """
    Count how many times a PUMP_OFF event occurs in the data.
    :param data: list of tuples of the form (datetime, id, event) — the event code is at index 2
    :return: number of occurrences of PUMP_OFF
    """
    # int() guards against the event code arriving as a string or numpy scalar
    return sum(1 for record in data if int(record[2]) == pw.PUMP_OFF)
def find_next_event(data, event_type):
    """
    Return the suffix of data beginning at the first occurrence of event_type.
    :param data: list of tuples of the form (datetime, id, event) — the event code is at index 2
    :param event_type: the event code to search for
    :return: subset of data starting with event_type, or None if not found
    """
    position = next(
        (i for i, record in enumerate(data) if int(record[2]) == event_type),
        None)
    return None if position is None else data[position:]
def find_next_duration(data, event1, event2):
    """
    Get the duration of the next event1/event2 combination.
    :param data: list of tuples of the form (datetime, id, event)
    :param event1: the event code that opens the interval
    :param event2: the event code that closes the interval
    :return: tuple (start_ts, duration, remaining) where start_ts is the timestamp
             of event1, duration is the timedelta until the following event2, and
             remaining is the data from event2 onwards; (None, None, None) when no
             such pair exists
    """
    opening = find_next_event(data, event1)
    if opening is None:
        # no further event1 in the data
        return None, None, None
    start_ts = opening[0][0]
    closing = find_next_event(opening[1:], event2)
    if closing is None:
        # event1 found but never followed by event2
        return None, None, None
    end_ts = closing[0][0]
    return start_ts, end_ts - start_ts, closing
def create_durations_for_event_pair(data, event1, event2, col, strip_outliers=False):
    """
    Calculate durations between successive event1/event2 pairs (e.g. pump on/off).
    :param data: list of tuples, of the form (datetime, id, event)
    :param event1: event type to start the duration
    :param event2: event type to end the duration
    :param col: label for the durations column to use in the returned dataframe
    :param strip_outliers: if true then ignore values +/- 1 std dev from the arithmetic mean
    :return: dataframe indexed by the time of each event1, with one duration column (seconds)
    """
    durations = []
    while data is not None:
        event_ts, pump_duration, data = find_next_duration(data, event1, event2)
        # explicit None test: a zero-length duration (timedelta(0)) is falsy
        # but is still a valid event pair and must be kept
        if pump_duration is not None:
            # convert to seconds - gives clearer graphs
            durations.append({'time': event_ts, col: pump_duration.total_seconds()})
    # build the frame in one shot; DataFrame.append was removed in pandas 2.0
    if durations:
        df = pd.DataFrame(durations, columns=["time", col])
    else:
        df = pd.DataFrame(columns=["time", col])
    df.set_index('time', inplace=True)
    # drop values > +/- 1 std dev
    if strip_outliers:
        mean = df.mean()
        std_dev = df.std(ddof=0)  # ddof=0 avoids NaN std dev for a single value
        # mask then dropna: the boolean filter leaves NaN rows behind otherwise
        df = df[(df >= mean - std_dev) & (df <= mean + std_dev)].dropna()
    return df
def thingspeak_str2date(x):
    """
    Convert a timestamp string from a ThingSpeak export into a datetime.
    :param x: bytes holding a timestamp of the form %Y-%m-%d %H:%M:%S UTC
    :return: a datetime representing x
    """
    text = x.decode("utf-8")
    return datetime.strptime(text, "%Y-%m-%d %H:%M:%S UTC")
def plot_durations(df):
    """
    Plot a bar graph of durations (works for ON-OFF or OFF-ON pairs).
    The mean is marked with a dashed horizontal line and the most recent
    duration with a solid line plus a text label.
    :param df: dataframe indexed by event time with a single duration column
    """
    axes = df.plot(kind="bar", linewidth=0, logy=False, legend=False)
    # timestamps make unreadable tick labels; must clear after .plot()
    axes.get_xaxis().set_ticks([])
    axes.set_xlabel("t")
    axes.set_ylabel("duration")
    # dashed red line at the arithmetic mean
    mean_duration = df.iloc[:, 0].mean()
    axes.axhline(mean_duration, color='r', linestyle="dashed")
    # solid red line and label at the most recent duration
    newest = df.iloc[-1]
    newest_duration = newest.iat[0]
    axes.axhline(newest_duration, color='r')
    label = "last = %s @ %s" % (str(timedelta(seconds=int(newest_duration))), newest.name)
    axes.text(0, newest_duration, label)
def build_graphs(filename, truncate, show_graphs=False):
    """
    Generate graphs for pump data and save files as .PNG under graphs/.
    :param filename: CSV file of ThingSpeak events (created_at, entry_id, field1)
    :param truncate: keep only the last `truncate` entries in each graph
    :param show_graphs: if true then show each graph interactively (as well as saving as PNG)
    """
    print("Reading data from %s..." % filename)
    data = genfromtxt(filename, delimiter=",", dtype=None, names=True, converters={0: thingspeak_str2date})
    print("Read %d entries" % len(data))
    print(data.dtype.names)
    print("Generating graphs...")
    if not show_graphs:
        # backend must be selected before pyplot is imported
        matplotlib.use("Agg")  # allows figures to be generated on headless server
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(style="dark")
    time, _sample_id, event = zip(*data)  # unpack to positional args --> unzip
    # ---------- FIGURE ----------
    # plot raw data, truncated to the most recent entries
    df = pd.DataFrame({"time": time, "pump": event})
    df = df[-truncate:]
    ax = df.plot(x="time", y="pump", legend=False)
    ax.set_ylabel("on/off")
    plt.title("Pump activity")
    plt.savefig("graphs/fig_pump.png", bbox_inches="tight")
    # ---------- FIGURE ----------
    # plot durations when pump is on
    on_off_df = create_durations_for_event_pair(data, pw.PUMP_ON, pw.PUMP_OFF, "on_duration")
    on_off_df = on_off_df[-truncate:]
    plot_durations(on_off_df)
    plt.title("Pump ON/OFF durations")
    plt.savefig("graphs/fig_pump_durations_on_off.png", bbox_inches="tight")
    # ---------- FIGURE ----------
    # plot durations when pump is off
    off_on_df = create_durations_for_event_pair(data, pw.PUMP_OFF, pw.PUMP_ON, "off_duration")
    off_on_df = off_on_df[-truncate:]
    plot_durations(off_on_df)
    plt.title("Pump OFF/ON durations")
    plt.savefig("graphs/fig_pump_durations_off_on.png", bbox_inches="tight")
    if show_graphs:
        plt.show()
    # NOTE: removed unreachable dead code that followed an unconditional
    # return and referenced an undefined name (`on_durations`) — it would
    # have raised NameError had it ever executed.
def main():
    """
    Entry point: parse command-line args, optionally create test data,
    then read the ThingSpeak data and build graphs.
    """
    parser = argparse.ArgumentParser(description='Analyses data from ThinkSpeak.com and generates graphs.')
    parser.add_argument('filename', help='file to process')
    parser.add_argument('--show', dest='show', action='store_true', default=False,
                        help='show graphs (as well as saving)')
    parser.add_argument('--create', dest="create_data", action="store_true", default=False, help="create test data")
    args = parser.parse_args()
    if args.create_data:
        print("Creating test data...")
        samples = pw.gen_random_samples(40)
        # newline='' lets the csv module control line endings
        with open(args.filename, "w", newline='') as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerows([["created_at", "entry_id", "field1"]])
            csv_writer.writerows(samples)
    build_graphs(args.filename, 100, args.show)
# script entry point: run only when executed directly, not on import
if __name__ == "__main__":
    main()