""""
Detect people (and other objects) in a videostream. Videostream may be a
specified webcam or videofile.
It can show graphs with the history of detected people. People count is
always saved to file.
Usage: python detector.py --config config.ini
It uses a pre-trained YOLO v3 network for object detection, trained on the COCO dataset
Yolo v3: https://arxiv.org/abs/1804.02767
"""
import argparse
import time
import os
import sys
import configparser
import csv
import datetime
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import cv2


def define_args():
    """
    Specify the arguments of the application
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-c", "--config", required=True, help="Configuration file")
    return vars(ap.parse_args())


def download_if_not_present(url, file_name):
    """
    Check if the file is present; if not, download it from the url
    :param url: Full URL of the download location
    :param file_name: filename for storing the file, may include paths
    :return:
    """
    if not os.path.exists(file_name):
        with open(file_name, "wb") as f:
            response = requests.get(url, stream=True)
            total_length = response.headers.get('content-length')
            if total_length is None:
                # no content length header
                f.write(response.content)
            else:
                print_file_name = "..." + file_name[-17:] if len(file_name) > 20 else file_name
                print_file_name = "{:<20}".format(print_file_name)
                downloaded = 0
                total_length = int(total_length)
                for data in response.iter_content(chunk_size=4096):
                    downloaded += len(data)
                    f.write(data)
                    percentage = min(int(100 * downloaded / total_length), 100)
                    progress = min(int(50 * downloaded / total_length), 50)
                    sys.stdout.write("\rDownloading {} [{} {}] {}%".format(print_file_name, '=' * progress,
                                                                           ' ' * (50 - progress), percentage))
                    sys.stdout.flush()
                sys.stdout.write("\n")
                sys.stdout.flush()


def print_ascii_large(text, font_size=18):
    """
    Print large text in ASCII art style
    :param text: Text to print
    :param font_size: Font size (default = 18)
    :return:
    """
    myfont = ImageFont.truetype("verdanab.ttf", font_size)
    # note: ImageFont.getsize() was removed in Pillow 10; newer versions offer getbbox()
    img = Image.new("1", myfont.getsize(text), "black")
    draw = ImageDraw.Draw(img)
    draw.text((0, 0), text, "white", font=myfont)
    pixels = np.array(img, dtype=np.uint8)
    chars = np.array([' ', '#'], dtype="U1")[pixels]
    strings = chars.view('U' + str(chars.shape[1])).flatten()
    print()
    for s in strings:
        if len(s.strip()) > 0:
            print(s)
    print()


def read_config(filename):
    """
    Read the configuration file
    :param filename: Filename of the configuration file
    :return: configuration object
    """
    print("[INFO] Reading config: {}".format(filename))
    if not os.path.isfile(filename):
        print("[ERROR] Config file \"{}\" not found.".format(filename))
        exit()
    cfg = configparser.ConfigParser()
    cfg.read(filename)
    return cfg


def save_count(filename, n):
    """
    Save the specified value to a file.
    Value is appended to the end of the file.
    Format: <timestamp> , <value>
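    Example line (illustrative): 20190101_12-00-00 , 5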
    :param filename: filename of targetfile
    :param n: value to store
    :return:
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H-%M-%S")
    line = "{} , {}\n".format(timestamp, n)
    with open(filename, "a") as f:
        f.write(line)


def read_existing_data(filename):
    """
    Read existing data from file. If the file is not found, an empty
    initialized frame is returned
    :param filename: filename of targetfile
    :return: dataframe with timestamps and measurements
    """
    times = []
    values = []
    if os.path.isfile(filename):
        with open(filename) as csvfile:
            csv_reader = csv.reader(csvfile, delimiter=',')
            for row in csv_reader:
                # note the trailing space in the format: save_count writes "<timestamp> , <value>"
                times.append(datetime.datetime.strptime(row[0], "%Y%m%d_%H-%M-%S "))
                values.append(int(row[1]))
    dataframe = pd.DataFrame()
    dataframe['timestamp'] = pd.Series(times, dtype='datetime64[ns]')
    dataframe['value'] = pd.Series(values, dtype=np.int32)
    dataframe.set_index('timestamp', inplace=True)
    return dataframe


def blur_area(image, top_x, top_y, w, h):
    """
    Blur the specified area of the frame.
    Blurred area = <x,y> - <x+w, y+h>
    :type image: RGB array
    :type top_x: int
    :type top_y: int
    :type w: int
    :type h: int
    """
    # get the rectangle around the detection and apply blur
    sub_frame = image[top_y:top_y + h, top_x:top_x + w]
    sub_frame = cv2.GaussianBlur(sub_frame, (31, 31), 30)
    # merge back into the frame
    image[top_y:top_y + sub_frame.shape[0], top_x:top_x + sub_frame.shape[1]] = sub_frame
    return image
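
# Note: the GaussianBlur above uses a fixed 31x31 kernel with sigma 30, so the
# blur strength does not scale with the box size; kernel dimensions must be odd.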


def execute_network(image, network, layernames):
    """
    Pull frame through the network
    :type image: RGB array
    :type network: object containing Yolo network
    :type layernames: array of layer names
    """
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    start2 = time.time()
    network.setInput(blob)
    outputs = network.forward(layernames)
    end2 = time.time()
    print("[INFO] YOLO took : %2.1f sec" % (end2 - start2))
    return outputs
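
# blobFromImage scales pixel values by 1/255, resizes to the 416x416 input of
# this YOLOv3 configuration, and swaps channels because OpenCV frames are BGR
# while the network expects RGB.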


def load_network(network_folder):
    """
    Load the Yolo network from disk.
    https://pjreddie.com/media/files/yolov3.weights
    https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg
    :param network_folder: folder where network files are stored
    """
    # Derive file paths and check existence
    labelspath = os.path.sep.join([network_folder, "coco.names"])
    download_if_not_present("https://github.com/pjreddie/darknet/blob/master/data/coco.names?raw=true", labelspath)
    if not os.path.isfile(labelspath):
        print("[ERROR] Network: Labels file \"{}\" not found.".format(labelspath))
        exit()
    weightspath = os.path.sep.join([network_folder, "yolov3.weights"])
    download_if_not_present("https://pjreddie.com/media/files/yolov3.weights", weightspath)
    if not os.path.isfile(weightspath):
        print("[ERROR] Network: Weights file \"{}\" not found.".format(weightspath))
        exit()
    configpath = os.path.sep.join([network_folder, "yolov3.cfg"])
    download_if_not_present("https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg?raw=true", configpath)
    if not os.path.isfile(configpath):
        print("[ERROR] Network: Configuration file \"{}\" not found.".format(configpath))
        exit()
    # load the YOLO object detector trained on the COCO dataset (80 classes)
    # and determine only the *output* layer names that we need from YOLO.
    # Network is stored in Darknet format
    print("[INFO] loading YOLO from disk...")
    labels = open(labelspath).read().strip().split("\n")
    network = cv2.dnn.readNetFromDarknet(configpath, weightspath)
    names = network.getLayerNames()
    # getUnconnectedOutLayers() returns an Nx1 array in older OpenCV versions
    # and a flat array in newer ones; flatten to handle both
    names = [names[int(i) - 1] for i in np.array(network.getUnconnectedOutLayers()).flatten()]
    return network, names, labels
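
# With the download URLs above, the network folder ends up holding three files:
#   <network_folder>/coco.names     - the 80 COCO class labels
#   <network_folder>/yolov3.weights - pre-trained Darknet weights (roughly 240 MB)
#   <network_folder>/yolov3.cfg     - the network architecture definition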


def get_detected_items(layeroutputs, confidence_level, threshold, img_width, img_height):
    """
    Determine the objects as found by the network. Found objects are filtered
    on confidence level and threshold.
    """
    # initialize our lists of detected bounding boxes, confidences, and class IDs
    detected_boxes = []
    detection_confidences = []
    detected_classes = []
    for output in layeroutputs:
        # loop over each of the detections
        for detection in output:
            # extract the class ID and confidence (i.e., probability) of the current object detection
            scores = detection[5:]
            classid = np.argmax(scores)
            confidence = scores[classid]
            # filter out weak predictions by ensuring the detected probability is greater than the minimum probability
            if confidence > confidence_level:
                # scale the bounding box coordinates back relative to the size of the image
                box = detection[0:4] * np.array([img_width, img_height, img_width, img_height])
                (center_x, center_y, width, height) = box.astype("int")
                # use the center (x, y)-coordinates to derive the top left corner of the bounding box
                top_x = int(center_x - (width / 2))
                top_y = int(center_y - (height / 2))
                # update our list of bounding box coordinates, confidences, and class IDs
                detected_boxes.append([top_x, top_y, int(width), int(height)])
                detection_confidences.append(float(confidence))
                detected_classes.append(classid)
    # apply non-maxima suppression to suppress weak, overlapping bounding boxes
    indexes = cv2.dnn.NMSBoxes(detected_boxes, detection_confidences, confidence_level, threshold)
    return indexes, detected_classes, detected_boxes, detection_confidences
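
# Layout of each YOLO detection vector, as consumed above:
#   detection[0:4] - center_x, center_y, width, height (relative to image size)
#   detection[4]   - objectness score (unused here)
#   detection[5:]  - one confidence score per COCO class (80 values)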


def get_videowriter(outputfile, width, height, frames_per_sec=30):
    """
    Create a writer for the output video
    """
    # Initialise the writer
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    video_writer = cv2.VideoWriter(outputfile, fourcc, frames_per_sec, (width, height), True)
    return video_writer, frames_per_sec
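
# Note: the MJPG fourcc is assumed to be paired with an .avi output filename in
# the config; other container/codec combinations may silently fail to write.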


def save_frame(video_writer, new_frame, count=1):
    """
    Save frame <count> times to file.
    :param video_writer: writer for target file
    :param new_frame: frame to write
    :param count: number of times to write the frame
    :return:
    """
    for _ in range(0, count):
        video_writer.write(new_frame)


def get_webcamesource(webcam_id, width=640, height=480):
    """
    Create a reader for the webcam input
    """
    print("[INFO] initialising video source...")
    video_device = cv2.VideoCapture(webcam_id)
    video_device.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    video_device.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    (success, videoframe) = video_device.read()
    if not success:
        print("[ERROR] Could not read from webcam id {}".format(webcam_id))
        exit()
    (height, width) = videoframe.shape[:2]
    print("[INFO] Frame W x H: {} x {}".format(width, height))
    return video_device, width, height


def get_filesource(filename):
    """
    Create a reader for the input video file
    """
    print("[INFO] initialising video source : {}".format(filename))
    video_device = cv2.VideoCapture(filename)
    width = int(video_device.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video_device.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("[INFO] Frame W x H: {} x {}".format(width, height))
    return video_device, width, height


def update_frame(image, people_indxs, class_ids, detected_boxes, conf_levels, colors, labels,
                 show_boxes, blur, box_all_objects):
    """
    Add bounding boxes and the counted number of people to the frame.
    Return the frame and the number of people
    """
    # ensure at least one detection exists
    count_people = 0
    if len(people_indxs) >= 1:
        # loop over the indexes we are keeping
        for i in people_indxs.flatten():
            # extract the bounding box coordinates
            (x, y, w, h) = (detected_boxes[i][0], detected_boxes[i][1], detected_boxes[i][2], detected_boxes[i][3])
            # class id 0 is "person" in the COCO labels
            if class_ids[i] == 0:
                count_people += 1
                # Blur, if required, people in the image
                if blur:
                    image = blur_area(image, max(x, 0), max(y, 0), w, h)
            # draw a bounding box rectangle and label on the frame
            if (show_boxes and class_ids[i] == 0) or box_all_objects:
                color = [int(c) for c in colors[class_ids[i]]]
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                text = "{}: {:.2f}".format(labels[class_ids[i]], conf_levels[i])
                cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    # write number of people in bottom corner
    text = "Persons: {}".format(count_people)
    cv2.putText(image, text, (10, image.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    return image, count_people


def show_plots(data):
    """
    Show the graphs with historical data
    :param data: dataframe
    :return:
    """
    # Awful code to create new dataframes each time the graph is shown
    now = pd.Timestamp.now()
    df_1w = data[data.index >= now - pd.Timedelta('7D')]
    df_1d = df_1w[df_1w.index >= now - pd.Timedelta('24H')]
    df_8h = df_1d[df_1d.index >= now - pd.Timedelta('8H')]
    df_2h = df_8h[df_8h.index >= now - pd.Timedelta('2H')]
    # Resample to smooth the long running graphs
    df_1w = df_1w.resample('1H').max()
    df_1d = df_1d.resample('15min').max()
    plt.gcf().clear()
    plt.subplot(2, 2, 1)
    plt.plot(df_1w.index.tolist(), df_1w['value'].tolist())
    plt.title("Last week")
    plt.ylabel("Persons")
    plt.xlabel("Time")
    plt.subplot(2, 2, 2)
    plt.plot(df_1d.index.tolist(), df_1d['value'].tolist())
    plt.title("Last 24 hours")
    plt.ylabel("Persons")
    plt.xlabel("Time")
    plt.subplot(2, 2, 3)
    plt.plot(df_8h.index.tolist(), df_8h['value'].tolist())
    plt.title("Last 8 hours")
    plt.ylabel("Persons")
    plt.xlabel("Time")
    plt.subplot(2, 2, 4)
    plt.plot(df_2h.index.tolist(), df_2h['value'].tolist())
    plt.title("Last 2 hours")
    plt.ylabel("Persons")
    plt.xlabel("Time")
    plt.gcf().autofmt_xdate()
    plt.show()


if __name__ == '__main__':
    # construct the argument parser and parse the arguments
    args = define_args()
    config = read_config(args["config"])
    # Load the trained network
    (net, ln, LABELS) = load_network(config['NETWORK']['Path'])
    # Initialise video source
    webcam = (config['READER']['Webcam'] == "yes")
    if webcam:
        cam_id = int(config['READER']['WebcamID'])
        cam_width = int(config['READER']['Width'])
        cam_height = int(config['READER']['Height'])
        # use the actual frame size reported by the device
        (cam, cam_width, cam_height) = get_webcamesource(cam_id, cam_width, cam_height)
    else:
        (cam, cam_width, cam_height) = get_filesource(config['READER']['Filename'])
    # determine if we need to show the enclosing boxes, etc
    showpeopleboxes = (config['OUTPUT']['ShowPeopleBoxes'] == "yes")
    showallboxes = (config['OUTPUT']['ShowAllBoxes'] == "yes")
    blurpeople = (config['OUTPUT']['BlurPeople'] == "yes")
    realspeed = (config['OUTPUT']['RealSpeed'] == "yes")
    nw_confidence = float(config['NETWORK']['Confidence'])
    nw_threshold = float(config['NETWORK']['Threshold'])
    countfile = config['OUTPUT']['Countfile']
    save_video = (config['OUTPUT']['SaveVideo'] == "yes")
    show_graphs = (config['OUTPUT']['ShowGraphs'] == "yes")
    print_ascii = (config['OUTPUT']['PrintAscii'] == "yes")
    buffer_size = int(config['READER']['Buffersize'])
    # initialize a list of colors to represent each possible class label
    np.random.seed(42)
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
    # Initialise video output writer
    if save_video:
        (writer, fps) = get_videowriter(config['OUTPUT']['Filename'], cam_width, cam_height,
                                        int(config['OUTPUT']['FPS']))
    else:
        (writer, fps) = (None, 0)
    # Create output window, but limit its size to 1440x810
    cv2.namedWindow('Video', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Video', min(cam_width, 1440), min(cam_height, 810))
    cv2.moveWindow('Video', 0, 0)
    # Create plot
    if show_graphs:
        plt.ion()
        plt.figure(num=None, figsize=(8, 7), dpi=80, facecolor='w', edgecolor='k')
        df = read_existing_data(countfile)
    else:
        df = None
    # main detection loop
    while True:
        start = time.time()
        # read the next frame from the source;
        # make sure the buffer is empty by reading the specified number of frames
        grabbed = False
        for _ in range(max(buffer_size, 1)):
            (grabbed, frame) = cam.read()  # type: (bool, np.ndarray)
            if not grabbed:
                break
        if not grabbed:
            # end of file or read failure
            break
        # Feed frame to network
        layerOutputs = execute_network(frame, net, ln)
        # Obtain detected objects, including confidence levels and bounding boxes
        (idxs, classIDs, boxes, confidences) = get_detected_items(layerOutputs, nw_confidence, nw_threshold,
                                                                  cam_width, cam_height)
        # Update frame with recognised objects
        frame, npeople = update_frame(frame, idxs, classIDs, boxes, confidences, COLORS, LABELS, showpeopleboxes,
                                      blurpeople, showallboxes)
        save_count(countfile, npeople)
        if show_graphs:
            # Add a row to the pandas frame
            new_row = pd.DataFrame([[npeople]], columns=["value"], index=[pd.to_datetime(datetime.datetime.now())])
            df = pd.concat([df, new_row], ignore_index=False)
            show_plots(df)
        # Show frame with bounding boxes on screen
        cv2.imshow('Video', frame)
        # write the output frame to disk; repeat it (time taken * fps) times to get a video at real speed
        if save_video:
            frame_cnt = int((time.time() - start) * fps) if webcam and realspeed else 1
            save_frame(writer, frame, frame_cnt)
        end = time.time()
        print("[INFO] Total handling : %2.1f sec" % (end - start))
        print("[INFO] People in frame : {}".format(npeople))
        if print_ascii:
            print_ascii_large(str(npeople) + (" persons" if npeople != 1 else " person"))
        # Check for exit
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
    # release the file pointers
    print("[INFO] cleaning up...")
    if save_video:
        writer.release()
    cam.release()
    cv2.destroyAllWindows()