-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcaptureImages.py
135 lines (106 loc) · 3.49 KB
/
captureImages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
"""
Dataset building utility.
Based on https://github.com/jgv7/CNN-HowManyFingers/blob/master/application.py
The script captures photos into one directory per letter [A-Z].
Hebrew alphabet letters are mapped to English alphabet letters by letter order.
Controls:
use arrows to move the ROI box.
press 1 to turn capturing on/off.
press 2 to display/hide binary mask.
press an English letter key to choose the destination folder.
press esc to exit.
@author: Netanel Azoulay
@author: Roman Koifman
"""
import os
import copy
import cv2
from utils import binaryMask
from projectParams import classes
# Shared module-level state (read and updated by initClass and captureImages).
dataFolder = 'captureData'  # Root folder that receives the captured images.
font = cv2.FONT_HERSHEY_SIMPLEX  # Typeface used for the on-screen text.
dataColor = (0, 255, 0)  # Colour tuple used for the ROI box and status text.
className, count = classes[0], 0  # Active class label and its image counter.
def initClass(name):
    """
    Select the class to capture and prepare its destination folder.

    Sets the global ``className`` to ``name``, makes sure the directory
    ``dataFolder/name`` exists and sets the global ``count`` to the number of
    entries already present in that directory.

    :param name: class label, also used as the sub-folder name.
    :raises OSError: if the folder cannot be created or listed.
    """
    global className, count
    className = name
    classDir = os.path.join(dataFolder, name)
    # exist_ok=True: re-selecting a class whose folder already exists is the
    # normal case and must not be reported as an error.
    os.makedirs(classDir, exist_ok=True)
    count = len(os.listdir(classDir))
def captureImages():
    """
    Main loop: show the mirrored camera feed and save ROI masks on demand.

    Every iteration reads a frame from camera 0, mirrors it, draws the ROI box
    and the status text, runs ``binaryMask`` on the ROI and, while data taking
    is on, writes the result to
    ``dataFolder/<className>/<className>_<count>.png``.

    Controls: arrows move the ROI box, ``1`` toggles data taking, ``2`` toggles
    the mask preview, a letter key selects the class, ESC exits.

    The loop also ends when the camera stops delivering frames. The camera is
    released and the windows are closed on every exit path.
    """
    global takingData, dataColor
    global className, count
    global showMask

    showMask = 0
    takingData = 0

    fx, fy, fh = 10, 50, 45  # text origin and line height
    x0, y0, width = 200, 220, 224  # top-left corner and side of the square ROI
    step = 5  # pixels the ROI moves per arrow key press

    cam = cv2.VideoCapture(0)
    try:
        cv2.namedWindow('Original', cv2.WINDOW_NORMAL)
        while True:
            # Get camera frame; stop instead of crashing when none arrives.
            ret, frame = cam.read()
            if not ret or frame is None:
                print('Could not read a frame from the camera.')
                break
            frame = cv2.flip(frame, 1)  # mirror
            window = copy.deepcopy(frame)

            # Pick the colour before drawing so the box and the status text
            # always agree with the current capture state.
            if takingData:
                dataColor = (0, 250, 0)
                status = 'Data Taking: ON'
            else:
                dataColor = (0, 0, 250)
                status = 'Data Taking: OFF'
            cv2.rectangle(window, (x0, y0), (x0 + width - 1, y0 + width - 1), dataColor, 12)

            # draw text
            cv2.putText(window, status, (fx, fy), font, 1.2, dataColor, 2, 1)
            cv2.putText(window, 'Class Name: %s (%d)' % (className, count), (fx, fy + fh), font, 1.0,
                        (245, 210, 65), 2, 1)

            # get region of interest (taken from the clean frame, not the annotated copy)
            roi = frame[y0:y0 + width, x0:x0 + width]
            roi = binaryMask(roi)

            # apply processed roi in frame
            if showMask:
                window[y0:y0 + width, x0:x0 + width] = cv2.cvtColor(roi, cv2.COLOR_GRAY2BGR)

            # Save the ROI; only advance the counter when the file was written.
            if takingData:
                if cv2.imwrite(dataFolder + '/{0}/{0}_{1}.png'.format(className, count), roi):
                    count += 1

            # show the window
            cv2.imshow('Original', window)

            # Keyboard inputs
            key = cv2.waitKeyEx(10)
            lowByte = key & 0xff

            # use ESC key to close the program
            if lowByte == 27:
                break
            if lowByte == 255:  # nothing pressed (waitKeyEx returned -1)
                continue

            # Adjust the position of the ROI, keeping it inside the frame.
            # NOTE(review): waitKeyEx codes are backend specific; these values
            # presumably match the Win32 backend - confirm on other platforms.
            if key == 2490368:  # up
                y0 = max(y0 - step, 0)
            elif key == 2621440:  # down
                y0 = max(min(y0 + step, window.shape[0] - width), 0)
            elif key == 2424832:  # left
                x0 = max(x0 - step, 0)
            elif key == 2555904:  # right
                x0 = max(min(x0 + step, window.shape[1] - width), 0)

            # Toggle data taking
            if lowByte == ord('1'):
                takingData = not takingData
            # Toggle mask preview
            elif lowByte == ord('2'):
                showMask = not showMask
            # Toggle class (lower-case letters select the upper-case class)
            elif ord('A') <= lowByte <= ord('Z'):
                initClass(chr(lowByte))
            elif ord('a') <= lowByte <= ord('z'):
                initClass(chr(lowByte).upper())
    finally:
        cam.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # Set up the folder and counter of the default class, then run the capture loop.
    initClass(className)
    captureImages()