webcam #16

Open · wants to merge 5 commits into base: master
9 changes: 9 additions & 0 deletions README.md
@@ -18,6 +18,8 @@ write back to a pickle file which is used in a TensorFlow model as initial weights
from the authors' repo.

2. Convert the model to a pickle file by:

Note: before running the conversion command below, first create a blank `mat2tf.pkl` file for the converter to write into.
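One way to create it (a sketch; this assumes the converter only needs the file to exist):
```
python -c "open('mat2tf.pkl', 'wb').close()"
```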
```
python matconvnet_hr101_to_pickle.py
--matlab_model_path /path/to/pretrained_model
@@ -36,6 +38,13 @@ python tiny_face_eval.py
--data_dir /path/to/input_image_directory
--output_dir /path/to/output_directory
```
## Testing Tiny Face Detector in TensorFlow with a webcam

Run:
```
python tiny_face_eval_webcam.py
--weight_file_path /path/to/pickle_file
```
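Press `q` in the video window to stop the capture.

The detector can also be called from Python; a minimal sketch, assuming `mat2tf.pkl` is in the working directory:
```
import tensorflow as tf
from tiny_face_eval_webcam import evaluate

with tf.Graph().as_default():
    evaluate(weight_file_path="mat2tf.pkl")
```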

# Neural network diagram

219 changes: 219 additions & 0 deletions tiny_face_eval_webcam.py
@@ -0,0 +1,219 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from argparse import ArgumentParser
import os
import pickle
import time

import cv2
import numpy as np
import pylab as pl  # pl.frange is used to build the scale pyramid
import tensorflow as tf
from scipy.special import expit

import tiny_face_model
import util

MAX_INPUT_DIM = 5000.0
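# MAX_INPUT_DIM caps the longest side of an up-scaled frame (see _calc_scales below).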

def overlay_bounding_boxes(raw_img, refined_bboxes, lw):
    """Overlay bounding boxes of detected faces on an image.
    Args:
      raw_img:
        A target image.
      refined_bboxes:
        Bounding boxes of detected faces.
      lw:
        Line width of the bounding boxes. If zero, the width is
        chosen adaptively from each detection's confidence.
    Returns:
      None.
    """

    # Overlay bounding boxes on the image, colored by detection confidence.
    for r in refined_bboxes:
        _score = expit(r[4])
        cm_idx = int(np.ceil(_score * 255))
        rect_color = [int(np.ceil(x * 255)) for x in util.cm_data[cm_idx]]  # parula
        _lw = lw
        if lw == 0:  # line width of each bounding box is adaptively determined.
            bw, bh = r[2] - r[0] + 1, r[3] - r[1] + 1  # box width and height
            _lw = 1 if min(bw, bh) <= 20 else max(2, min(3, min(bh / 20, bw / 20)))
            _lw = int(np.ceil(_lw * _score))

        _r = [int(x) for x in r[:4]]
        cv2.rectangle(raw_img, (_r[0], _r[1]), (_r[2], _r[3]), rect_color, _lw)


def evaluate(weight_file_path, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    """Detect faces in frames captured from a webcam.
    Args:
      weight_file_path:
        A pretrained weight file in the pickle format
        generated by matconvnet_hr101_to_pickle.py.
      prob_thresh:
        The threshold on detection confidence.
      nms_thresh:
        The overlap threshold for non-maximum suppression.
      lw:
        Line width of the bounding boxes. If zero, the width is
        chosen adaptively from each detection's confidence.
      display:
        Display detected faces in a window.
    Returns:
      None.
    """

    # Placeholder for input images. Currently only a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

    # Create the tiny face model, whose weights are loaded from the pretrained pickle file.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)

    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")  # reference box templates
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        video_capture = cv2.VideoCapture(0)  # 0 selects the default camera
        while True:
            # Capture frame-by-frame.
            ret, frame = video_capture.read()
            if not ret:  # no frame available; stop capturing
                break
            raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

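            # Scale factors are powers of two: negative powers shrink the frame
            # toward the template sizes; positive powers (half-octave steps)
            # enlarge it, capped so the longest side stays under MAX_INPUT_DIM.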
            def _calc_scales():
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)
                return scales

            scales = _calc_scales()
            start = time.time()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # Process the input at different scales (only the two smallest here,
            # presumably to keep the per-frame cost low enough for a live feed).
            for s in scales[:2]:
                img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # We don't run every template on every scale: ids of templates to ignore.
                tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                # Run the frame through the net.
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # Collect scores: the first 25 channels are class scores, the rest are box regressions.
                score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

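                # Each heatmap cell is 8 px apart in the scaled input, hence the
                # factor of 8 when mapping cell indices back to pixel coordinates.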
                def _calc_bounding_boxes():
                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret heatmap into bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2*Nt]
                    tw = score_reg_tf[0, :, :, 2*Nt:3*Nt]
                    th = score_reg_tf[0, :, :, 3*Nt:4*Nt]

                    # refine bounding boxes: shift the template center by the predicted
                    # offsets and rescale its width and height exponentially
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))  # map back to original-image coordinates
                    tmp_bboxes = tmp_bboxes.transpose()
                    return tmp_bboxes

                tmp_bboxes = _calc_bounding_boxes()
                bboxes = np.vstack((bboxes, tmp_bboxes))


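            # Boxes from all scales are pooled, then pruned with TensorFlow's
            # built-in non-maximum suppression.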
            refind_idx = tf.image.non_max_suppression(tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                                                      tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                                                      max_output_size=bboxes.shape[0], iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]
            overlay_bounding_boxes(raw_img, refined_bboxes, lw)

            # Convert back to BGR for OpenCV and display the resulting frame.
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            cv2.imshow('Video', raw_img)

            if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to quit
                break

        # When everything is done, release the capture.
        video_capture.release()
        cv2.destroyAllWindows()

def main():
    argparse = ArgumentParser()
    argparse.add_argument('--weight_file_path', type=str, help='Pretrained weight file.', default="/path/to/mat2tf.pkl")

    args = argparse.parse_args()

    # check arguments
    assert os.path.exists(args.weight_file_path), "weight file: " + args.weight_file_path + " not found."

    with tf.Graph().as_default():
        evaluate(weight_file_path=args.weight_file_path)


if __name__ == '__main__':
    main()