diff --git a/README.md b/README.md
index ca0c55f..036566d 100755
--- a/README.md
+++ b/README.md
@@ -18,6 +18,8 @@
 write back to a pickle file which is used in a TensorFlow model as initial weights
 from the authors' repo.
 2. Convert the model to a pickle file by:
+
+> **Note:** before running the command below, create an empty `mat2tf.pkl` file for the script to write into (e.g. `touch mat2tf.pkl`).
 ```
 python matconvnet_hr101_to_pickle.py
         --matlab_model_path /path/to/pretrained_model
@@ -36,6 +38,14 @@
 python tiny_face_eval.py
         --data_dir /path/to/input_image_directory
         --output_dir /path/to/output_directory
 ```
+
+## Testing Tiny Face Detector in TensorFlow with a webcam
+
+Run:
+```
+python tiny_face_eval_webcam.py
+        --weight_file_path /path/to/pickle_file
+```
 
 # Neural network diagram
diff --git a/tiny_face_eval_webcam.py b/tiny_face_eval_webcam.py
new file mode 100644
index 0000000..6deda54
--- /dev/null
+++ b/tiny_face_eval_webcam.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import tiny_face_model
+import util
+from argparse import ArgumentParser
+import cv2
+import numpy as np
+import pickle
+import pylab as pl
+import time
+import os
+from scipy.special import expit
+
+MAX_INPUT_DIM = 5000.0
+
+def overlay_bounding_boxes(raw_img, refined_bboxes, lw):
+  """Overlay bounding boxes of faces on an image.
+  Args:
+    raw_img:
+      A target image.
+    refined_bboxes:
+      Bounding boxes of detected faces.
+    lw:
+      Line width of bounding boxes. If zero is specified,
+      the width is chosen adaptively from the confidence of each detection.
+  Returns:
+    None.
+  """
+
+  # Overlay bounding boxes on the image, colored by detection confidence.
+  for r in refined_bboxes:
+    _score = expit(r[4])
+    cm_idx = int(np.ceil(_score * 255))
+    rect_color = [int(np.ceil(x * 255)) for x in util.cm_data[cm_idx]]  # parula
+    _lw = lw
+    if lw == 0:  # line width of each bounding box is adaptively determined.
+      bw, bh = r[2] - r[0] + 1, r[3] - r[1] + 1
+      _lw = 1 if min(bw, bh) <= 20 else max(2, min(3, min(bh / 20, bw / 20)))
+      _lw = int(np.ceil(_lw * _score))
+
+    _r = [int(x) for x in r[:4]]
+    cv2.rectangle(raw_img, (_r[0], _r[1]), (_r[2], _r[3]), rect_color, _lw)
+
+
+def evaluate(weight_file_path, prob_thresh=0.5, nms_thresh=0.1, lw=3):
+  """Detect faces in frames captured from a webcam.
+  Args:
+    weight_file_path:
+      A pretrained weight file in the pickle format
+      generated by matconvnet_hr101_to_pickle.py.
+    prob_thresh:
+      The threshold of detection confidence.
+    nms_thresh:
+      The overlap threshold of non-maximum suppression.
+    lw:
+      Line width of bounding boxes. If zero is specified,
+      the width is chosen adaptively from the confidence of each detection.
+  Returns:
+    None.
+  """
+
+  # Placeholder for input images. Currently only a batch size of one is supported.
+  x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
+
+  # Create the tiny face model whose weights are loaded from the pretrained model.
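+  # The network emits 125 channels per spatial location: 25 template
+  # classification scores followed by 100 box-regression values (4 per
+  # template); they are split below into score_cls_tf and score_reg_tf.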
+  model = tiny_face_model.Model(weight_file_path)
+  score_final = model.tiny_face(x)
+
+  with open(weight_file_path, "rb") as f:
+    _, mat_params_dict = pickle.load(f)
+
+  average_image = model.get_data_by_key("average_image")
+  clusters = model.get_data_by_key("clusters")
+  clusters_h = clusters[:, 3] - clusters[:, 1] + 1
+  clusters_w = clusters[:, 2] - clusters[:, 0] + 1
+  normal_idx = np.where(clusters[:, 4] == 1)
+
+  # main
+  with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+
+    video_capture = cv2.VideoCapture(0)
+    while True:
+      # Capture a frame from the webcam.
+      ret, frame = video_capture.read()
+      if not ret:  # stop when no frame can be read
+        break
+      raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+      raw_img_f = raw_img.astype(np.float32)
+
+      def _calc_scales():
+        raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
+        min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
+                        np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
+        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
+        scales_down = pl.frange(min_scale, 0, 1.)
+        scales_up = pl.frange(0.5, max_scale, 0.5)
+        scales_pow = np.hstack((scales_down, scales_up))
+        scales = np.power(2.0, scales_pow)
+        return scales
+
+      scales = _calc_scales()
+      start = time.time()
+
+      # initialize output
+      bboxes = np.empty(shape=(0, 5))
+
+      # Process the input at the first two scales only to keep per-frame latency low.
+      for s in scales[:2]:
+        img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
+        img = img - average_image
+        img = img[np.newaxis, :]
+
+        # We don't run every template on every scale: ids of templates to ignore.
+        tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
+        ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))
+
+        # run through the net
+        score_final_tf = sess.run(score_final, feed_dict={x: img})
+
+        # collect scores
+        score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
+        prob_cls_tf = expit(score_cls_tf)
+        prob_cls_tf[0, :, :, ignoredTids] = 0.0
+
+        def _calc_bounding_boxes():
+          # threshold for detection
+          _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)
+
+          # interpret heatmap into bounding boxes
+          cy = fy * 8 - 1
+          cx = fx * 8 - 1
+          ch = clusters[fc, 3] - clusters[fc, 1] + 1
+          cw = clusters[fc, 2] - clusters[fc, 0] + 1
+
+          # extract bounding box refinement
+          Nt = clusters.shape[0]
+          tx = score_reg_tf[0, :, :, 0:Nt]
+          ty = score_reg_tf[0, :, :, Nt:2*Nt]
+          tw = score_reg_tf[0, :, :, 2*Nt:3*Nt]
+          th = score_reg_tf[0, :, :, 3*Nt:4*Nt]
+
+          # refine bounding boxes
+          dcx = cw * tx[fy, fx, fc]
+          dcy = ch * ty[fy, fx, fc]
+          rcx = cx + dcx
+          rcy = cy + dcy
+          rcw = cw * np.exp(tw[fy, fx, fc])
+          rch = ch * np.exp(th[fy, fx, fc])
+
+          scores = score_cls_tf[0, fy, fx, fc]
+          tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
+          tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
+          tmp_bboxes = tmp_bboxes.transpose()
+          return tmp_bboxes
+
+        tmp_bboxes = _calc_bounding_boxes()
+        bboxes = np.vstack((bboxes, tmp_bboxes))  # shape: (num_detections, 5)
+
+      refind_idx = tf.image.non_max_suppression(
+          tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
+          tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
+          max_output_size=bboxes.shape[0], iou_threshold=nms_thresh)
+      refind_idx = sess.run(refind_idx)
+      refined_bboxes = bboxes[refind_idx]
+      overlay_bounding_boxes(raw_img, refined_bboxes, lw)
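+      # Optional (illustrative): print per-frame processing time using the
+      # `start` timestamp captured above, e.g.:
+      # print("frame processed in {:.3f} s".format(time.time() - start))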
+
+      # Convert back to BGR and display the frame with bounding boxes.
+      raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
+      cv2.imshow('Video', raw_img)
+
+      if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+    # When everything is done, release the capture.
+    video_capture.release()
+    cv2.destroyAllWindows()
+
+
+def main():
+  argparse = ArgumentParser()
+  argparse.add_argument('--weight_file_path', type=str, help='Pretrained weight file.',
+                        default="/path/to/mat2tf.pkl")
+
+  args = argparse.parse_args()
+
+  # check arguments
+  assert os.path.exists(args.weight_file_path), "weight file: " + args.weight_file_path + " not found."
+
+  with tf.Graph().as_default():
+    evaluate(weight_file_path=args.weight_file_path)
+
+
+if __name__ == '__main__':
+  main()
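+
+# Usage (assumes the pickle file was produced by matconvnet_hr101_to_pickle.py):
+#   python tiny_face_eval_webcam.py --weight_file_path /path/to/mat2tf.pkl
+# Press 'q' in the video window to quit.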