Merge pull request #6 from Rouast-Labs/long-video-support
Long video support
prouast authored Jul 20, 2024
2 parents 1a9923d + 781b4a4 commit 92b6728
Showing 9 changed files with 147 additions and 77 deletions.
3 changes: 2 additions & 1 deletion examples/test.py
@@ -2,6 +2,7 @@
 sys.path.append('../vitallens-python')
 import argparse
 import matplotlib.pyplot as plt
+import os
 import pandas as pd
 from prpy.ffmpeg.probe import probe_video
 from prpy.ffmpeg.readwrite import read_video_from_path
@@ -20,7 +21,7 @@
 
 def run(args=None):
   # Get ground truth vitals
-  vitals = pd.read_csv(args.vitals_path)
+  vitals = pd.read_csv(args.vitals_path) if os.path.exists(args.vitals_path) else []
   ppg_gt = vitals['ppg'] if 'ppg' in vitals else None
   resp_gt = vitals['resp'] if 'resp' in vitals else None
   # Get video
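
Note on the examples/test.py change: ground-truth vitals are now optional, and both the missing-file and missing-column cases fall through to None via the 'ppg' in vitals check. A quick illustration of that behaviour (not part of the commit):

    import pandas as pd

    vitals = []                                  # vitals_path does not exist
    print('ppg' in vitals)                       # False -> ppg_gt stays None
    vitals = pd.DataFrame({'resp': [0.1, 0.2]})  # file exists but has no 'ppg' column
    print('ppg' in vitals)                       # False ('in' checks the column names)
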
9 changes: 8 additions & 1 deletion tests/conftest.py
@@ -45,13 +45,20 @@ def test_video_fps():
   fps, *_ = probe_video(TEST_VIDEO_PATH)
   return fps
 
+@pytest.fixture(scope='session')
+def test_video_shape():
+  _, n, w, h, _, _, _ = probe_video(TEST_VIDEO_PATH)
+  return (n, h, w, 3)
+
 @pytest.fixture(scope='session')
 def test_video_faces(request):
   det = FaceDetector(
     max_faces=1, fs=1.0, iou_threshold=0.45, score_threshold=0.9)
   test_video_ndarray = request.getfixturevalue('test_video_ndarray')
   test_video_fps = request.getfixturevalue('test_video_fps')
-  boxes, _ = det(test_video_ndarray, fps=test_video_fps)
+  boxes, _ = det(test_video_ndarray,
+                 inputs_shape=test_video_ndarray.shape,
+                 fps=test_video_fps)
   boxes = (boxes * [test_video_ndarray.shape[2], test_video_ndarray.shape[1], test_video_ndarray.shape[2], test_video_ndarray.shape[1]]).astype(int)
   return boxes[:,0].astype(np.int64)
 
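
The new test_video_shape fixture turns probe_video's frame count, width and height into a NumPy-style (n_frames, height, width, 3) tuple, which is the format the detector's new inputs_shape argument expects. A minimal sketch of a test consuming it (names mirror the fixtures above; this is illustrative, not part of the commit):

    # FaceDetector imported as in the repo's tests
    def test_detector_on_file(test_video_path, test_video_shape, test_video_fps):
      det = FaceDetector(max_faces=1, fs=1.0, iou_threshold=0.45, score_threshold=0.9)
      boxes, info = det(inputs=test_video_path,
                        inputs_shape=test_video_shape,  # (n_frames, h, w, 3)
                        fps=test_video_fps)
      assert boxes.shape[0] == test_video_shape[0]      # one box row per frame
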
53 changes: 30 additions & 23 deletions tests/test_ssd.py
@@ -19,6 +19,7 @@
 # SOFTWARE.
 
 import numpy as np
+from prpy.ffmpeg.probe import probe_video
 import pytest
 
 import sys
@@ -79,13 +80,13 @@ def test_enforce_temporal_consistency():
      [[.5, .5, .75, .75 ], [.125, .5, .375, .75]],
      [[.125, .625, .375, .875], [.625, .5, .875, .75]]])
   info = np.array(
-    [[[0, 1, 1, 1, .99], [0, 1, 1, 1, .99]],
-     [[1, 1, 0, 0, .2 ], [1, 1, 1, 1, .99]],
-     [[2, 1, 1, 1, .99], [2, 1, 1, 1, .99]],
-     [[3, 1, 1, 1, .99], [3, 1, 1, 1, .99]],
-     [[4, 1, 1, 1, .99], [4, 1, 1, 1, .99]]])
+    [[[0, 1, 1, .99], [0, 1, 1, .99]],
+     [[1, 1, 0, .2 ], [1, 1, 1, .99]],
+     [[2, 1, 1, .99], [2, 1, 1, .99]],
+     [[3, 1, 1, .99], [3, 1, 1, .99]],
+     [[4, 1, 1, .99], [4, 1, 1, .99]]])
   boxes_out, info_out = enforce_temporal_consistency(
-    boxes=boxes, info=info, inputs_shape=(5, 8, 8, 3))
+    boxes=boxes, info=info, n_frames=5)
   np.testing.assert_equal(
     boxes_out,
     np.array(
@@ -97,11 +98,11 @@
   np.testing.assert_equal(
     info_out,
     np.array(
-      [[[0, 1, 1, 1, .99], [0, 1, 1, 1, .99]],
-       [[1, 1, 1, 1, .99], [1, 1, 0, 0, .2 ]],
-       [[2, 1, 1, 1, .99], [2, 1, 1, 1, .99]],
-       [[3, 1, 1, 1, .99], [3, 1, 1, 1, .99]],
-       [[4, 1, 1, 1, .99], [4, 1, 1, 1, .99]]]))
+      [[[0, 1, 1, .99], [0, 1, 1, .99]],
+       [[1, 1, 1, .99], [1, 1, 0, .2 ]],
+       [[2, 1, 1, .99], [2, 1, 1, .99]],
+       [[3, 1, 1, .99], [3, 1, 1, .99]],
+       [[4, 1, 1, .99], [4, 1, 1, .99]]]))
 
 def test_interpolate_unscanned_frames():
   # Example with 2 moving faces, 3 time steps, no detection for face 1 in time step 2, faces swapped in time step 4
@@ -110,11 +111,11 @@
      [[.25, .5, .5, .75], [.125, .25, .375, .5 ]],
      [[.375, .5, .625, .75], [.125, .375, .375, .625]]])
   info = np.array(
-    [[[0, 1, 1, 1, .99], [0, 1, 1, 1, .99]],
-     [[1, 1, 1, 1, .99], [1, 1, 0, 0, .2 ]],
-     [[2, 1, 1, 1, .99], [2, 1, 1, 1, .99]]])
+    [[[0, 1, 1, .99], [0, 1, 1, .99]],
+     [[2, 1, 1, .99], [2, 1, 0, .2 ]],
+     [[4, 1, 1, .99], [4, 1, 1, .99]]])
   boxes_out, info_out = interpolate_unscanned_frames(
-    boxes=boxes, info=info, scan_every=2, inputs_shape=(5, 8, 8, 3))
+    boxes=boxes, info=info, n_frames=5)
   np.testing.assert_equal(
     boxes_out,
     np.array(
@@ -126,25 +127,31 @@
   np.testing.assert_equal(
     info_out,
     np.array(
-      [[[0, 1, 1, 1, .99], [0, 1, 1, 1, .99]],
-       [[1, 0, 0, 1, 0 ], [1, 0, 0, 1, 0 ]], # Imperfection of the implementation
-       [[2, 1, 1, 1, .99], [2, 1, 0, 0, .2 ]],
-       [[3, 0, 0, 1, 0 ], [3, 0, 0, 0, 0 ]],
-       [[4, 1, 1, 1, .99], [4, 1, 1, 1, .99]]]))
+      [[[0, 1, 1, .99], [0, 1, 1, .99]],
+       [[1, 0, 0, 0 ], [1, 0, 0, 0 ]], # Imperfection of the implementation
+       [[2, 1, 1, .99], [2, 1, 0, .2 ]],
+       [[3, 0, 0, 0 ], [3, 0, 0, 0 ]],
+       [[4, 1, 1, .99], [4, 1, 1, .99]]]))
 
 @pytest.mark.parametrize("file", [True, False])
 def test_FaceDetector(request, file):
   det = FaceDetector(
     max_faces=2, fs=1.0, iou_threshold=0.45, score_threshold=0.9)
   if file:
     test_video_path = request.getfixturevalue('test_video_path')
-    boxes, info = det(test_video_path)
+    test_video_shape = request.getfixturevalue('test_video_shape')
+    test_video_fps = request.getfixturevalue('test_video_fps')
+    boxes, info = det(inputs=test_video_path,
+                      inputs_shape=test_video_shape,
+                      fps=test_video_fps)
   else:
     test_video_ndarray = request.getfixturevalue('test_video_ndarray')
     test_video_fps = request.getfixturevalue('test_video_fps')
-    boxes, info = det(test_video_ndarray, fps=test_video_fps)
+    boxes, info = det(inputs=test_video_ndarray,
+                      inputs_shape=test_video_ndarray.shape,
+                      fps=test_video_fps)
   assert boxes.shape == (360, 1, 4)
-  assert info.shape == (360, 1, 5)
+  assert info.shape == (360, 1, 4)
   np.testing.assert_allclose(boxes[0,0],
     [0.32223, 0.118318, 0.572684, 0.696835],
     atol=0.01)
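
A side note on the updated interpolate_unscanned_frames test: the info rows now carry what look like the original frame indices in column 0 (0, 2, 4 for a clip scanned every second frame) and hold 4 values per detection instead of 5, which is also why the detector's shape assertion changes from (360, 1, 5) to (360, 1, 4). The frame filling the test exercises can be pictured with plain np.interp (an illustration of the idea only, using example values, not the library code):

    import numpy as np

    # A face was detected at scanned frames 0, 2 and 4; frames 1 and 3 are filled in linearly.
    scanned_idxs = np.array([0, 2, 4])
    scanned_x0 = np.array([.125, .25, .375])   # example x0 values at the scanned frames
    filled_x0 = np.interp(np.arange(5), scanned_idxs, scanned_x0)
    print(filled_x0)                           # [0.125 0.1875 0.25 0.3125 0.375]
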
5 changes: 3 additions & 2 deletions tests/test_utils.py
@@ -90,12 +90,12 @@ def test_probe_video_inputs_wrong_type():
 def test_parse_video_inputs(request, file, roi, target_size, target_fps):
   if file:
     test_video_path = request.getfixturevalue('test_video_path')
-    parsed, fps_in, video_shape_in, ds_factor = parse_video_inputs(
+    parsed, fps_in, video_shape_in, ds_factor, idxs = parse_video_inputs(
       test_video_path, roi=roi, target_size=target_size, target_fps=target_fps)
   else:
     test_video_ndarray = request.getfixturevalue('test_video_ndarray')
     test_video_fps = request.getfixturevalue('test_video_fps')
-    parsed, fps_in, video_shape_in, ds_factor = parse_video_inputs(
+    parsed, fps_in, video_shape_in, ds_factor, idxs = parse_video_inputs(
       test_video_ndarray, fps=test_video_fps, roi=roi, target_size=target_size,
       target_fps=target_fps)
   assert parsed.shape == (360 if target_fps is None else 360 // 2,
@@ -105,6 +105,7 @@ def test_parse_video_inputs(request, file, roi, target_size, target_fps):
   assert fps_in == 30
   assert video_shape_in == (360, 480, 768, 3)
   assert ds_factor == 1 if target_fps is None else 2
+  assert idxs == list(range(360)) if target_fps is None else list(range(0, 360, 2))
 
 def test_parse_video_inputs_no_file():
   with pytest.raises(Exception):
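
The extra idxs return value of parse_video_inputs reports which source-frame indices survive the temporal downsampling, which the new assertion pins down. A small sketch of the relationship the test expects (illustrative only; the library computes this internally):

    n_frames, fps, target_fps = 360, 30, 15
    ds_factor = round(fps / target_fps)        # 2
    idxs = list(range(0, n_frames, ds_factor))  # [0, 2, 4, ..., 358]
    print(len(idxs))                            # 180, matching parsed.shape[0] == 360 // 2
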
10 changes: 8 additions & 2 deletions vitallens/client.py
@@ -45,7 +45,7 @@
       method: Method = Method.VITALLENS,
       api_key: str = None,
       detect_faces: bool = True,
-      fdet_max_faces: int = 2,
+      fdet_max_faces: int = 1,
       fdet_fs: float = 1.0,
       fdet_score_threshold: float = 0.9,
       fdet_iou_threshold: float = 0.3
@@ -65,6 +65,7 @@
     self.api_key = api_key
     # Load the config and model
     self.config = load_config(method.name.lower() + ".yaml")
+    self.method = method
     if self.config['model'] == 'g':
       self.rppg = GRPPGMethod(self.config)
     elif self.config['model'] == 'chrom':
@@ -151,10 +152,15 @@
     """
     # Probe inputs
     inputs_shape, fps = probe_video_inputs(video=video, fps=fps)
+    # TODO: Optimize performance of simple rPPG methods for long videos
+    # Warning if using long video
+    target_fps = override_fps_target if override_fps_target is not None else self.rppg.fps_target
+    if self.method != Method.VITALLENS and inputs_shape[0]/fps*target_fps > 3600:
+      logging.warn("Inference for long videos has yet to be optimized for POS / G / CHROM. This may run out of memory and crash.")
     _, height, width, _ = inputs_shape
     if self.detect_faces:
       # Detect faces
-      faces_rel, _ = self.face_detector(inputs=video, fps=fps)
+      faces_rel, _ = self.face_detector(inputs=video, inputs_shape=inputs_shape, fps=fps)
       # If no faces detected: return empty list
       if len(faces_rel) == 0:
         logging.warn("No faces to analyze")
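
The long-video check above compares the number of frames a simple method would actually process after resampling, inputs_shape[0] / fps * target_fps, against a 3600-frame cutoff. A rough worked example (clip length and target rate are made up for illustration):

    n_frames, fps = 18000, 30               # a 10-minute clip recorded at 30 fps
    target_fps = 10                          # assumed fps_target of a simple method
    processed = n_frames / fps * target_fps  # 6000.0 frames after downsampling
    print(processed > 3600)                  # True -> POS / G / CHROM logs the warning
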
2 changes: 1 addition & 1 deletion vitallens/methods/simple_rppg_method.py
@@ -76,7 +76,7 @@
     u_roi = merge_faces(faces)
     faces = faces - [u_roi[0], u_roi[1], u_roi[0], u_roi[1]]
     # Parse the inputs
-    frames_ds, fps, inputs_shape, ds_factor = parse_video_inputs(
+    frames_ds, fps, inputs_shape, ds_factor, _ = parse_video_inputs(
       video=frames, fps=fps, target_size=None, roi=u_roi,
       target_fps=override_fps_target if override_fps_target is not None else self.fps_target)
     assert inputs_shape[0] == faces.shape[0], "Need same number of frames as face detections"
9 changes: 6 additions & 3 deletions vitallens/methods/vitallens.py
@@ -79,7 +79,8 @@
         (faces[:,3] - faces[:,1]) * 0.5 < np.maximum(0, faces[:,1] - roi[1]) + np.maximum(0, faces[:,3] - roi[3]))):
       logging.warn("Large face movement detected")
     # Parse the inputs
-    frames_ds, fps, inputs_shape, ds_factor = parse_video_inputs(
+    logging.debug("Preparing video for inference...")
+    frames_ds, fps, inputs_shape, ds_factor, _ = parse_video_inputs(
       video=frames, fps=fps, target_size=self.input_size, roi=roi,
       target_fps=override_fps_target if override_fps_target is not None else self.fps_target,
       library='prpy', scale_algorithm='bilinear')
@@ -95,6 +96,7 @@
       split_len = math.ceil((ds_len + (n_splits-1) * API_OVERLAP) / n_splits)
       start_idxs = [i for i in range(0, ds_len - n_splits * API_OVERLAP, split_len - API_OVERLAP)]
       end_idxs = [min(i + split_len, ds_len) for i in start_idxs]
+      logging.info("Running inference for {} frames using {} requests...".format(ds_len, n_splits))
       # Process the splits in parallel
       with concurrent.futures.ThreadPoolExecutor() as executor:
         results = list(executor.map(lambda i: self.process_api(frames_ds[start_idxs[i]:end_idxs[i]]), range(n_splits)))
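
For long clips the VitalLens method fans the downsampled frames out over several overlapping API requests and stitches the results back together. A worked sketch of the index arithmetic above, with made-up values for ds_len, n_splits and API_OVERLAP (the real constants are defined elsewhere in the package, and n_splits is computed outside this hunk):

    import math

    ds_len, n_splits, API_OVERLAP = 2000, 3, 30   # illustrative values only
    split_len = math.ceil((ds_len + (n_splits - 1) * API_OVERLAP) / n_splits)   # 687
    start_idxs = list(range(0, ds_len - n_splits * API_OVERLAP, split_len - API_OVERLAP))
    end_idxs = [min(i + split_len, ds_len) for i in start_idxs]
    print(list(zip(start_idxs, end_idxs)))        # [(0, 687), (657, 1344), (1314, 2000)]
    # Consecutive requests overlap by exactly API_OVERLAP = 30 frames.
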
@@ -211,8 +213,9 @@ def postprocess(self, sig, fps, type='ppg', filter=True):
       Lambda = detrend_lambda_for_rr_response(fps)
     else:
       raise ValueError("Type {} not implemented!".format(type))
-    # Detrend
-    sig = detrend(sig, Lambda)
+    if sig.shape[-1] < 4 * API_MAX_FRAMES:
+      # Detrend only for shorter videos for performance reasons
+      sig = detrend(sig, Lambda)
     # Moving average
     sig = moving_average(sig, size)
     # Standardize
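
On the postprocess change: detrending now only runs for signals shorter than 4 * API_MAX_FRAMES, presumably because its cost grows quickly with signal length. A rough sense of the scale involved (the value of API_MAX_FRAMES is assumed here, not taken from the diff):

    API_MAX_FRAMES = 900                 # assumed per-request frame cap, for illustration
    cutoff = 4 * API_MAX_FRAMES          # 3600 samples, i.e. about 2 minutes at 30 fps
    print(7200 < cutoff)                 # False -> a 4-minute signal at 30 fps skips detrend
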
