-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathdataset_stats.py
49 lines (37 loc) · 1.36 KB
/
dataset_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import csv
import json
# Path to the HACS Clips annotation CSV read by parse_clips().
file_clips = './HACS_v1.1.1/HACS_clips_v1.1.1.csv'
# Path to the HACS Segments annotation JSON read by parse_segments().
file_segments = './HACS_v1.1.1/HACS_segments_v1.1.1.json'
# Subset names used as tally keys and as the reporting order by both parsers.
subsets = ['training', 'validation', 'testing']
# HACS Clips statistics
def parse_clips(clips_path=None, subset_names=None):
    """Print per-subset video and clip counts for a HACS Clips CSV.

    The CSV is expected to have one header row followed by six-column rows
    (classname, video id, subset, start, end, label). Each row counts as one
    clip; videos are counted by distinct video id within a subset.

    Args:
        clips_path: Path of the CSV to read. Defaults to the module-level
            ``file_clips``.
        subset_names: Subset labels to tally and report, in print order.
            Defaults to the module-level ``subsets``.

    Raises:
        ValueError: If a non-blank row does not have exactly six columns.
        KeyError: If a row names a subset that is not in ``subset_names``.
    """
    if clips_path is None:
        clips_path = file_clips
    if subset_names is None:
        subset_names = subsets

    print('====Parsing clips====')
    videos = {subset: set() for subset in subset_names}
    n_clips = {subset: 0 for subset in subset_names}

    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for files passed to csv.reader.
    with open(clips_path, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=',')
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            # Unpack all six columns so malformed rows still fail loudly.
            _classname, vid, subset, _start, _end, _label = row
            videos[subset].add(vid)
            n_clips[subset] += 1

    for subset in subset_names:
        print('[{} set]: {} videos, {} clips'.format(
            subset, len(videos[subset]), n_clips[subset]))
# HACS Segments statistics
def parse_segments(segments_path=None, subset_names=None):
    """Print per-subset video counts for a HACS Segments JSON file.

    The JSON document must have a top-level ``'database'`` object whose
    values are per-video dicts carrying a ``'subset'`` key.

    Args:
        segments_path: Path of the JSON file to read. Defaults to the
            module-level ``file_segments``.
        subset_names: Subset labels to tally and report, in print order.
            Defaults to the module-level ``subsets``.

    Raises:
        KeyError: If ``'database'`` or a video's ``'subset'`` key is missing,
            or a video names a subset that is not in ``subset_names``.
    """
    if segments_path is None:
        segments_path = file_segments
    if subset_names is None:
        subset_names = subsets

    print('====Parsing segments====')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(segments_path, 'r', encoding='utf-8') as f:
        dataset_segments = json.load(f)['database']

    n_videos = {subset: 0 for subset in subset_names}
    # Only the per-video info is needed; the video id keys are not used.
    for info in dataset_segments.values():
        n_videos[info['subset']] += 1

    for subset in subset_names:
        print('[{} set]: {} videos'.format(
            subset, n_videos[subset]))
if __name__ == '__main__':
    # Run the clips report, then the segments report, then signal completion.
    for report in (parse_clips, parse_segments):
        report()
    print('Done.')