-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathremove_duplicates.py
49 lines (45 loc) · 1.69 KB
/
remove_duplicates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import argparse
from config import *
from post_process import remove_duplicates
if __name__ == "__main__":
    # Command-line entry point: parse the hashing options, derive the
    # difference threshold from the similarity percentage, and hand everything
    # to post_process.remove_duplicates for the given image folder.
    parser = argparse.ArgumentParser(
        description="This script is used to remove duplicate (similar) images from an image folder."
    )
    # The folder is mandatory: without it, None would be forwarded to
    # remove_duplicates instead of a usable path.
    parser.add_argument(
        "-f",
        "--folder",
        help="Path to the image folder",
        type=str,
        required=True,
    )
    parser.add_argument(
        "-hf",
        "--hash-func",
        help="Hash function to use for image hashing. Only effective if post-processing is enabled",
        default=HASH_FUNC,
        choices=["dhash", "phash", "ahash"],
        type=str,
    )
    parser.add_argument(
        "-hs",
        "--hash-size",
        help="Hash size to use for image hashing. Only effective if post-processing is enabled",
        default=HASH_SIZE,
        choices=[8, 12, 16],
        type=int,
    )
    parser.add_argument(
        "--threshold",
        help="Minimum similarity threshold (in percent) to consider 2 images to be similar. Only effective if post-processing is enabled",
        default=SIM_THRESHOLD,
        choices=range(80, 101),
        type=int,
    )
    parser.add_argument(
        "-q",
        "--queue-len",
        help="Number of history images used to find out duplicate image. Only effective if post-processing is enabled",
        default=HASH_BUFFER_HISTORY,
        type=int,
    )
    args = parser.parse_args()

    queue_len = args.queue_len
    hash_size = args.hash_size

    # Map the hash function name to its implementation. argparse does not
    # validate default values against `choices`, so a configured default that
    # is missing from HASH_FUNC_DICT would otherwise yield None silently.
    hash_func = HASH_FUNC_DICT.get(args.hash_func)
    if hash_func is None:
        parser.error(f"Unsupported hash function: {args.hash_func!r}")

    # Convert the similarity percentage into an absolute difference budget,
    # scaled by hash_size * hash_size (presumably the number of bits in the
    # hash -- confirm against post_process). The result is truncated to int.
    sim_threshold = args.threshold
    diff_threshold = int(hash_size * hash_size * (100 - sim_threshold) / 100)

    remove_duplicates(args.folder, hash_size, hash_func, queue_len, diff_threshold)