-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsimilarity_buckets.py
129 lines (106 loc) · 3.55 KB
/
similarity_buckets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import ast
import json
import math
import os
import random

import numpy as np

import calc
import getbuckets
import getmaps
import getsrs
def manhattan(a, b):
    """Manhattan (L1) distance between two equal-length index-able sequences."""
    total = 0
    for i in range(len(a)):
        total += abs(a[i] - b[i])
    return total
def euclidean(a, b):
    """Euclidean (L2) distance between two equal-length index-able sequences."""
    total = 0
    for i in range(len(a)):
        diff = a[i] - b[i]
        total += diff * diff
    return math.sqrt(total)
def get_buckets(buckets_file):
    """Parse one bucket file into a dict mapping bucket time -> np.array.

    The file alternates lines: an integer key, then a Python list literal
    of bucket values.

    Security fix: uses ast.literal_eval instead of eval, so a crafted
    bucket file cannot execute arbitrary code.  This assumes the value
    lines are plain list literals — TODO confirm no bucket file contains
    non-literal expressions.
    """
    buckets = {}
    with open(buckets_file, 'r') as f:
        lines = f.readlines()
    # Pair each even line (key) with the following odd line (values).
    for i in range(0, len(lines), 2):
        buckets[int(lines[i])] = np.array(ast.literal_eval(lines[i + 1]))
    return buckets
def kl_divergence(p, q):
    """Kullback-Leibler divergence D(p || q) for discrete distributions.

    Bug fix: each summand must be p * log(p / q); the previous version
    omitted the leading p factor, so it did not compute KL divergence.
    Terms where p or q is zero contribute 0 (the usual 0*log(0) = 0
    convention, extended here to mask q == 0 as well, matching the
    original guard p*q != 0).
    """
    return np.sum(np.where(p * q != 0, p * np.log(p / q), 0))
def min_similarity(p, q):
    """Histogram-intersection similarity: the sum of elementwise minima."""
    return np.minimum(p, q).sum()
def get_similarity(b1, b2):
    """Compare two bucket dicts ({time: np.array}) and return a similarity score.

    Every pair of buckets is compared; pairs whose times are close (within
    a tolerance proportional to their mean time, plus a 10 ms grace band)
    contribute their histogram intersection, scaled by how close the times are.
    """
    tol = 0.025  # time tolerance as a fraction of the mean bucket time
    total = 0
    for t1, v1 in b1.items():
        for t2, v2 in b2.items():
            tol_ms = (t1 + t2) / 2 * tol
            # 1.0 when |t1 - t2| <= tol_ms, fading linearly to 0 over 10 ms.
            overlap = 10 + tol_ms - abs(t1 - t2)
            t_corr = min(max(0, overlap), 10) / 10
            total += t_corr * min_similarity(v1, v2)
    return total
def get_similar(id, n=50, filters=None):
    """Return up to n candidate maps most similar to beatmap `id`.

    Parameters:
        id: beatmap id whose bucket distribution is compared against every
            entry in the module-level `all_buckets` cache.
        n: maximum number of results (default 50).
        filters: optional iterable of (key, operator, value) triples; a
            candidate is skipped unless every predicate holds.  Supported
            operators: !=, >=, <=, >, <, =.

    Returns a list sorted by descending similarity.  Entries are
    (filename, similarity) when no SR is available for the query map, or
    (map_id, similarity, sr_distance) tuples otherwise.

    Relies on module globals `all_buckets`, `srs` and `stats` loaded at
    import time.
    """
    text = getmaps.get_map(id)
    dist = calc.get_distribution_raw(text)
    bkts = getbuckets.get_buckets_raw(dist)
    key = str(id)
    if key in srs:
        sr = srs[key]
    else:
        # SR not cached: dump the map text to a scratch .osu file so the
        # external SR calculator can read it.
        chars = '1234567890qwertyuiopasdfghjklzxcvbnm'
        temp_filename = ''.join(chars[random.randrange(len(chars))] for _ in range(10)) + '.osu'
        with open(temp_filename, 'w', encoding='utf8', newline='') as f:
            f.write(text)
        try:
            sr = getsrs.get_sr_file(temp_filename)
        finally:
            # Fix: remove the scratch file even when SR calculation raises
            # (previously it leaked on any exception).
            os.remove(temp_filename)
    similarities = []

    def get_stat(map_id, stat_name):
        # Resolve a filter key to a stat value for map_id.  Returns None
        # for unknown keys; NOTE(review): an ordering operator applied to
        # None will raise TypeError — confirm filters only use known keys.
        if stat_name == 'id':
            return int(map_id)
        elif stat_name in ['sr', 'star', 'stars']:
            return getsrs.get_sr(map_id)[0]
        elif stat_name in ['aim', 'aimsr']:
            return getsrs.get_sr(map_id)[1]
        elif stat_name in ['tap', 'tapsr']:
            return getsrs.get_sr(map_id)[2]
        elif map_id in stats and stat_name in stats[map_id]:
            return stats[map_id][stat_name]
        return None

    # Operator table hoisted out of the candidate loop (it is invariant).
    funcs = {
        '!=': lambda x, y: x != y,
        '>=': lambda x, y: x >= y,
        '<=': lambda x, y: x <= y,
        '>': lambda x, y: x > y,
        '<': lambda x, y: x < y,
        '=': lambda x, y: x == y
    }
    for file in all_buckets:
        # Skip the query map itself (bucket filenames start with the id).
        if file.startswith(str(id)):
            continue
        if filters:
            valid = True
            for fil in filters:
                stat_key, operator, value = fil
                if not funcs[operator](get_stat(file[:-5], stat_key), value):
                    valid = False
                    break
            if not valid:
                continue
        if not sr:
            # No SR for the query map: compare against every candidate.
            similarities.append((file, get_similarity(bkts, all_buckets[file])))
        else:
            # Strip the extension to get the map id — assumes a 5-char
            # extension on bucket filenames; TODO confirm.
            key = file[:-5]
            if key not in srs:
                continue
            # Pre-filter to maps within 0.5 SR (first two SR components).
            if euclidean(srs[key][:2], sr[:2]) <= 0.5:
                similarities.append((key, get_similarity(bkts, all_buckets[file]), euclidean(srs[key][:2], sr[:2])))
    similarities.sort(key=lambda s: -s[1])
    return similarities[:min(len(similarities), n)]
def get_all_buckets():
    """Load every bucket file under the 'buckets' directory.

    Returns a dict mapping each filename to its parsed bucket dict
    (see get_buckets).
    """
    return {
        entry.name: get_buckets(entry.path)
        for entry in os.scandir('buckets')
        if entry.is_file()
    }
# Module-level state loaded once at import time and shared by get_similar().
all_buckets = get_all_buckets()  # filename -> {time: np.array} bucket dicts
srs = getsrs.get_srs()  # presumably map-id (str) -> SR vector; verify against getsrs
with open('stats.json') as fin:
    stats = json.load(fin)  # per-map stats consulted by filter lookups
if __name__ == '__main__':
    # Quick smoke test: run one similarity query and report elapsed time.
    import time
    t0 = time.time()
    print(get_similar(2659353), time.time() - t0)