-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmodel.py
83 lines (64 loc) · 2.78 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""the simple baseline for autograph"""
import time
import numpy as np
from ag.system_ext import suppres_all_output
from env_utils import prepare_env
# Maximum number of best-scoring trials whose predictions Model.train_predict ensembles.
N_TOP = 3
class Model:
    """Baseline model that searches ``SEARCH_SPACE_FLAT`` and ensembles the best trials.

    Every configuration from ``ag.pyg_model.SEARCH_SPACE_FLAT`` is submitted to an
    ``ag.worker_executor.Executor``; the finished trials are ranked by score and
    the predictions of the best ones are combined with a geometric mean.
    """

    def __init__(self):
        """Prepare the environment and start (or reuse) a local ray runtime."""
        prepare_env()
        # ray is imported only after prepare_env() has run, keeping the original order.
        import ray
        ray.init(
            num_gpus=1, num_cpus=5, memory=1e10, object_store_memory=1e10,
            configure_logging=False, ignore_reinit_error=True,
            log_to_driver=True,
            include_webui=False
        )

    @staticmethod
    def _make_trial(config):
        """Return a trial function bound to exactly this ``config``.

        Building the closure in its own scope avoids the late-binding pitfall of
        defining it inside the submission loop, where every closure would see
        whatever value the loop variable holds when the trial finally runs.
        """
        def func(base_class, data, g):
            model = base_class(**config)
            y_pred, score = model.fit_predict(data, g)
            return y_pred, score
        return func

    def train_predict(self, data, time_budget, n_class, schema):
        """Run the configuration search within the time budget and return predictions.

        Args:
            data: Mapping describing the dataset. ``data['fea_table']`` and
                ``data['edge_file']`` are read here before the mapping is
                converted with ``generate_pyg_data``.
            time_budget: Wall-clock budget in seconds, measured from the start
                of this call. Result collection stops 4 seconds before it.
            n_class: Number of classes, forwarded to ``create_factory_method``.
            schema: Unused by this implementation.

        Returns:
            ``argmax(axis=1)`` of the geometric mean of the selected trial
            predictions (presumably one class index per node -- confirm against
            the output shape of ``fit_predict``).

        Raises:
            ValueError: If no trial finished before the deadline, so there is
                nothing to ensemble.
        """
        start_time = time.time()
        from ag.worker_executor import Executor
        from ag.pyg_model import SEARCH_SPACE_FLAT, PYGModel, create_factory_method
        from ag.pyg_utils import generate_pyg_data
        from dgl import DGLGraph
        # NOTE(review): torch is not referenced below; the import is kept in case
        # its import-time side effects are relied upon -- confirm before removing.
        import torch
        from scipy.stats import gmean

        print(data['fea_table'])
        print(data['fea_table'].shape)
        n_edge = len(data['edge_file'])
        print('n_edge', n_edge)

        # NOTE(review): this threshold uses ">" while the Executor one below uses
        # "<", so exactly 400000 edges gives n_cv=2 together with an Executor
        # first argument of 1 -- confirm this is intended.
        n_cv = 1 if n_edge > 400000 else 2
        data = generate_pyg_data(data, n_cv)
        print('EDGE WEIGHTS', data.edge_weight, data.edge_weight.mean(), data.edge_weight.sum(), data.edge_weight.max(), data.edge_weight.shape)

        # The two rows of edge_index are passed in swapped order.
        g = DGLGraph((data.edge_index[1], data.edge_index[0]))
        print('DATAINFO', data, time_budget, n_class)

        base_class = create_factory_method(n_classes=n_class)
        executor = Executor(5 if n_edge < 400000 else 1, base_class, data, g, gpu_per_trial=0.2)
        print('CONFIG', len(SEARCH_SPACE_FLAT))

        # Every configuration is submitted twice; a separate local name is used
        # so the imported SEARCH_SPACE_FLAT is not rebound.
        search_space = SEARCH_SPACE_FLAT * 2
        for config in search_space:
            executor.apply(
                self._make_trial(config),
                name={str(k): str(v) for k, v in config.items()},
            )

        # Poll until every trial has reported or the budget (minus 4 s) is used up.
        results = []
        while (len(results) != len(search_space)) and ((time.time() - start_time) < (time_budget - 4)):
            r = executor.get(timeout=2)
            if r is not None:
                results.append(r)

        # Each result is indexed as r[0] (printed label) and r[1] == (y_pred, score);
        # rank by descending score.
        sresults = sorted(results, key=lambda x: -x[1][1])
        print(len(results), time.time() - start_time)
        print('\n'.join(f'{r[0]} {r[1][1]}' for r in sresults))

        if not sresults:
            raise ValueError(
                'no trial finished within the time budget; nothing to ensemble'
            )

        # Keep at most N_TOP trials whose score is within 0.02 of the best one.
        best_score = sresults[0][1][1]
        predictions = np.vstack(
            [r[1][0] for r in sresults[:N_TOP] if r[1][1] > best_score - 0.02]
        )
        print(predictions.shape)
        predictions = gmean(predictions, axis=0)
        return predictions.argmax(axis=1)

    def __del__(self):
        """Shut ray down on a best-effort basis when the model is finalized."""
        try:
            with suppres_all_output():
                import ray
                ray.shutdown()
                time.sleep(1)
        except Exception:
            # A finalizer must never propagate errors (e.g. during interpreter
            # teardown), but KeyboardInterrupt/SystemExit are no longer swallowed.
            pass