FLTR_pred.py
"""
Created on Wed Apr 29 2020
@author: Laura Iacovissi
"""
import time
import datetime
import argparse
import numpy as np
import pandas as pd
from numba import jit
from itertools import product
from multiprocessing import Pool


@jit(nopython=True)
def FLTM(t, Q, exp_level, influence, state, total, nodes, pred, n, G):
    # vectorized version of the influence expansion
    for i in range(n):
        neigh = np.zeros(n, dtype=np.bool_)
        # dequeue
        v = Q[0]
        Q = Q[1:]
        # define neighborhood mask
        for j in np.nonzero(G[v, :])[0]:
            neigh[j] = True
        # update expansion levels
        exp_level[~state & neigh] = exp_level[v] + 1
        # update influence values
        influence[~state & neigh] += 1
        # define activation mask
        activated = ~state & neigh & (influence > pred * t)
        # update state values
        state[activated] = True
        # update counter of activated nodes
        total += np.sum(activated)
        # enqueue the activated nodes
        act = nodes[activated]
        Q = np.concatenate((Q, act))
        if Q.size == 0:
            break
    return total, np.max(exp_level), np.mean(exp_level)
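
# Worked example of the activation rule above (illustrative numbers only, not from
# the pipeline): with resistance t = 0.5 and pred = 4 incoming edges, a node
# activates once influence > pred * t = 2.0, i.e. when its third in-neighbour
# becomes active.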


def expand_influence(n_job, args):
    '''
    This function computes the FLTR metric for node x in the graph G.
    INPUT
    n_job : int, index of the current job (used only for progress reporting)
    args : tuple (G, x, t, n, jobs) where
        G : numpy array, adjacency matrix of a graph (n x n). i -> j iff G[i,j] = 1
        x : int, node of interest
        t : float, resistance value (constant over nodes)
        n : int, graph size
        jobs : int, total number of jobs (used only for progress reporting)
    OUTPUT
    total : int, FLTR(x)
    max(exp_level) : int, maximum expansion level reached during the computation
    mean(exp_level) : float, mean expansion level reached during the computation
    '''
    G, x, t, n, jobs = args
    # info
    print('Jobs {}/{}'.format(int(n_job), int(jobs)))
    # store the node ids in a numpy array
    nodes = np.arange(n)
    # compute the activation set for the node of interest
    X = list(np.nonzero(G[x, :])[0]) + [x]
    # initialize the counter of active nodes
    total = len(X)
    # list (queue) of active nodes
    Q = np.array(sorted(X))
    # node states (active = True, not active = False)
    state = np.array([v in X for v in nodes])
    # node incoming influence (starting from zero, at most n)
    influence = np.array([0] * n)
    # node expansion level (0 if in X, else -1; at most n)
    exp_level = np.array([-int(v not in X) for v in nodes])
    # number of predecessors of each node
    pred = np.array([len(np.nonzero(G[:, v])[0]) for v in nodes])
    return FLTM(t, Q, exp_level, influence, state, total, nodes, pred, n, G)
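
# Minimal usage sketch for expand_influence (illustrative only; the toy adjacency
# matrix and the argument values are made up, not taken from the pipeline):
#   G_toy = np.array([[0, 1, 1],
#                     [0, 0, 1],
#                     [0, 0, 0]])
#   total, max_lvl, avg_lvl = expand_influence(1, (G_toy, 0, 0.5, 3, 1))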


def run_simulation_parallel(params):
    # load the probabilities p_i
    with open('data/keys_gc_{}.txt'.format(params.n), 'r') as f:
        prob = eval(f.read())
    # pick the probability of interest
    p = prob[params.p]
    del prob
    # load the resistance values
    res = np.load('data/res_phase1.npy')
    # check the directed flag
    if params.d:
        lab = 'dir'
    else:
        lab = 'und'
    # load the graphs G(N, p_i) from their adjacency matrices
    matrices = np.load('data/graphs/graph_{}_{}_{}.npy'.format(params.n, p, lab))
    matrices = [matrices[i, :, :] for i in range(params.k)]  # convert the data into an iterable of adjacency matrices
    # select the nodes of interest
    if params.do_sample:
        nodes = np.floor(params.n * np.random.rand(params.sample)).astype(int)  # sample nodes uniformly at random
    else:
        nodes = np.arange(params.n)  # use all available nodes
    # info
    start_time = time.time()
    # run the expansions in parallel for the fixed value of p_i and save the outputs
    pool = Pool()  # create the worker pool
    # compute the number of jobs
    n_jobs = params.k * len(nodes) * len(res)
    # associate processes to args
    out = pd.DataFrame.from_records({
        'args': list(product(range(params.k), nodes, res)),
        'output': pool.starmap(expand_influence,
                               enumerate(product(matrices, nodes, res, [params.n], [n_jobs])))
    })
    # convert the output into a dataframe
    raw_data = pd.DataFrame.from_records(
        out.apply(lambda x: [x.args[0], x.args[1], x.args[2], x.output[0], x.output[1], x.output[2]], axis=1),
        columns=['realization', 'node', 'resistance', 'metric', 'max_level', 'avg_level'])
    del out
    raw_data.to_csv('data/pred/data_{}_{}_{}.csv'.format(lab, params.n, p))
    # statistics per node (double index: resistance and node)
    data_per_node = raw_data.groupby('resistance').apply(lambda x: x[['metric', 'max_level', 'avg_level', 'node']].groupby('node').mean())
    data_per_node.to_csv('data/pred/data_node_{}_{}_{}.csv'.format(lab, params.n, p))
    del data_per_node
    # statistics per graph G(n, p, t) (single index: resistance)
    data_per_prob = raw_data.groupby('resistance').mean()[['metric', 'max_level', 'avg_level']]
    data_per_prob.to_csv('data/pred/data_graph_{}_{}_{}.csv'.format(lab, params.n, p))
    del data_per_prob
    del raw_data
    # close the pool
    pool.close()
    # info
    end_time = time.time()
    uptime = end_time - start_time
    human_uptime = datetime.timedelta(seconds=uptime)
    print()
    print("Size: {} \n Total uptime: {} \n".format(params.n, human_uptime))
if __name__ == "__main__":
# define arguments
parser = argparse.ArgumentParser()
# graph size
parser.add_argument('--n', type=int)
# index of the probability to use
parser.add_argument('--p', type=int)
# directed or not
parser.add_argument('--dir', dest='d', action='store_true')
parser.add_argument('--und', dest='d', action='store_false')
parser.set_defaults(d=True)
# do node sample or not
parser.add_argument('--yes_sample', dest='do_sample', action='store_true')
parser.add_argument('--no_sample', dest='do_sample', action='store_false')
parser.set_defaults(do_sample=False)
# node sample size
parser.add_argument('--sample', type=int, default=5000)
# number of samples for Gnp
parser.add_argument('--k', type=int, default=50)
'''
# Not impremented in the FLTR
# if weighted
parser.add_argument('--weighted', type=bool, default=False)
# weight interval
parser.add_argument('--a', type=int, default=0)
parser.add_argument('--b', type=int, default=1)
'''
# parse arguments to dictionary
args = parser.parse_args()
run_simulation_parallel(args)
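
# Example invocation (hypothetical values; assumes the matching files under data/
# already exist):
#   python FLTR_pred.py --n 1000 --p 0 --und --no_sample --k 50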