-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmi-apriori.py
249 lines (195 loc) · 8.53 KB
/
mi-apriori.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File : mi-apriori.py
# Author : Kaushik S Kalmady, Siddharth V
# Date : 21.11.2018
# Last Modified Date: 21.11.2018
# Last Modified By : Kaushik S Kalmady
"""
Implements the multi time interval apriori algorithm
"""
from __future__ import print_function
import sys
import logging
from copy import deepcopy as copy
from interval import make_table
from apriori_utils import joinable, joinC2, joinCk
from apriori_utils import contains, generate_one_itemsets
from config import TIME_INTERVALS, MIN_SUP, DB
#time_interval_matrix = make_table(TIME_INTERVALS)
#one_itemsets = generate_one_itemsets(DB, MIN_SUP)
logging.basicConfig(level=logging.DEBUG)
def show(sequences):
for seq in sequences:
seq.display()
class MultiTimeIntervalApriori:
"""Implements the Multi Time Interval Apriori Algorithm
multi-time-interval-sequences will be referred to as MTIS in comments"""
def __init__(self, db=DB, timeIntervals=TIME_INTERVALS, min_sup=MIN_SUP):
self.__db = db
self.__timeIntervals = timeIntervals
self.__timeIntervalMatrix = make_table(self.__timeIntervals)
self.__min_sup = min_sup
self.__oneItemsets = generate_one_itemsets(self.__db, self.__min_sup)
self.__frequentSequences = dict()
def getFrequentSequences(self, k=None):
if k is None:
return self.__frequentSequences
return self.__frequentSequences[k]
def support(self, sequence, verbose=False):
"""Returns support of multiTimeIntervalSequence sequence in db
This is equal to number of rows containing sequence
divided by total number of rows"""
item_in_rows = 0.0
num_rows = len(self.__db)
transactions = []
for row in self.__db:
if contains(row, sequence):
item_in_rows += 1
transactions.append(row)
if verbose:
print("The given sequence is present in the following transactions")
for idx, row in enumerate(transactions):
print("{0} : {1}".format(idx, row))
return item_in_rows/num_rows
def run_apriori(self, max_sequence_length=4, verbose=False):
"""Stub function to run the apriori algorithm.
Arguments:
max_sequence_length: Maximum multiTimeIntervalSequence length
"""
print("Generating frequent 2-MTIS")
self.generateC2(verbose=verbose)
for k in range(3, max_sequence_length + 1):
print("Generating {0}-MTIS".format(k))
# Get all Ck-1 MTIS
kminus1_sequences = self.__frequentSequences[k-1]
current_ksequence_list = []
# Check if two Ck-1 MTIS are joinable and join
for sequence_1 in kminus1_sequences:
for sequence_2 in kminus1_sequences:
if joinable(sequence_1, sequence_2):
# This can generate multiple Ck sequences
new_k_sequences = joinCk(sequence_1, sequence_2,
self.__timeIntervalMatrix)
current_ksequence_list += new_k_sequences
logging.debug("Sequences Generated : {0}".format(len(current_ksequence_list)))
# Choose only frequent Ck MTIS
current_ksequence_list = [sequence
for sequence in current_ksequence_list
if self.support(sequence) >= self.__min_sup]
logging.debug("Frequent Sequences Generated : {0}".format(len(current_ksequence_list)))
# Prune
current_ksequence_list = self.prune(current_ksequence_list, k)
logging.debug("Pruned Sequences Generated : {0}".format(len(current_ksequence_list)))
self.__frequentSequences[k] = current_ksequence_list
if verbose:
show(current_ksequence_list)
def generateC2(self, verbose=False):
two_multi_sequences = joinC2(self.__oneItemsets, self.__timeIntervals)
frequent_two_sequences = [sequence
for sequence in two_multi_sequences
if self.support(sequence) >= self.__min_sup]
self.__frequentSequences[2] = frequent_two_sequences
if verbose:
show(frequent_two_sequences)
def prune(self, sequence_list, k):
""" Returns the pruned list of (k)multiTimeIntervalSequence
Arguments:
sequence_list: List of (k)multiTimeIntervalSequence
"""
def gen_sub_sequences(sequence):
""" Returns the list of all (k-1)sub_sequence of the give (k)multiTimeIntervalSequence
Arguments:
sequence: (k)multiTimeIntervalSequence
"""
subsequences = []
for i in range(sequence.length):
t_items = copy(sequence.items)
t_intervals = copy(sequence.intervals)
del t_items[i]
if i != 0 and i != sequence.length-1:
del t_intervals[i-1]
for j in range(i-1, len(t_intervals)):
del t_intervals[j][i]
elif i == sequence.length-1:
del t_intervals[i-1]
elif i == 0:
del t_intervals[0]
for j in range(i, len(t_intervals)):
del t_intervals[j][i]
t_sequence = multiTimeIntervalSequence(t_items, t_intervals)
subsequences.append(t_sequence)
return subsequences
def checkAllSubsequencesExist(sequence, k):
sub_sequences = gen_sub_sequences(sequence)
for sub_sequence in sub_sequences:
if sub_sequence not in self.__frequentSequences[k-1]:
return False
return True
return [sequence
for sequence in sequence_list
if checkAllSubsequencesExist(sequence, k)]
if __name__=="__main__":
import argparse
parser = argparse.ArgumentParser(description="MI Apriori Algorithm")
parser.add_argument("--example", help="specify which example to run", type=int)
parser.add_argument("--minsup", help="specify minimum support",
nargs='?', const=MIN_SUP, type=float)
args = vars(parser.parse_args())
exid = args["example"]
min_sup = args["minsup"]
from apriori_utils import multiTimeIntervalSequence
m = MultiTimeIntervalApriori(min_sup=min_sup)
def t(idx):
return TIME_INTERVALS[idx]
def example1():
print("=======================================")
print(" RUNNING EXAMPLE 1 ")
print(" Check Support for example sequences ")
I1 = ['b', 'e', 'c']
T1 = [[t(1)], [t(3), t(2)]]
seq1 = multiTimeIntervalSequence(I1, T1)
print("\nSequence 1")
seq1.display()
print("Support for Sequence 1 : {0}".format(m.support(seq1, verbose=True)))
print('\n')
I1 = ['a', 'c']
T1 = [[t(2)]]
seq1 = multiTimeIntervalSequence(I1, T1)
print("Sequence 2")
seq1.display()
print("Support for Sequence 2 : {0}".format(m.support(seq1, verbose=True)))
print("=======================================")
def example2():
print("=======================================")
print(" RUNNING EXAMPLE 2 ")
print(" Join 2 sequences ")
I1 = ['b', 'c', 'd']
T1 = [[t(1)], [t(1), t(1)]]
seq1 = multiTimeIntervalSequence(I1, T1)
print("Sequence 1")
seq1.display()
print('\n')
I1 = ['c', 'd', 'e']
T1 = [[t(1)], [t(2), t(1)]]
seq2 = multiTimeIntervalSequence(I1, T1)
print("Sequence 2")
seq2.display()
print('\n')
timeIntervalMatrix = make_table(TIME_INTERVALS)
new_k_sequences = joinCk(seq1, seq2,timeIntervalMatrix)
print("Joined Sequence")
show(new_k_sequences)
print("=======================================")
def example3():
print("=======================================")
print(" RUNNING EXAMPLE 3 ")
print(" Running MI Apriori on sample data ")
m.run_apriori(max_sequence_length=6, verbose=True)
print("=======================================")
if exid == 1:
example1()
elif exid == 2:
example2()
elif exid == 3:
example3()