Skip to content

Commit

Permalink
DANMF
Browse files Browse the repository at this point in the history
  • Loading branch information
benedekrozemberczki committed Dec 8, 2019
1 parent 7a15d36 commit 749de44
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 11 deletions.
34 changes: 25 additions & 9 deletions examples.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,42 @@
"""Example runs with Karate Club."""

import networkx as nx
# NOTE(review): the import a few lines below also brings in EgoNetSplitter and
# EdMot (plus DANMF), so this line looks like a leftover from the previous
# revision of the file -- confirm before removing.
from karateclub import EgoNetSplitter, EdMot

# NOTE(review): this 1000-node graph is rebound by the assignment in the
# "Splitter example" section before anything reads it.
g = nx.newman_watts_strogatz_graph(1000, 20, 0.05)
from karateclub import EgoNetSplitter, EdMot, DANMF

#------------------------------------
# Splitter example
#------------------------------------

# NOTE(review): `splitter` and `model` below are both EgoNetSplitter(1.0)
# instances fitted on the same graph, so the memberships are computed and
# printed twice. Presumably the `splitter` lines are the pre-rename version.
splitter = EgoNetSplitter(1.0)
# Newman-Watts-Strogatz small-world graph; the arguments are
# (number of nodes, ring neighbours per node, probability of adding a shortcut edge).
g = nx.newman_watts_strogatz_graph(100, 20, 0.05)


model = EgoNetSplitter(1.0)

splitter.fit(g)
model.fit(g)

print(splitter.get_memberships())
print(model.get_memberships())

#------------------------------------
# Edmot example
#------------------------------------

# NOTE(review): `edmot` is created here but only fitted at the very end of the
# script, on the graph built for the DANMF example -- looks unintended.
edmot = EdMot(2, 0.5)
g = nx.newman_watts_strogatz_graph(100, 10, 0.9)

model = EdMot(3, 0.5)

model.fit(g)

print(model.get_memberships())


#------------------------------------
# DANMF example
#------------------------------------

g = nx.newman_watts_strogatz_graph(100, 10, 0.02)

# DANMF is constructed with its default hyperparameters.
model = DANMF()

# NOTE(review): fits the EdMot instance from the previous section on the DANMF
# graph; presumably a leftover of the old script -- confirm.
edmot.fit(g)
model.fit(g)

print(edmot.get_memberships())
print(model.get_memberships())
1 change: 1 addition & 0 deletions karateclub/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from karateclub.ego_splitter import EgoNetSplitter
from karateclub.edmot import EdMot
from karateclub.danmf import DANMF
154 changes: 154 additions & 0 deletions karateclub/danmf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""DANMF class."""

import numpy as np
from tqdm import tqdm
import networkx as nx
from sklearn.decomposition import NMF

class DANMF(object):
    """
    Deep autoencoder-like non-negative matrix factorization class.

    The model first pre-trains a stack of NMF layers with scikit-learn and
    then refines all factors jointly with multiplicative update rules.

    After ``fit`` the instance exposes:

    * ``U_s`` - list of left hand factors, one per layer.
    * ``V_s`` - list of right hand factors, one per layer.
    * ``P`` - product of all left hand factors; its row-wise argmax is the
      cluster membership returned by ``get_memberships``.
    """

    # Floor applied to the denominator of every multiplicative update so the
    # element-wise division never divides by zero.
    _EPSILON = 10**-10

    def __init__(self, layers=None, pre_iterations=100, iterations=100, seed=42, lamb=0.01):
        """
        Initializing a DANMF object.
        :param layers: Sequence of layer sizes; defaults to [32, 8].
        :param pre_iterations: Maximum iterations for each sklearn NMF pre-training run.
        :param iterations: Number of joint training passes.
        :param seed: Random state handed to the sklearn NMF models.
        :param lamb: Weight of the adjacency/degree terms in the last layer's V update.
        :raises ValueError: If `layers` is empty.
        """
        # A list literal as the default would be shared by every instance, so
        # the default is materialised per call and user input is copied.
        if layers is None:
            layers = [32, 8]
        self.layers = list(layers)
        if not self.layers:
            raise ValueError("`layers` must contain at least one layer size.")
        self.pre_iterations = pre_iterations
        self.iterations = iterations
        self.seed = seed
        self.lamb = lamb
        self.p = len(self.layers)

    def _setup_target_matrices(self, graph):
        """
        Storing the graph and the matrices derived from it.
        :param graph: NetworkX graph to be clustered.
        """
        self.graph = graph
        self.A = nx.adjacency_matrix(self.graph)
        self.L = nx.laplacian_matrix(self.graph)
        # networkx defines the Laplacian as D - A, so adding A back gives D.
        self.D = self.L+self.A

    def _setup_z(self, i):
        """
        Setup target matrix for pre-training process.
        :param i: Layer index.
        """
        # The first layer factorizes the adjacency matrix; every later layer
        # factorizes the right hand factor of the layer before it.
        if i == 0:
            self.Z = self.A
        else:
            self.Z = self.V_s[i-1]

    def _sklearn_pretrain(self, i):
        """
        Pretraining a single layer of the model with sklearn.
        :param i: Layer index.
        :return: Tuple of the left hand factor U and the right hand factor V.
        """
        nmf_model = NMF(n_components=self.layers[i],
                        init="random",
                        random_state=self.seed,
                        max_iter=self.pre_iterations)

        U = nmf_model.fit_transform(self.Z)
        V = nmf_model.components_
        return U, V

    def _pre_training(self):
        """
        Pre-training each NMF layer.
        """
        print("\nLayer pre-training started. \n")
        self.U_s = []
        self.V_s = []
        for i in tqdm(range(self.p), desc="Layers trained: ", leave=True):
            self._setup_z(i)
            U, V = self._sklearn_pretrain(i)
            self.U_s.append(U)
            self.V_s.append(V)

    def _setup_Q(self):
        """
        Setting up Q matrices.

        Q_s[p] is an identity matrix and Q_s[i] = U_s[i] * Q_s[i+1], i.e. the
        product of the left hand factors from layer i to the last layer.
        """
        self.Q_s = [None for _ in range(self.p+1)]
        self.Q_s[self.p] = np.eye(self.layers[self.p-1])
        # Built back to front because each entry depends on its successor.
        for i in range(self.p-1, -1, -1):
            self.Q_s[i] = np.dot(self.U_s[i], self.Q_s[i+1])

    def _update_U(self, i):
        """
        Updating left hand factors.
        :param i: Layer index.
        """
        # For i > 0, self.P still holds the product U_s[0]...U_s[i-1] at this
        # point: fit() calls _update_P(i) only after _update_U(i).
        if i == 0:
            R = self.U_s[0].dot(self.Q_s[1].dot(self.VpVpT).dot(self.Q_s[1].T))
            R = R+self.A_sq.dot(self.U_s[0].dot(self.Q_s[1].dot(self.Q_s[1].T)))
            Ru = 2*self.A.dot(self.V_s[self.p-1].T.dot(self.Q_s[1].T))
            self.U_s[0] = (self.U_s[0]*Ru)/np.maximum(R, self._EPSILON)
        else:
            R = self.P.T.dot(self.P).dot(self.U_s[i]).dot(self.Q_s[i+1]).dot(self.VpVpT).dot(self.Q_s[i+1].T)
            R = R+self.A_sq.dot(self.P).T.dot(self.P).dot(self.U_s[i]).dot(self.Q_s[i+1]).dot(self.Q_s[i+1].T)
            Ru = 2*self.A.dot(self.P).T.dot(self.V_s[self.p-1].T).dot(self.Q_s[i+1].T)
            self.U_s[i] = (self.U_s[i]*Ru)/np.maximum(R, self._EPSILON)

    def _update_P(self, i):
        """
        Setting up P matrices.
        :param i: Layer index.
        """
        # P accumulates the product of the left hand factors up to layer i.
        if i == 0:
            self.P = self.U_s[0]
        else:
            self.P = self.P.dot(self.U_s[i])

    def _update_V(self, i):
        """
        Updating right hand factors.
        :param i: Layer index.
        """
        current = self.V_s[i]
        # Terms shared by every layer.
        Vu = 2*self.A.dot(self.P).T
        Vd = self.P.T.dot(self.P).dot(current)+current
        if i >= self.p-1:
            # Only the last layer carries the lamb-weighted adjacency
            # (numerator) and degree (denominator) terms.
            Vu = Vu+(self.lamb*self.A.dot(current.T)).T
            Vd = Vd+(self.lamb*self.D.dot(current.T)).T
        self.V_s[i] = current * Vu/np.maximum(Vd, self._EPSILON)

    def _setup_VpVpT(self):
        """
        Caching the product of the last right hand factor with its transpose.
        """
        last_V = self.V_s[self.p-1]
        self.VpVpT = last_V.dot(last_V.T)

    def _setup_Asq(self):
        """
        Caching the product of the adjacency matrix with its transpose.
        """
        self.A_sq = self.A.dot(self.A.T)

    def get_embedding(self):
        """
        Get embedding matrix.
        :return: Array whose columns are the row index, then P, then the
                 transposed right hand factor of the last layer.
        """
        node_ids = np.array(range(self.P.shape[0])).reshape(-1, 1)
        return np.concatenate([node_ids, self.P, self.V_s[-1].T], axis=1)

    def get_memberships(self):
        """
        Get cluster membership.
        :return: Dictionary mapping each row index of P to the column index
                 of that row's largest entry.
        """
        index = np.argmax(self.P, axis=1)
        return {int(node): int(cluster) for node, cluster in enumerate(index)}

    def fit(self, graph):
        """
        Training process after pre-training.
        :param graph: NetworkX graph to be clustered.
        """
        print("\n\nTraining started. \n")

        self._setup_target_matrices(graph)
        self._pre_training()
        self._setup_Asq()
        for _ in tqdm(range(self.iterations), desc="Training pass: ", leave=True):
            # Q and VpVpT are refreshed once per pass, then every layer is
            # updated in order: U first, then the running product P, then V.
            self._setup_Q()
            self._setup_VpVpT()
            for i in range(self.p):
                self._update_U(i)
                self._update_P(i)
                self._update_V(i)
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
from distutils.core import setup

setup(
name = "karateclub",
packages = ["karateclub"],
version = "0.10",
version = "0.11",
license = "MIT",
description = "A general purpose library for community detection and graph clustering research.",
author = "Benedek Rozemberczki",
author_email = "benedek.rozemberczki@gmail.com",
url = "https://github.com/benedekrozemberczki/karateclub",
download_url = "https://github.com/benedekrozemberczki/karateclub/archive/v_010.tar.gz",
download_url = "https://github.com/benedekrozemberczki/karateclub/archive/v_011.tar.gz",
keywords = ["community", "detection", "networkx", "graph", "clustering"],
install_requires=[
"numpy",
"networkx",
"tqdm",
"python-louvain",
"sklearn",
],
classifiers=[
"Development Status :: 3 - Alpha",
Expand Down

0 comments on commit 749de44

Please sign in to comment.