Skip to content

Commit

Permalink
Merge pull request #88 from GiulioRossetti/karate_club_integration
Browse files Browse the repository at this point in the history
Methods integration
  • Loading branch information
GiulioRossetti authored Jan 30, 2020
2 parents 6358e44 + 472ba90 commit 1a54ecd
Show file tree
Hide file tree
Showing 18 changed files with 409 additions and 37 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/GiulioRossetti/nclib.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/GiulioRossetti/nclib/context:python)
[![pyversions](https://img.shields.io/pypi/pyversions/cdlib.svg)](https://badge.fury.io/py/cdlib)
[![PyPI version](https://badge.fury.io/py/cdlib.svg)](https://badge.fury.io/py/cdlib)
[![PyPI download month](https://img.shields.io/pypi/dm/cdlib.svg?color=blue&style=plastic)](https://pypi.python.org/pypi/cdlib/)
[![DOI](https://zenodo.org/badge/159944561.svg)](https://zenodo.org/badge/latestdoi/159944561)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2FGiulioRossetti%2Fcdlib.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2FGiulioRossetti%2Fcdlib?ref=badge_shield)


``CDlib`` provides implementations of several community discovery algorithms.
Moreover, it implements a wide set of partition evaluation measures as well as predefined visualization facilities.
``CDlib`` is a meta-library for community discovery in complex networks: it implements algorithms and clustering fitness functions, as well as visualization facilities.

``CDlib`` is designed around the ``networkx`` python library: however, when needed, it takes care to automatically convert (from and to) ``igraph`` objects so as to provide an abstraction on specific algorithm implementations to the final user.

``CDlib`` provides standardized input/output facilities for several Community Discovery algorithms: whenever possible, to guarantee literature coherent results, implementations of CD algorithms are inherited from their original projects (see for instance [Infomap](https://pypi.org/project/infomap/),
[Louvain](https://github.com/taynaud/python-louvain), [Leiden](https://github.com/vtraag/leidenalg)).
Expand Down
1 change: 1 addition & 0 deletions cdlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from cdlib.classes.edge_clustering import EdgeClustering
from cdlib.classes.fuzzy_node_clustering import FuzzyNodeClustering
from cdlib.classes.attr_node_clustering import AttrNodeClustering
from cdlib.classes.bipartite_node_clustering import BiNodeClustering
1 change: 1 addition & 0 deletions cdlib/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .crisp_partition import *
from .overlapping_partition import *
from .attribute_clustering import *
from .biparitte_clustering import *
41 changes: 41 additions & 0 deletions cdlib/algorithms/biparitte_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from BiMLPA import BiMLPA_SqrtDeg, relabeling, output_community
from cdlib import BiNodeClustering

import networkx as nx
from cdlib.utils import convert_graph_formats


__all__ = ['bimlpa']


def bimlpa(g, theta=0.3, lambd=7):
    """
    Run BiMLPA, a multi-label propagation algorithm designed to detect the
    many-to-many correspondence community in bipartite networks.

    :param g: a networkx/igraph object (converted to ``nx.Graph`` before use)
    :param theta: Label weights threshold. Default 0.3.
    :param lambd: The max number of labels. Default 7.
    :return: BiNodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.algorithms.bipartite.random_graph(50, 50, 0.25)
    >>> coms = algorithms.bimlpa(G)

    :References:

    Taguchi, Hibiki, Tsuyoshi Murata, and Xin Liu. "BiMLPA: Community Detection in Bipartite Networks by Multi-Label Propagation." International Conference on Network Science. Springer, Cham, 2020.

    .. note:: Reference implementation: https://github.com/hbkt/BiMLPA
    """
    # NOTE(review): the method targets bipartite networks; the input graph is
    # presumably expected to carry a node-level bipartite labelling - confirm
    # against the BiMLPA package.
    graph = convert_graph_formats(g, nx.Graph)

    propagation = BiMLPA_SqrtDeg(graph, theta, lambd)
    propagation.start()
    # The helpers below take the graph itself, so the propagation results are
    # read back from the graph object.
    relabeling(graph)
    top_side, bottom_side = output_community(graph)

    settings = {"theta": theta, "lambd": lambd}
    return BiNodeClustering(top_side, bottom_side, graph, "BiMLPA", method_parameters=settings)
44 changes: 42 additions & 2 deletions cdlib/algorithms/crisp_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@
from cdlib.algorithms.internal.AGDL import Agdl
from cdlib.algorithms.internal.FuzzyCom import fuzzy_comm
from cdlib.algorithms.internal.Markov import markov
from karateclub import EdMot
import networkx as nx

from cdlib.utils import convert_graph_formats, __from_nx_to_graph_tool, affiliations2nodesets, nx_node_integer_mapping

__all__ = ["louvain", "leiden", "rb_pots", "rber_pots", "cpm", "significance_communities", "surprise_communities",
"greedy_modularity", "der", "label_propagation", "async_fluid", "infomap", "walktrap", "girvan_newman", "em",
"scan", "gdmp2", "spinglass", "eigenvector", "agdl", "frc_fgsn", "sbm_dl", "sbm_dl_nested",
"markov_clustering"]
"markov_clustering", "edmot"]


def girvan_newman(g, level):
Expand Down Expand Up @@ -977,7 +978,7 @@ def markov_clustering(g, max_loop=1000):
:param g: a networkx/igraph object
:param max_loop: maximum number of iterations, default 1000
:return: EdgeClustering object
:return: NodeClustering object
:Example:
Expand Down Expand Up @@ -1008,3 +1009,42 @@ def markov_clustering(g, max_loop=1000):
communities = [list(c) for c in communities]

return NodeClustering(communities, g, "Markov Clustering", method_parameters={"max_loop": max_loop})


def edmot(g, component_count=2, cutoff=10):
    """
    The algorithm first creates the graph of higher order motifs. This graph is clustered by the Louvain method.

    :param g: a networkx/igraph object
    :param component_count: Number of extracted motif hypergraph components. Default is 2.
    :param cutoff: Motif edge cut-off value. Default is 10.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.edmot(G, component_count=2, cutoff=10)

    :References:

    Li, Pei-Zhen, et al. "EdMot: An Edge Enhancement Approach for Motif-aware Community Detection." Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2019.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """

    g = convert_graph_formats(g, nx.Graph)

    # Forward the caller's settings: the model was previously built with the
    # literals 2 and 10, so the arguments were ignored even though they were
    # reported in method_parameters.
    model = EdMot(component_count=component_count, cutoff=cutoff)

    model.fit(g)
    members = model.get_memberships()

    # Reshaping the results: invert node -> community into community -> nodes.
    # A plain dict is used so the block needs no extra import.
    coms_to_node = {}
    for n, c in members.items():
        coms_to_node.setdefault(c, []).append(n)

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms, g, "EdMot", method_parameters={"component_count": component_count, "cutoff": cutoff})
2 changes: 1 addition & 1 deletion cdlib/algorithms/internal/CONGO.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ig = None
import numpy as np
import operator
import logging
# import logging



Expand Down
207 changes: 194 additions & 13 deletions cdlib/algorithms/overlapping_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cdlib.algorithms.internal import OSSE
import networkx as nx
import numpy as np
from collections import defaultdict
from cdlib import NodeClustering
from cdlib.utils import suppress_stdout, convert_graph_formats, nx_node_integer_mapping
from cdlib.algorithms.internal.CONGO import Congo_
Expand All @@ -20,11 +21,12 @@
from cdlib.algorithms.internal import LEMON
from cdlib.algorithms.internal.SLPA_nx import slpa_nx
from cdlib.algorithms.internal.multicom import MultiCom
from cdlib.algorithms.internal import BIGCLAM
from karateclub import DANMF, EgoNetSplitter, NNSED, MNMF, BigClam


__all__ = ["ego_networks", "demon", "angel", "node_perception", "overlapping_seed_set_expansion", "kclique", "lfm",
"lais2", "congo", "conga", "lemon", "slpa", "multicom", "big_clam"]
"lais2", "congo", "conga", "lemon", "slpa", "multicom", "big_clam", "danmf", "egonet_splitter", "nnsed",
"nmnf"]


def ego_networks(g, level=1):
Expand Down Expand Up @@ -547,16 +549,15 @@ def multicom(g, seed_node):
return NodeClustering(communities, g, "Multicom", method_parameters={"seeds": seed_node}, overlap=True)


def big_clam(g, dimensions=8, iterations=50, learning_rate=0.005):
    """
    BigClam is an overlapping community detection method that scales to large networks.
    The procedure uses gradient ascent to create an embedding which is used for deciding the node-cluster affiliations.

    :param g: a networkx/igraph object
    :param dimensions: Number of embedding dimensions. Default 8.
    :param iterations: Number of training iterations. Default 50.
    :param learning_rate: Gradient ascent learning rate. Default is 0.005.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.big_clam(G)

    :References:

    Yang, Jaewon, and Jure Leskovec. "Overlapping community detection at scale: a nonnegative matrix factorization approach." Proceedings of the sixth ACM international conference on Web search and data mining. 2013.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """

    g = convert_graph_formats(g, nx.Graph)

    # The former internal BIGCLAM.big_Clam(g, number_communities) call is gone:
    # it referenced a parameter that no longer exists in this signature.
    model = BigClam(dimensions=dimensions, iterations=iterations, learning_rate=learning_rate)
    model.fit(g)
    members = model.get_memberships()

    # Reshaping the results: invert node -> community into community -> nodes.
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(n)

    coms = [list(c) for c in coms_to_node.values()]

    return NodeClustering(coms, g, "BigClam", method_parameters={"dimensions": dimensions, "iterations": iterations,
                                                                 "learning_rate": learning_rate}, overlap=True)


def danmf(g, layers=(32, 8), pre_iterations=100, iterations=100, seed=42, lamb=0.01):
    """
    The procedure uses telescopic non-negative matrix factorization in order to learn a cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g: a networkx/igraph object
    :param layers: Autoencoder layer sizes as a sequence of integers. Default (32, 8).
    :param pre_iterations: Number of pre-training epochs. Default 100.
    :param iterations: Number of training epochs. Default 100.
    :param seed: Random seed for weight initializations. Default 42.
    :param lamb: Regularization parameter. Default 0.01.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.danmf(G)

    :References:

    Ye, Fanghua, Chuan Chen, and Zibin Zheng. "Deep autoencoder-like nonnegative matrix factorization for community detection." Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    graph = convert_graph_formats(g, nx.Graph)

    factorizer = DANMF(layers, pre_iterations, iterations, seed, lamb)
    factorizer.fit(graph)

    # Invert the node -> community mapping into community -> member nodes,
    # keeping communities in order of first appearance.
    grouped = {}
    for node, label in factorizer.get_memberships().items():
        grouped.setdefault(label, []).append(node)
    communities = list(map(list, grouped.values()))

    settings = {
        "layers": layers,
        "pre_iteration": pre_iterations,
        "iterations": iterations,
        "seed": seed,
        "lamb": lamb,
    }
    return NodeClustering(communities, graph, "DANMF", method_parameters=settings, overlap=True)


def egonet_splitter(g, resolution=1.0):
    """
    The method first creates the egonets of nodes. A persona-graph is created which is clustered by the Louvain method.

    :param g: a networkx/igraph object
    :param resolution: Resolution parameter of Python Louvain. Default 1.0.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.egonet_splitter(G)

    :References:

    Epasto, Alessandro, Silvio Lattanzi, and Renato Paes Leme. "Ego-splitting framework: From non-overlapping to overlapping clusters." Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    graph = convert_graph_formats(g, nx.Graph)

    splitter = EgoNetSplitter(resolution=resolution)
    splitter.fit(graph)

    # Each node maps to a collection of community labels, so a node is added
    # to every community it is listed under.
    grouped = {}
    for node, labels in splitter.get_memberships().items():
        for label in labels:
            grouped.setdefault(label, []).append(node)
    communities = list(map(list, grouped.values()))

    settings = {"resolution": resolution}
    return NodeClustering(communities, graph, "EgoNetSplitter", method_parameters=settings, overlap=True)


def nnsed(g, dimensions=32, iterations=10, seed=42):
    """
    The procedure uses non-negative matrix factorization in order to learn an unnormalized cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g: a networkx/igraph object
    :param dimensions: Embedding layer size. Default is 32.
    :param iterations: Number of training epochs. Default 10.
    :param seed: Random seed for weight initializations. Default 42.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.nnsed(G)

    :References:

    Sun, Bing-Jie, et al. "A non-negative symmetric encoder-decoder approach for community detection." Proceedings of the 2017 ACM on Conference on Information and Knowledge Management. 2017.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    graph = convert_graph_formats(g, nx.Graph)

    encoder = NNSED(dimensions=dimensions, iterations=iterations, seed=seed)
    encoder.fit(graph)

    # Invert the node -> community mapping into community -> member nodes,
    # keeping communities in order of first appearance.
    grouped = {}
    for node, label in encoder.get_memberships().items():
        grouped.setdefault(label, []).append(node)
    communities = list(map(list, grouped.values()))

    settings = {"dimension": dimensions, "iterations": iterations, "seed": seed}
    return NodeClustering(communities, graph, "NNSED", method_parameters=settings, overlap=True)


def nmnf(g, dimensions=128, clusters=10, lambd=0.2, alpha=0.05, beta=0.05, iterations=200, lower_control=1e-15, eta=5.0):
    """
    The procedure uses joint non-negative matrix factorization with modularity based regularization in order to learn a cluster membership distribution over nodes. The method can be used in an overlapping and non-overlapping way.

    :param g: a networkx/igraph object
    :param dimensions: Number of dimensions. Default is 128.
    :param clusters: Number of clusters. Default is 10.
    :param lambd: KKT penalty. Default is 0.2
    :param alpha: Clustering penalty. Default is 0.05.
    :param beta: Modularity regularization penalty. Default is 0.05.
    :param iterations: Number of power iterations. Default is 200.
    :param lower_control: Floating point overflow control. Default is 10**-15.
    :param eta: Similarity mixing parameter. Default is 5.0.
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.nmnf(G)

    :References:

    Wang, Xiao, et al. "Community preserving network embedding." Thirty-first AAAI conference on artificial intelligence. 2017.

    .. note:: Reference implementation: https://karateclub.readthedocs.io/
    """
    g = convert_graph_formats(g, nx.Graph)
    model = MNMF(dimensions=dimensions, clusters=clusters, lambd=lambd, alpha=alpha, beta=beta, iterations=iterations,
                 lower_control=lower_control, eta=eta)
    model.fit(g)
    members = model.get_memberships()

    # Reshaping the results: invert node -> community into community -> nodes.
    coms_to_node = defaultdict(list)
    for n, c in members.items():
        coms_to_node[c].append(n)

    coms = [list(c) for c in coms_to_node.values()]

    # Single exit point: the stale trailing "BigClam" return that followed this
    # statement was unreachable and referenced undefined names.
    return NodeClustering(coms, g, "MNMF", method_parameters={"dimension": dimensions, "clusters": clusters,
                                                              "lambd": lambd, "alpha": alpha, "beta": beta,
                                                              "iterations": iterations, "lower_control": lower_control,
                                                              "eta": eta}, overlap=True)
Loading

0 comments on commit 1a54ecd

Please sign in to comment.