
Commit

Revert "Possible bug in starting_communities (#26)"
This reverts commit 0f42264.
daxpryce committed Nov 16, 2021
1 parent ada8134 commit 4d85c73
Showing 4 changed files with 29 additions and 74 deletions.
46 changes: 1 addition & 45 deletions packages/network_partitions/src/leiden/leiden.rs
@@ -1,12 +1,11 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT license.
 
-use std::collections::{HashMap, HashSet};
 use std::iter;
 
 use rand::Rng;
 
-use crate::clustering::{ClusterItem, Clustering};
+use crate::clustering::Clustering;
 use crate::errors::CoreError;
 use crate::log;
 use crate::network::prelude::*;
@@ -82,13 +81,6 @@ where
 
     let mut clustering: Clustering =
         clustering.unwrap_or(Clustering::as_self_clusters(network.num_nodes().clone()));
-
-    guarantee_clustering_sanity(network, &mut clustering)?;
-
-    // verify initial clustering provided is in a sane state for leiden to operate
-    // any node in a cluster must either be a singleton in that cluster or be connected to at least
-    // one other node in that cluster
-
     let mut improved: bool = false;
 
     log!(
@@ -238,42 +230,6 @@ fn initial_clustering_for_induced(
     return Clustering::as_defined(clusters_induced_network, next_cluster_id);
 }
 
-fn guarantee_clustering_sanity(
-    network: &CompactNetwork,
-    clustering: &mut Clustering,
-) -> Result<(), CoreError> {
-    let mut node_neighbors: HashMap<CompactNodeId, HashSet<CompactNodeId>> = HashMap::new();
-    for CompactNodeItem { id: node, .. } in network.into_iter() {
-        let mut neighbors: HashSet<CompactNodeId> = HashSet::new();
-        for neighbor in network.neighbors_for(node) {
-            neighbors.insert(neighbor.id);
-        }
-        node_neighbors.insert(node, neighbors);
-    }
-    let mut cluster_membership: HashMap<ClusterId, HashSet<CompactNodeId>> = HashMap::new();
-    for ClusterItem { node_id, cluster } in clustering.into_iter() {
-        let cluster_members: &mut HashSet<CompactNodeId> =
-            cluster_membership.entry(cluster).or_insert(HashSet::new());
-        cluster_members.insert(node_id);
-    }
-
-    for (_cluster, cluster_members) in &cluster_membership {
-        if cluster_members.len() > 1 {
-            // we are only trying to move non-singletons if they don't have a possible connection
-            for cluster_member in cluster_members {
-                let neighbors = node_neighbors.get(cluster_member).unwrap();
-                if neighbors.is_disjoint(cluster_members) {
-                    // we have no reason to be in this partition, because we have no links to anyone
-                    // else in it. we should make our own partition, with ___ and ___.
-                    let new_cluster: ClusterId = clustering.next_cluster_id();
-                    clustering.update_cluster_at(*cluster_member, new_cluster)?;
-                }
-            }
-        }
-    }
-    return Ok(());
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
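
For context, the deleted guarantee_clustering_sanity enforced the invariant described in its removed comments: in any multi-member cluster, every node must have at least one neighbor inside that cluster, and a node with none is split out into a new singleton cluster. A rough standalone sketch of that check over plain adjacency data, using only std collections (the usize node/cluster representation and the function name are illustrative, not this crate's types):

use std::collections::{HashMap, HashSet};

// Illustrative sketch only: nodes are plain usize ids, `neighbors` maps each
// node to its neighbor set, and `clusters` maps a cluster id to its members.
fn nodes_violating_sanity(
    neighbors: &HashMap<usize, HashSet<usize>>,
    clusters: &HashMap<usize, HashSet<usize>>,
) -> Vec<usize> {
    let mut violators: Vec<usize> = Vec::new();
    for members in clusters.values() {
        // singleton clusters are always sane; only multi-member clusters are checked
        if members.len() > 1 {
            for &member in members {
                // a node with no links to anyone else in its cluster violates the invariant
                if neighbors[&member].is_disjoint(members) {
                    violators.push(member);
                }
            }
        }
    }
    violators
}

Each node this check flags is the kind the removed function reassigned to a fresh cluster id via next_cluster_id and update_cluster_at.
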
6 changes: 3 additions & 3 deletions packages/pyo3/Cargo.toml
@@ -1,15 +1,15 @@
 [package]
 name = "graspologic_native"
-version = "1.1.0"
+version = "1.0.1"
 authors = ["dwpryce@microsoft.com"]
 edition = "2018"
 license = "MIT"
 description = "Python native companion module to the graspologic library"
 readme = "README.md"
 
 [package.metadata.maturin]
-maintainer = "Dax Pryce"
-maintainer-email = "daxpryce@microsoft.com"
+maintainer = "Dwayne Pryce"
+maintainer-email = "dwpryce@microsoft.com"
 requires-python = ">=3.6,<3.11"
 project-url = {"Github" = "https://github.com/microsoft/graspologic-native", "Graspologic"="https://github.com/microsoft/graspologic"}
 classifier = ["Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering :: Mathematics"]
6 changes: 6 additions & 0 deletions packages/pyo3/src/lib.rs
@@ -102,6 +102,9 @@ impl PyObjectProtocol for HierarchicalCluster {
 /// :rtype: Dict[str, int]
 /// :raises ClusterIndexingError:
 /// :raises EmptyNetworkError:
+/// :raises InvalidCommunityMappingError: The starting communities dictionary either did not include
+/// a community for each node in the edge list or the edge list did not contain a node present
+/// in the starting community mapping.
 /// :raises InternalNetworkIndexingError: An internal algorithm error. Please report with reproduction steps.
 /// :raises ParameterRangeError: One of the parameters provided did not meet the requirements in the documentation.
 /// :raises UnsafeInducementError: An internal algorithm error. Please report with reproduction steps.
@@ -194,6 +197,9 @@ fn leiden(
 /// :rtype: List[HierarchicalCluster]
 /// :raises ClusterIndexingError:
 /// :raises EmptyNetworkError:
+/// :raises InvalidCommunityMappingError: The starting communities dictionary either did not include
+/// a community for each node in the edge list or the edge list did not contain a node present
+/// in the starting community mapping.
 /// :raises InternalNetworkIndexingError: An internal algorithm error. Please report with reproduction steps.
 /// :raises ParameterRangeError: One of the parameters provided did not meet the requirements in the documentation.
 /// :raises UnsafeInducementError: An internal algorithm error. Please report with reproduction steps.
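
The InvalidCommunityMappingError text restored above describes a two-way coverage condition between the starting communities dictionary and the nodes of the edge list. A minimal standalone sketch of that condition as a plain check, using only std collections (check_coverage, its String-keyed inputs, and the string error are illustrative and not part of this crate):

use std::collections::{HashMap, HashSet};

// Illustrative sketch only: `starting_communities` and `nodes` stand in for
// the hypothetical inputs; this function does not exist in the crate.
fn check_coverage(
    starting_communities: &HashMap<String, usize>,
    nodes: &HashSet<String>,
) -> Result<(), String> {
    for node in nodes {
        // every node in the edge list needs a starting community
        if !starting_communities.contains_key(node) {
            return Err(format!("no starting community for node {}", node));
        }
    }
    for node in starting_communities.keys() {
        // every node in the mapping must appear in the edge list
        if !nodes.contains(node) {
            return Err(format!("starting community references unknown node {}", node));
        }
    }
    Ok(())
}
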
45 changes: 19 additions & 26 deletions packages/pyo3/src/mediator.rs
@@ -188,36 +188,29 @@ fn communities_to_clustering(
     network: &LabeledNetwork<String>,
     communities: HashMap<String, usize>,
 ) -> Result<Clustering, PyLeidenError> {
-    // best effort for using a provided community mapping and keeping generated singleton ids
-    // otherwise
-
-    let mut clustering: Clustering = match communities.values().max() {
-        Some(max_community_id) => {
-            // if we have values, we have the max community id, which means we'll actually create
-            // a cluster mapping where every node gets a singleton cluster starting after the actual
-            // last max cluster.
-            let mut cluster_mapping: Vec<usize> = Vec::with_capacity(network.num_nodes());
-            let start: usize = max_community_id + 1;
-            let end: usize = start + network.num_nodes();
-            cluster_mapping.extend(start..end);
-            Clustering::as_defined(cluster_mapping, end)
-        }
-        None => Clustering::as_self_clusters(network.num_nodes()),
-    };
+    // if node not found in internal network, bounce
+    // if max(communities) > len(set(communities)), collapse result
+    // if all nodes are mapped, cool
+    // if not all nodes are mapped, generate integers for each new value
+
+    let mut clustering: Clustering = Clustering::as_self_clusters(network.num_nodes());
+
+    let mut all_communities: HashSet<usize> = HashSet::new();
 
     for (node, community) in communities {
-        let mapping: Option<CompactNodeId> = network.compact_id_for(node);
-        if mapping.is_some() {
-            let compact_node_id: CompactNodeId = mapping.unwrap();
-            clustering
-                .update_cluster_at(compact_node_id, community)
-                .map_err(|_| PyLeidenError::ClusterIndexingError)?;
-        }
+        all_communities.insert(community);
+        let mapped_node: CompactNodeId = network
+            .compact_id_for(node)
+            .ok_or(PyLeidenError::InvalidCommunityMappingError)?;
+        clustering
+            .update_cluster_at(mapped_node, community)
+            .map_err(|_| PyLeidenError::InvalidCommunityMappingError)?;
    }
 
-    // we can't easily know if there are empty clusters, so we'll just presume there are
-    // and compress any gaps
-    clustering.remove_empty_clusters();
+    if clustering.next_cluster_id() != all_communities.len() {
+        // we have some gaps, compress
+        clustering.remove_empty_clusters();
+    }
 
     return Ok(clustering);
 }
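
The restored gap check compares next_cluster_id() with the number of distinct communities supplied; when they differ, some cluster ids are unused and remove_empty_clusters() compacts them. A standalone sketch of that compaction over a plain assignment vector, assuming nothing about this crate's Clustering type (compress_cluster_ids and its slice input are illustrative):

use std::collections::HashMap;

// Illustrative sketch only: relabels arbitrary cluster ids into a dense 0..k
// range (first-seen order) and returns the number of distinct clusters.
fn compress_cluster_ids(assignments: &mut [usize]) -> usize {
    let mut remap: HashMap<usize, usize> = HashMap::new();
    for cluster in assignments.iter_mut() {
        let next_dense_id = remap.len();
        // reuse the dense id if this cluster was seen before, otherwise mint a new one
        *cluster = *remap.entry(*cluster).or_insert(next_dense_id);
    }
    remap.len()
}

For example, assignments [5, 9, 5, 2] become [0, 1, 0, 2] and the function returns 3.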
