From 4d85c7392a89b74337888774338882500da3e76a Mon Sep 17 00:00:00 2001 From: Dax Pryce Date: Tue, 16 Nov 2021 12:37:20 -0800 Subject: [PATCH] Revert "Possible bug in starting_communities (#26)" This reverts commit 0f422645fc6cd2a4b39a72cd22c8a31da377cc31. --- .../network_partitions/src/leiden/leiden.rs | 46 +------------------ packages/pyo3/Cargo.toml | 6 +-- packages/pyo3/src/lib.rs | 6 +++ packages/pyo3/src/mediator.rs | 45 ++++++++---------- 4 files changed, 29 insertions(+), 74 deletions(-) diff --git a/packages/network_partitions/src/leiden/leiden.rs b/packages/network_partitions/src/leiden/leiden.rs index 276be22..31cef05 100644 --- a/packages/network_partitions/src/leiden/leiden.rs +++ b/packages/network_partitions/src/leiden/leiden.rs @@ -1,12 +1,11 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -use std::collections::{HashMap, HashSet}; use std::iter; use rand::Rng; -use crate::clustering::{ClusterItem, Clustering}; +use crate::clustering::Clustering; use crate::errors::CoreError; use crate::log; use crate::network::prelude::*; @@ -82,13 +81,6 @@ where let mut clustering: Clustering = clustering.unwrap_or(Clustering::as_self_clusters(network.num_nodes().clone())); - - guarantee_clustering_sanity(network, &mut clustering)?; - - // verify initial clustering provided is in a sane state for leiden to operate - // any node in a cluster must either be a singleton in that cluster or be connected to at least - // one other node in that cluster - let mut improved: bool = false; log!( @@ -238,42 +230,6 @@ fn initial_clustering_for_induced( return Clustering::as_defined(clusters_induced_network, next_cluster_id); } -fn guarantee_clustering_sanity( - network: &CompactNetwork, - clustering: &mut Clustering, -) -> Result<(), CoreError> { - let mut node_neighbors: HashMap> = HashMap::new(); - for CompactNodeItem { id: node, .. } in network.into_iter() { - let mut neighbors: HashSet = HashSet::new(); - for neighbor in network.neighbors_for(node) { - neighbors.insert(neighbor.id); - } - node_neighbors.insert(node, neighbors); - } - let mut cluster_membership: HashMap> = HashMap::new(); - for ClusterItem { node_id, cluster } in clustering.into_iter() { - let cluster_members: &mut HashSet = - cluster_membership.entry(cluster).or_insert(HashSet::new()); - cluster_members.insert(node_id); - } - - for (_cluster, cluster_members) in &cluster_membership { - if cluster_members.len() > 1 { - // we are only trying to move non-singletons if they don't have a possible connection - for cluster_member in cluster_members { - let neighbors = node_neighbors.get(cluster_member).unwrap(); - if neighbors.is_disjoint(cluster_members) { - // we have no reason to be in this partition, because we have no links to anyone - // else in it. we should make our own partition, with ___ and ___. - let new_cluster: ClusterId = clustering.next_cluster_id(); - clustering.update_cluster_at(*cluster_member, new_cluster)?; - } - } - } - } - return Ok(()); -} - #[cfg(test)] mod tests { use super::*; diff --git a/packages/pyo3/Cargo.toml b/packages/pyo3/Cargo.toml index 91590ed..bd009d0 100644 --- a/packages/pyo3/Cargo.toml +++ b/packages/pyo3/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "graspologic_native" -version = "1.1.0" +version = "1.0.1" authors = ["dwpryce@microsoft.com"] edition = "2018" license = "MIT" @@ -8,8 +8,8 @@ description = "Python native companion module to the graspologic library" readme = "README.md" [package.metadata.maturin] -maintainer = "Dax Pryce" -maintainer-email = "daxpryce@microsoft.com" +maintainer = "Dwayne Pryce" +maintainer-email = "dwpryce@microsoft.com" requires-python = ">=3.6,<3.11" project-url = {"Github" = "https://github.com/microsoft/graspologic-native", "Graspologic"="https://github.com/microsoft/graspologic"} classifier = ["Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering :: Mathematics"] diff --git a/packages/pyo3/src/lib.rs b/packages/pyo3/src/lib.rs index f568ab3..9de6499 100644 --- a/packages/pyo3/src/lib.rs +++ b/packages/pyo3/src/lib.rs @@ -102,6 +102,9 @@ impl PyObjectProtocol for HierarchicalCluster { /// :rtype: Dict[str, int] /// :raises ClusterIndexingError: /// :raises EmptyNetworkError: +/// :raises InvalidCommunityMappingError: The starting communities dictionary either did not include +/// a community for each node in the edge list or the edge list did not contain a node present +/// in the starting community mapping. /// :raises InternalNetworkIndexingError: An internal algorithm error. Please report with reproduction steps. /// :raises ParameterRangeError: One of the parameters provided did not meet the requirements in the documentation. /// :raises UnsafeInducementError: An internal algorithm error. Please report with reproduction steps. @@ -194,6 +197,9 @@ fn leiden( /// :rtype: List[HierarchicalCluster] /// :raises ClusterIndexingError: /// :raises EmptyNetworkError: +/// :raises InvalidCommunityMappingError: The starting communities dictionary either did not include +/// a community for each node in the edge list or the edge list did not contain a node present +/// in the starting community mapping. /// :raises InternalNetworkIndexingError: An internal algorithm error. Please report with reproduction steps. /// :raises ParameterRangeError: One of the parameters provided did not meet the requirements in the documentation. /// :raises UnsafeInducementError: An internal algorithm error. Please report with reproduction steps. diff --git a/packages/pyo3/src/mediator.rs b/packages/pyo3/src/mediator.rs index c22b76d..2c03aa2 100644 --- a/packages/pyo3/src/mediator.rs +++ b/packages/pyo3/src/mediator.rs @@ -188,36 +188,29 @@ fn communities_to_clustering( network: &LabeledNetwork, communities: HashMap, ) -> Result { - // best effort for using a provided community mapping and keeping generated singleton ids - // otherwise - - let mut clustering: Clustering = match communities.values().max() { - Some(max_community_id) => { - // if we have values, we have the max community id, which means we'll actually create - // a cluster mapping where every node gets a singleton cluster starting after the actual - // last max cluster. - let mut cluster_mapping: Vec = Vec::with_capacity(network.num_nodes()); - let start: usize = max_community_id + 1; - let end: usize = start + network.num_nodes(); - cluster_mapping.extend(start..end); - Clustering::as_defined(cluster_mapping, end) - } - None => Clustering::as_self_clusters(network.num_nodes()), - }; + // if node not found in internal network, bounce + // if max(communities) > len(set(communities)), collapse result + // if all nodes are mapped, cool + // if not all nodes are mapped, generate integers for each new value + + let mut clustering: Clustering = Clustering::as_self_clusters(network.num_nodes()); + + let mut all_communities: HashSet = HashSet::new(); for (node, community) in communities { - let mapping: Option = network.compact_id_for(node); - if mapping.is_some() { - let compact_node_id: CompactNodeId = mapping.unwrap(); - clustering - .update_cluster_at(compact_node_id, community) - .map_err(|_| PyLeidenError::ClusterIndexingError)?; - } + all_communities.insert(community); + let mapped_node: CompactNodeId = network + .compact_id_for(node) + .ok_or(PyLeidenError::InvalidCommunityMappingError)?; + clustering + .update_cluster_at(mapped_node, community) + .map_err(|_| PyLeidenError::InvalidCommunityMappingError)?; } - // we can't easily know if there are empty clusters, so we'll just presume there are - // and compress any gaps - clustering.remove_empty_clusters(); + if clustering.next_cluster_id() != all_communities.len() { + // we have some gaps, compress + clustering.remove_empty_clusters(); + } return Ok(clustering); }