Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat:exact topology matching and approximate feature matching #2

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 37 additions & 61 deletions matcher/vf2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import sys
import networkx as nx
import numpy as np
from typing import Callable, Dict
from torch import Tensor, nn
from torch_geometric import data
from torch_geometric.utils import to_networkx

__all__ = ["GraphMatcher"]

Expand All @@ -26,11 +27,9 @@ class GraphMatcher:
"""

def __init__(self,
G1: nx.Graph, G2: nx.Graph,
e1: Dict[int, np.ndarray[np.double]], e2: [int, np.ndarray[np.double]],
comparator: Callable[[np.ndarray[np.double], np.ndarray[np.double]], np.double],
error_bound: float, phantom_degree_bound: int,
# TODO the constraint on phantom degrees should be based on the density (or arboricity) of the graph
G1: data.Data, G2: data.Data,
e1: Tensor, e2: Tensor, fv1: Tensor, fv2: Tensor,
feat_error_bound: float, embedding_error_bound: float,
) -> None:
"""Initialize GraphMatcher.

Expand All @@ -48,16 +47,18 @@ def __init__(self,
>>> G2 = nx.path_graph(4)
>>> GM = isomorphism.GraphMatcher(G1, G2)
"""
self.G1 = G1
self.G2 = G2
self.G1_nodes = set(G1.nodes())
self.G2_nodes = set(G2.nodes())
self.G2_node_order = {n: i for i, n in enumerate(G2)}
self.G1 = to_networkx(data=G1, to_undirected=True)
self.G2 = to_networkx(data=G2, to_undirected=True)
self.G1_nodes = set(self.G1.nodes())
self.G2_nodes = set(self.G2.nodes())
self.G2_node_order = {n: i for i, n in enumerate(self.G2)}
self.e1 = e1
self.e2 = e2
self.comparator = comparator
self.error_bound = error_bound
self.phantom_degree_bound = phantom_degree_bound
self.fv1 = fv1
self.fv2 = fv2
self.cosf = nn.CosineSimilarity(dim=0)
self.feat_error_bound = feat_error_bound
self.embedding_error_bound = embedding_error_bound

# Set recursion limit.
self.old_recursion_limit = sys.getrecursionlimit()
Expand Down Expand Up @@ -105,7 +106,7 @@ def candidate_pairs_iter(self):
# checkme: process the valid node pairs with the closest embedding first
ordered_T1_inout = sorted(
T1_inout.copy(),
key=lambda u: self.comparator(self.e1[u], self.e2[node_2])
key=lambda u: -self.cosf(self.e1[u], self.e2[node_2])
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be improved with a top K search?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

)

for node_1 in ordered_T1_inout:
Expand All @@ -122,7 +123,7 @@ def candidate_pairs_iter(self):
# checkme: process the valid node pairs with the closest embedding first
ordered_nodes = sorted(
[node for node in self.G1 if node not in self.core_1],
key=lambda node: self.comparator(self.e1[node], self.e2[other_node])
key=lambda node: -self.cosf(self.e1[node], self.e2[other_node])
)

for node in ordered_nodes:
Expand Down Expand Up @@ -156,10 +157,6 @@ def initialize(self):
self.inout_2 = {}
# Practically, these sets simply store the nodes in the subgraph.

# Used to backtrack cost
self.cost_map = {}
self.total_cost = 0

self.state = GMState(self)

# Provide a convenient way to access the isomorphism mapping.
Expand Down Expand Up @@ -228,7 +225,9 @@ def semantic_feasibility(self, G1_node, G2_node):
the above form to keep the match() method functional. Implementations
should consider multigraphs.
"""
return True

# features should be similar enough for the corresponding nodes
return self.cosf(self.fv1[G1_node], self.fv2[G2_node]) > 1 - self.feat_error_bound

def subgraph_is_isomorphic(self):
"""Returns True if a subgraph of G1 is isomorphic to G2."""
Expand Down Expand Up @@ -283,9 +282,11 @@ def syntactic_feasibility(self, G1_node, G2_node):
if self.G1.number_of_edges(G1_node, G1_node) != self.G2.number_of_edges(
G2_node, G2_node
):
cost = abs(self.G1.number_of_edges(G1_node, G1_node) - self.G2.number_of_edges(G2_node, G2_node))
if not self.update_cost(cost):
return False
return False

if self.cosf(self.e1[G1_node], self.e2[G2_node]) > 1 - self.embedding_error_bound:
# if the two embeddings are similar enough, we assume there is a good match
return True

# R_neighbor

Expand All @@ -294,22 +295,20 @@ def syntactic_feasibility(self, G1_node, G2_node):
# edges must be equal.
for neighbor in self.G1[G1_node]:
if neighbor in self.core_1:
if self.core_1[neighbor] not in self.G2[G2_node]: # test on mapped nodes
cost = self.G1.number_of_edges(neighbor, G1_node)
else:
cost = abs(self.G1.number_of_edges(neighbor, G1_node) - self.G2.number_of_edges(self.core_1[neighbor], G2_node))

if not self.update_cost(cost):
if self.core_1[neighbor] not in self.G2[G2_node]:
return False
elif self.G1.number_of_edges(
neighbor, G1_node
) != self.G2.number_of_edges(self.core_1[neighbor], G2_node):
return False

for neighbor in self.G2[G2_node]:
if neighbor in self.core_2:
if self.core_2[neighbor] not in self.G1[G1_node]:
cost = self.G2.number_of_edges(neighbor, G2_node)
else:
cost = abs(self.G1.number_of_edges(self.core_2[neighbor], G1_node) - self.G2.number_of_edges(neighbor, G2_node))

if not self.update_cost(cost):
return False
elif self.G1.number_of_edges(
self.core_2[neighbor], G1_node
) != self.G2.number_of_edges(neighbor, G2_node):
return False

# Look ahead 1
Expand All @@ -326,11 +325,7 @@ def syntactic_feasibility(self, G1_node, G2_node):
if (neighbor in self.inout_2) and (neighbor not in self.core_2):
num2 += 1

# checkme: the look-aheads are essentially degree-based pruning and I feel we should handle them separately
# - to avoid double counting
# - enable us to short-circuit it more aggressively
# (the assumption here is that a good approximation is unlikely to have many more extra edges)
if not (num1 >= num2 + self.phantom_degree_bound):
if not (num1 >= num2):
return False

# Look ahead 2
Expand All @@ -349,24 +344,12 @@ def syntactic_feasibility(self, G1_node, G2_node):
if neighbor not in self.inout_2:
num2 += 1

if not (num1 >= num2 + self.phantom_degree_bound):
if not (num1 >= num2):
return False

# Otherwise, this node pair is syntactically feasible!
return True

def update_cost(self, cost: int) -> bool: # checkme: heuristic on staged cost
if cost == 0:
return True

self.total_cost += cost
self.cost_map[self.state.depth] += cost

# more tolerant in the first layers
return \
self.G2.number_of_nodes() ** 2 / (self.state.depth ** 2 + (self.state.depth != self.G2.number_of_nodes())) \
< cost / self.G2.number_of_edges() * self.error_bound


class GMState:
"""Internal representation of state for the GraphMatcher class.
Expand Down Expand Up @@ -397,8 +380,6 @@ def __init__(self, GM: GraphMatcher, G1_node=None, G2_node=None):
GM.core_2 = {}
GM.inout_1 = {}
GM.inout_2 = {}
GM.cost_map = {}
GM.total_cost = 0

# Watch out! G1_node == 0 should evaluate to True.
if G1_node is not None and G2_node is not None:
Expand All @@ -413,7 +394,6 @@ def __init__(self, GM: GraphMatcher, G1_node=None, G2_node=None):
# Now we must update the other two vectors.
# We will add only if it is not in there already!
self.depth = len(GM.core_1)
GM.cost_map[self.depth] = 0

# First we add the new nodes...
if G1_node not in GM.inout_1:
Expand Down Expand Up @@ -451,10 +431,6 @@ def restore(self):
del self.GM.core_1[self.G1_node]
del self.GM.core_2[self.G2_node]

# revert the cost in this level
self.GM.total_cost -= self.GM.cost_map[self.depth]
del self.GM.cost_map[self.depth]

# Now we revert the other two vectors.
# Thus, we delete all entries which have this depth level.
for vector in (self.GM.inout_1, self.GM.inout_2):
Expand Down