Commit d180edc

fix #29 fix #13 fix #15 improve guide with cache and diff hid dim

kayzliu committed Apr 24, 2022
1 parent a94563f commit d180edc
Showing 1 changed file with 125 additions and 95 deletions.
220 changes: 125 additions & 95 deletions pygod/models/guide.py
@@ -4,7 +4,9 @@
# Author: Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause

import os
import torch
import hashlib
import numpy as np
import networkx as nx
import torch.nn as nn
@@ -42,8 +44,10 @@ class GUIDE(BaseDetector):
Parameters
----------
hid_dim : int, optional
Hidden dimension of model. Default: ``0``.
a_hid : int, optional
Hidden dimension for attribute autoencoder. Default: ``32``.
s_hid : int, optional
Hidden dimension for structure autoencoder. Default: ``4``.
num_layers : int, optional
Total number of layers in autoencoders. Default: ``4``.
dropout : float, optional
@@ -77,6 +81,9 @@ class GUIDE(BaseDetector):
selected_motif : bool
Use selected motifs which are defined in the original paper.
Default: ``True``.
cache_dir : str
The directory for the node motif degree caching.
Default: ``None``.
verbose : bool
Verbosity mode. Turn on to print out log information.
Default: ``False``.
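For orientation, a minimal constructor sketch under the updated API (the values below are illustrative, not prescribed by this commit; leaving cache_dir as None falls back to ~/.pygod, as set up in __init__ further down):

from pygod.models import GUIDE

# Illustrative values only: a_hid/s_hid replace the former shared hid_dim,
# and cache_dir controls where node motif degrees are cached.
detector = GUIDE(a_hid=32,          # attribute autoencoder hidden size
                 s_hid=4,           # structure autoencoder hidden size
                 num_layers=4,
                 graphlet_size=4,
                 selected_motif=True,
                 cache_dir=None)    # None -> ~/.pygod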
@@ -90,7 +97,8 @@
"""

def __init__(self,
hid_dim=64,
a_hid=32,
s_hid=4,
num_layers=4,
dropout=0.3,
weight_decay=0.,
@@ -104,11 +112,13 @@ def __init__(self,
num_neigh=-1,
graphlet_size=4,
selected_motif=True,
cache_dir=None,
verbose=False):
super(GUIDE, self).__init__(contamination=contamination)

# model param
self.hid_dim = hid_dim
self.a_hid = a_hid
self.s_hid = s_hid
self.num_layers = num_layers
self.dropout = dropout
self.weight_decay = weight_decay
@@ -131,6 +141,10 @@ def __init__(self,
self.verbose = verbose
self.model = None

if cache_dir is None:
cache_dir = os.path.join(os.path.expanduser('~'), '.pygod')
self.cache_dir = cache_dir

def fit(self, G, y_true=None):
"""
Description
@@ -152,17 +166,17 @@
Fitted estimator.
"""
G.node_idx = torch.arange(G.x.shape[0])
G.s = self.calc_nmf(G)
print(G.s)
G.s = self.get_nmf(G, self.cache_dir)
if self.batch_size == 0:
self.batch_size = G.x.shape[0]
loader = NeighborLoader(G,
[self.num_neigh] * self.num_layers,
batch_size=self.batch_size)

self.model = GUIDE_Base(x_dim=G.x.shape[1],
self.model = GUIDE_Base(a_dim=G.x.shape[1],
s_dim=G.s.shape[1],
hid_dim=self.hid_dim,
a_hid=self.a_hid,
s_hid=self.s_hid,
num_layers=self.num_layers,
dropout=self.dropout,
act=self.act).to(self.device)
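A short end-to-end sketch of fit and decision_function (not shown in the commit itself), assuming a torch_geometric.data.Data object with node features x and edge_index; the toy graph is illustrative. The first fit computes and caches the node motif degrees, and later runs reload them from cache_dir:

import torch
from torch_geometric.data import Data
from pygod.models import GUIDE

# Toy 6-node ring with random 8-dimensional node attributes.
data = Data(x=torch.randn(6, 8),
            edge_index=torch.tensor([[0, 1, 2, 3, 4, 5],
                                     [1, 2, 3, 4, 5, 0]]))

detector = GUIDE()                  # defaults: a_hid=32, s_hid=4, cache_dir -> ~/.pygod
detector.fit(data)                  # builds data.s via get_nmf, then trains both autoencoders
scores = detector.decision_function(data)   # per-node outlier scores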
@@ -225,7 +239,7 @@ def decision_function(self, G):
"""
check_is_fitted(self, ['model'])
G.node_idx = torch.arange(G.x.shape[0])
G.s = self.calc_nmf(G)
G.s = self.get_nmf(G, self.cache_dir)
loader = NeighborLoader(G,
[self.num_neigh] * self.num_layers,
batch_size=self.batch_size)
@@ -289,7 +303,7 @@ def loss_func(self, x, x_, s, s_):
structure_errors
return score

def calc_nmf(self, G):
def get_nmf(self, G, cache_dir):
"""
Description
-----------
@@ -301,116 +315,132 @@
----------
G : PyTorch Geometric Data instance (torch_geometric.data.Data)
The input data.
cache_dir : str
The directory for the node motif degree caching.
Returns
-------
s : torch.Tensor
Structure matrix (node motif degree/graphlet degree)
"""
edge_index = G.edge_index
g = nx.from_edgelist(edge_index.T.tolist())

# create edge subsets
edge_subsets = dict()
subsets = [[edge[0], edge[1]] for edge in g.edges()]
edge_subsets[2] = subsets
unique_subsets = dict()
for i in range(3, self.graphlet_size + 1):
for subset in subsets:
for node in subset:
for neb in g.neighbors(node):
new_subset = subset + [neb]
if len(set(new_subset)) == i:
new_subset.sort()
unique_subsets[tuple(new_subset)] = 1
subsets = [list(k) for k, v in unique_subsets.items()]
edge_subsets[i] = subsets
unique_subsets = dict()
hash_func = hashlib.sha1()
hash_func.update(str(G).encode('utf-8'))
file_name = 'nmd_' + str(hash_func.hexdigest()[:8]) + \
str(self.graphlet_size) + \
str(self.selected_motif)[0] + '.pt'
file_path = os.path.join(cache_dir, file_name)
if os.path.exists(file_path):
s = torch.load(file_path)
else:
edge_index = G.edge_index
g = nx.from_edgelist(edge_index.T.tolist())

# enumerate graphs
graphs = graph_atlas_g()
interesting_graphs = {i: [] for i in
range(2, self.graphlet_size + 1)}
for graph in graphs:
if 1 < graph.number_of_nodes() < self.graphlet_size + 1:
if nx.is_connected(graph):
interesting_graphs[graph.number_of_nodes()].append(
graph)

# enumerate categories
main_index = 0
categories = dict()
for size, graphs in interesting_graphs.items():
categories[size] = dict()
for index, graph in enumerate(graphs):
categories[size][index] = dict()
degrees = list(
set([graph.degree(node) for node in graph.nodes()]))
for degree in degrees:
categories[size][index][degree] = main_index
main_index += 1
unique_motif_count = main_index

# setup feature
features = {node: {i: 0 for i in range(unique_motif_count)}
for node in g.nodes()}
for size, node_lists in edge_subsets.items():
graphs = interesting_graphs[size]
for nodes in node_lists:
sub_gr = g.subgraph(nodes)
# create edge subsets
edge_subsets = dict()
subsets = [[edge[0], edge[1]] for edge in g.edges()]
edge_subsets[2] = subsets
unique_subsets = dict()
for i in range(3, self.graphlet_size + 1):
for subset in subsets:
for node in subset:
for neb in g.neighbors(node):
new_subset = subset + [neb]
if len(set(new_subset)) == i:
new_subset.sort()
unique_subsets[tuple(new_subset)] = 1
subsets = [list(k) for k, v in unique_subsets.items()]
edge_subsets[i] = subsets
unique_subsets = dict()

# enumerate graphs
graphs = graph_atlas_g()
interesting_graphs = {i: [] for i in
range(2, self.graphlet_size + 1)}
for graph in graphs:
if 1 < graph.number_of_nodes() < self.graphlet_size + 1:
if nx.is_connected(graph):
interesting_graphs[graph.number_of_nodes()].append(
graph)

# enumerate categories
main_index = 0
categories = dict()
for size, graphs in interesting_graphs.items():
categories[size] = dict()
for index, graph in enumerate(graphs):
if nx.is_isomorphic(sub_gr, graph):
for node in sub_gr.nodes():
features[node][categories[size][index][
sub_gr.degree(node)]] += 1
break

motifs = [[n] + [features[n][i] for i in range(
unique_motif_count)] for n in g.nodes()]
motifs = torch.Tensor(motifs)
motifs = motifs[torch.sort(motifs[:, 0]).indices, 1:]

if self.selected_motif:
# use motif selected in the original paper only
s = torch.zeros((G.x.shape[0], 6))
# m31
s[:, 0] = motifs[:, 3]
# m32
s[:, 1] = motifs[:, 1] + motifs[:, 2]
# m41
s[:, 2] = motifs[:, 14]
# m42
s[:, 3] = motifs[:, 12] + motifs[:, 13]
# m43
s[:, 4] = motifs[:, 11]
# node degree
s[:, 5] = motifs[:, 0]
else:
# use graphlet degree
s = motifs
categories[size][index] = dict()
degrees = list(
set([graph.degree(node) for node in graph.nodes()]))
for degree in degrees:
categories[size][index][degree] = main_index
main_index += 1
unique_motif_count = main_index

# setup feature
features = {node: {i: 0 for i in range(unique_motif_count)}
for node in g.nodes()}
for size, node_lists in edge_subsets.items():
graphs = interesting_graphs[size]
for nodes in node_lists:
sub_gr = g.subgraph(nodes)
for index, graph in enumerate(graphs):
if nx.is_isomorphic(sub_gr, graph):
for node in sub_gr.nodes():
features[node][categories[size][index][
sub_gr.degree(node)]] += 1
break

motifs = [[n] + [features[n][i] for i in range(
unique_motif_count)] for n in g.nodes()]
motifs = torch.Tensor(motifs)
motifs = motifs[torch.sort(motifs[:, 0]).indices, 1:]

if self.selected_motif:
# use motif selected in the original paper only
s = torch.zeros((G.x.shape[0], 6))
# m31
s[:, 0] = motifs[:, 3]
# m32
s[:, 1] = motifs[:, 1] + motifs[:, 2]
# m41
s[:, 2] = motifs[:, 14]
# m42
s[:, 3] = motifs[:, 12] + motifs[:, 13]
# m43
s[:, 4] = motifs[:, 11]
# node degree
s[:, 5] = motifs[:, 0]
else:
# use graphlet degree
s = motifs

if not os.path.exists(cache_dir):
os.makedirs(cache_dir)
torch.save(s, file_path)

return s


class GUIDE_Base(nn.Module):
def __init__(self,
x_dim,
a_dim,
s_dim,
hid_dim,
a_hid,
s_hid,
num_layers,
dropout,
act):
super(GUIDE_Base, self).__init__()

self.attr_ae = GCN(in_channels=x_dim,
hidden_channels=hid_dim,
self.attr_ae = GCN(in_channels=a_dim,
hidden_channels=a_hid,
num_layers=num_layers,
out_channels=x_dim,
out_channels=a_dim,
dropout=dropout,
act=act)

self.struct_ae = GNA(in_channels=s_dim,
hidden_channels=hid_dim,
hidden_channels=s_hid,
num_layers=num_layers,
out_channels=s_dim,
dropout=dropout,
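For reference, the cache location used by get_nmf above can be reproduced standalone. The sketch below only mirrors the file-name scheme from the diff (a short SHA-1 of the Data object's repr, the graphlet size, and the first letter of selected_motif); nmd_cache_path is a hypothetical helper, not a pygod API:

import os
import hashlib

def nmd_cache_path(G, cache_dir, graphlet_size=4, selected_motif=True):
    # Same scheme as GUIDE.get_nmf: 'nmd_' + 8 hex chars of sha1(str(G))
    # + graphlet size + 'T'/'F' for the motif flag + '.pt'
    hash_func = hashlib.sha1()
    hash_func.update(str(G).encode('utf-8'))
    file_name = ('nmd_' + hash_func.hexdigest()[:8]
                 + str(graphlet_size)
                 + str(selected_motif)[0] + '.pt')
    return os.path.join(cache_dir, file_name)

# e.g. nmd_cache_path(data, os.path.join(os.path.expanduser('~'), '.pygod'))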

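A small worked example of the selected_motif branch in get_nmf: the per-node motif-count tensor (node degree in column 0, then graphlet counts in graph-atlas order, 15 columns for graphlet_size=4) is condensed into six structure features taken from fixed columns. The toy tensor is illustrative; the column indices are those in the diff:

import torch

# Toy counts for 2 nodes, 15 columns (column 0 is the node degree).
motifs = torch.arange(30, dtype=torch.float).reshape(2, 15)

s = torch.zeros((motifs.shape[0], 6))
s[:, 0] = motifs[:, 3]                   # m31
s[:, 1] = motifs[:, 1] + motifs[:, 2]    # m32
s[:, 2] = motifs[:, 14]                  # m41
s[:, 3] = motifs[:, 12] + motifs[:, 13]  # m42
s[:, 4] = motifs[:, 11]                  # m43
s[:, 5] = motifs[:, 0]                   # node degree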
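Finally, the attribute branch of GUIDE_Base is now sized by a_hid instead of the shared hid_dim; here is a standalone sketch of that branch with an assumed attribute dimension (the structure branch uses pygod's GNA layer analogously, sized by s_hid):

from torch_geometric.nn import GCN

a_dim, a_hid, num_layers = 8, 32, 4   # a_dim is assumed; a_hid matches the new default

# Multilayer GCN that reconstructs the node attributes: a_dim -> a_hid -> ... -> a_dim
attr_ae = GCN(in_channels=a_dim,
              hidden_channels=a_hid,
              num_layers=num_layers,
              out_channels=a_dim,
              dropout=0.3,
              act='relu')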