forked from FAME-code/FAME
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfastrp.py
136 lines (111 loc) · 4.67 KB
/
fastrp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import csv
import itertools
import math
import time
import logging
import sys
import os
import random
import warnings
import pandas as pd
import numpy as np
import scipy
import optuna
import sklearn.preprocessing as pp
from tqdm import tqdm_notebook as tqdm
from collections import Counter, defaultdict
from pathlib import Path
from sklearn import random_projection
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix, diags, spdiags, vstack, hstack
# projection_method: either 'gaussian' or 'sparse'
# input_matrix: either 'adj' (adjacency matrix) or 'trans' (transition matrix)
# alpha adjusts the weighting of nodes according to their degree
def adj_matrix_weight_merge(A, adj_weight):
    """Merge per-edge-type adjacency matrices into one weighted, symmetrised matrix.

    Args:
        A: Nested container of adjacency matrices, one per edge type, laid
            out either as a column (``A[i][0]``) or as a row (``A[0][i]``).
            Every entry must provide ``.shape`` and ``.tocsr()``.
        adj_weight: Sequence of scalar weights; ``adj_weight[i]`` scales the
            i-th adjacency matrix.

    Returns:
        ``S + S.T`` where ``S`` is the weighted sum of the adjacency matrices.

    Raises:
        IndexError: If the i-th matrix can be found in neither layout.
    """
    N = A[0][0].shape[0]
    merged = csr_matrix((N, N))
    for i, weight in enumerate(adj_weight):
        # Only the lookup is guarded: a missing column entry means the
        # matrices are stored as a single row instead.  Any other failure
        # (shape mismatch, missing ``tocsr``) is a real error and propagates.
        try:
            adjacency = A[i][0]
        except IndexError:
            adjacency = A[0][i]
        merged = merged + weight * adjacency.tocsr()
    return merged + merged.transpose()
def _inverse_degree_matrix(M):
    """Return a diagonal matrix holding 1 / (row sum + column sum) of M."""
    degree = np.asarray(M.sum(axis=1) + M.sum(axis=0).T).ravel()
    # Zero-degree nodes divide by zero; silence the warning and map the
    # resulting infinities to 1, as the original code did.
    with np.errstate(divide='ignore'):
        inv_degree = 1.0 / degree
    inv_degree[np.isinf(inv_degree)] = 1
    return diags(inv_degree)


def fastrp_projection(train, feature, final_adj_matrix, edge_type, q=3, dim=128, projection_method='gaussian', input_matrix='adj', alpha=None, s=1, threshold=0.95, gama=1, feature_similarity=False):
    """Randomly project node features and propagate them over the graph.

    Args:
        train: Unused; kept for interface compatibility.
        feature: Feature matrix handed to the random-projection transformer
            (presumably one row per node -- confirm against the caller).
        final_adj_matrix: Matrix ``M`` used for propagation.
        edge_type: Unused; kept for interface compatibility.
        q: Number of propagation steps; at least one step is always done.
        dim: Number of components of the random projection.
        projection_method: ``'gaussian'`` or ``'sparse'``.
        input_matrix: ``'adj'`` propagates with ``M`` as is; ``'trans'``
            additionally rescales every step by the inverse degree matrix.
        alpha, s, threshold, gama: Unused; kept for interface compatibility.
        feature_similarity: When equal to ``True``, ``feature`` is
            row-normalised and transposed before projection and the
            projection is mapped back through ``feature.T``.

    Returns:
        List of ``max(q, 1)`` matrices; entry ``k`` is the projection after
        ``k + 1`` propagation steps.
    """
    assert input_matrix in ('adj', 'trans'), "input_matrix must be 'adj' or 'trans'"
    assert projection_method in ('gaussian', 'sparse'), "projection_method must be 'gaussian' or 'sparse'"
    M = final_adj_matrix

    # Kept as an equality test (not truthiness) so that non-boolean config
    # values behave exactly as before.
    use_similarity = feature_similarity == True  # noqa: E712
    if use_similarity:
        feature = pp.normalize(feature, axis=1).T

    # Fixed random_state keeps the projection reproducible across runs.
    if projection_method == 'gaussian':
        transformer = random_projection.GaussianRandomProjection(n_components=dim, random_state=7)
    else:
        transformer = random_projection.SparseRandomProjection(n_components=dim, random_state=7)
    transformer.fit(feature)

    normalise = input_matrix != 'adj'
    D_inv = _inverse_degree_matrix(M) if normalise else None

    def propagate(U):
        # One propagation step, degree-normalised for the 'trans' setting.
        U = M @ U
        return D_inv @ U if normalise else U

    cur_U = transformer.transform(feature)
    if use_similarity:
        cur_U = feature.T @ cur_U

    cur_U = propagate(cur_U)
    U_list = [cur_U]
    for _ in range(1, q):
        cur_U = propagate(cur_U)
        U_list.append(cur_U)
    return U_list
# When weights is None, concatenate the embeddings from the different powers of A instead of linearly combining them
def fastrp_merge(U_list, weights, edge_types, normalization=False, q=3):
    """Combine the per-step projections into a single embedding matrix.

    Args:
        U_list: Sequence of equally-shaped matrices, one per propagation step.
        weights: One scalar per entry of ``U_list`` for a weighted sum, or
            ``None`` to concatenate the entries column-wise instead.
        edge_types: Unused; kept for interface compatibility.
        normalization: Unused; kept for interface compatibility.
        q: Unused; kept for interface compatibility.

    Returns:
        With ``weights is None``: the horizontal concatenation of ``U_list``
        (a scipy sparse matrix if any entry is sparse, otherwise a dense
        NumPy concatenation).  Otherwise the weighted sum as a NumPy array
        with singleton dimensions squeezed out.
    """
    from scipy.sparse import issparse

    print('merge')
    if weights is None:
        if any(issparse(block) for block in U_list):
            return hstack(U_list)
        # scipy.sparse.hstack is meant for sparse blocks; dense projections
        # are concatenated with NumPy instead.
        return np.concatenate(U_list, axis=1)

    U = np.zeros_like(U_list[0])
    for cur_U, weight in zip(U_list, weights):
        U += cur_U * weight
    # Densify only when the accumulator actually supports it.
    if hasattr(U, 'todense'):
        U = U.todense()
    # np.asarray turns a numpy matrix into a plain ndarray.
    return np.squeeze(np.asarray(U))
# A is always the adjacency matrix
# the choice between adj matrix and trans matrix is decided in the conf
def fastrp_wrapper(A, feature, motifs, conf):
    """Run the full pipeline: merge adjacencies, project, then merge the projections.

    Args:
        A: Per-edge-type adjacency matrices, forwarded to
            ``adj_matrix_weight_merge`` and (as ``train``) to
            ``fastrp_projection``.
        feature: Feature matrix forwarded to ``fastrp_projection``.
        motifs: Not used by this function.
        conf: Mapping providing ``adj_weight``, ``q``, ``dim``,
            ``projection_method``, ``input_matrix``, ``edge_type``, ``s``,
            ``feature_similarity``, ``weights`` and ``normalization``.

    Returns:
        Whatever ``fastrp_merge`` returns for the computed projections.
    """
    merged_adjacency = adj_matrix_weight_merge(A, adj_weight=conf['adj_weight'])

    # Settings are read in the same order as they are passed on.
    projection_settings = {
        'q': conf['q'],
        'dim': conf['dim'],
        'projection_method': conf['projection_method'],
        'input_matrix': conf['input_matrix'],
        'edge_type': conf['edge_type'],
        's': conf['s'],
        'feature_similarity': conf['feature_similarity'],
    }
    projections = fastrp_projection(A, feature, merged_adjacency, **projection_settings)

    return fastrp_merge(
        projections,
        conf['weights'],
        conf['edge_type'],
        conf['normalization'],
        conf['q'],
    )
def get_emb_filename(prefix, conf):
    """Build the ``.mat`` file name that encodes an embedding configuration.

    Args:
        prefix: Leading part of the file name.
        conf: Mapping with the required keys ``dim``, ``projection_method``,
            ``input_matrix``, ``normalization`` and ``weights`` (a sequence
            or ``None``), and the optional keys ``alpha`` and ``C``.

    Returns:
        ``prefix`` followed by the comma-separated settings and ``.mat``.
        A missing ``alpha`` is rendered as an empty string and a missing
        ``C`` as ``1.0``.
    """
    weights = conf['weights']
    weights_part = 'None' if weights is None else ','.join(map(str, weights))
    alpha_part = str(conf['alpha']) if 'alpha' in conf else ''
    # The C field is guarded by its own key; it used to be guarded by
    # 'alpha', which raised KeyError or ignored C when only one was set.
    c_part = str(conf['C']) if 'C' in conf else '1.0'
    return (
        f"{prefix}-dim={conf['dim']}"
        f",projection_method={conf['projection_method']}"
        f",input_matrix={conf['input_matrix']}"
        f",normalization={conf['normalization']}"
        f",weights={weights_part}"
        f",alpha={alpha_part}"
        f",C={c_part}"
        ".mat"
    )