-
Notifications
You must be signed in to change notification settings - Fork 3.7k
/
Copy pathglnn.py
111 lines (90 loc) · 3.49 KB
/
glnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Implementation of:
# Graph-less Neural Networks: Teaching Old MLPs New Tricks via Distillation
import argparse
import os.path as osp
import time
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCN, MLP
parser = argparse.ArgumentParser()
parser.add_argument('--lamb', type=float, default=0.0,
help='Balances loss from hard labels and teacher outputs')
args = parser.parse_args()
if torch.cuda.is_available():
device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
device = torch.device('mps')
else:
device = torch.device('cpu')
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
dataset = Planetoid(path, name='Cora', transform=T.NormalizeFeatures())
data = dataset[0].to(device)
gnn = GCN(dataset.num_node_features, hidden_channels=16,
out_channels=dataset.num_classes, num_layers=2).to(device)
mlp = MLP([dataset.num_node_features, 64, dataset.num_classes], dropout=0.5,
norm=None).to(device)
gnn_optimizer = torch.optim.Adam(gnn.parameters(), lr=0.01, weight_decay=5e-4)
mlp_optimizer = torch.optim.Adam(mlp.parameters(), lr=0.01, weight_decay=5e-4)
def train_teacher():
gnn.train()
gnn_optimizer.zero_grad()
out = gnn(data.x, data.edge_index)
loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
loss.backward()
gnn_optimizer.step()
return float(loss)
@torch.no_grad()
def test_teacher():
gnn.eval()
pred = gnn(data.x, data.edge_index).argmax(dim=-1)
accs = []
for _, mask in data('train_mask', 'val_mask', 'test_mask'):
accs.append(int((pred[mask] == data.y[mask]).sum()) / int(mask.sum()))
return accs
times = []
print('Training Teacher GNN:')
for epoch in range(1, 201):
start = time.time()
loss = train_teacher()
if epoch % 20 == 0:
train_acc, val_acc, test_acc = test_teacher()
print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')
times.append(time.time() - start)
start = time.time()
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")
with torch.no_grad(): # Obtain soft labels from the GNN:
y_soft = gnn(data.x, data.edge_index).log_softmax(dim=-1)
def train_student():
mlp.train()
mlp_optimizer.zero_grad()
out = mlp(data.x)
loss1 = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
loss2 = F.kl_div(out.log_softmax(dim=-1), y_soft, reduction='batchmean',
log_target=True)
loss = args.lamb * loss1 + (1 - args.lamb) * loss2
loss.backward()
mlp_optimizer.step()
return float(loss)
@torch.no_grad()
def test_student():
mlp.eval()
pred = mlp(data.x).argmax(dim=-1)
accs = []
for _, mask in data('train_mask', 'val_mask', 'test_mask'):
accs.append(int((pred[mask] == data.y[mask]).sum()) / int(mask.sum()))
return accs
times = []
print('Training Student MLP:')
for epoch in range(1, 501):
start = time.time()
loss = train_student()
if epoch % 20 == 0:
train_acc, val_acc, test_acc = test_student()
print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')
times.append(time.time() - start)
start = time.time()
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")