test/gcn4r_demo.Rmd

---
title: "GCN4R_demo"
author: "Joshua Levy"
date: "4/28/2020"
output:
  pdf_document: default
  html_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(gcn4r)
```

# Install package
```{r}
# One-time installation: uncomment the lines below and run them once.
# install.packages("reticulate")
# devtools::install_github("jlevy44/gcn4r",ref="master")
# reticulate::conda_create("gcn4r")
# reticulate::conda_install("gcn4r", "gcn4r", pip=TRUE)
# reticulate::use_condaenv("gcn4r")
```


# Import library, link Anaconda to RStudio
```{r}
# Data reference: http://moreno.ss.uci.edu/data.html
# To list the conda environments reticulate can see, run:
# reticulate::conda_list(conda = "auto")

# Bind reticulate to the "gcn4r" conda environment. The conda and python paths
# below are machine-specific; adjust them to match the local installation.
reticulate::use_condaenv(condaenv = "gcn4r", conda = "/anaconda2/bin/conda")
# use_python() is an exported reticulate function, so it is accessed with `::`
# rather than the internal-only `:::` accessor.
reticulate::use_python("/anaconda2/envs/gcn4r/bin/python")
# Handle returned by import_gcn4r(); kept in the global environment.
GCN4R <- import_gcn4r()
```


# Load data
Uncomment the lines below to download the Lazega network and save it as `netdata.rds`.
```{r}
# To regenerate netdata.rds, uncomment and run the four lines below:
# devtools::install_github("zalmquist/networkdata")
# library(networkdata)
# data("lazega")
# saveRDS(asIgraph(lazega$FRIENDSHIP),"netdata.rds")

# Read the previously saved network object from the working directory.
net <- readRDS(file = "netdata.rds")
```

```{r}
# Paths to alternative CSV inputs; only the commented-out loader below uses them.
physician.files <- c(
  "../gcn4r/data/A_physician.csv",
  "../gcn4r/data/X_physician.csv"
)
lawyer.files <- c("A_lawyer.csv", "X_lawyer.csv")

# Option 1: build the network list from the lawyer CSV files.
# net.list<-generate.net.list(lawyer.files[1],lawyer.files[2])
# Option 2 (used here): build the network list from the igraph object.
net.list <- igraph2net.list(net)

# Remove the `na` and `vertex.names` columns from the covariate table.
net.list$X[, c("na", "vertex.names")] <- NULL

# Convert the categorical covariates to factors in a single assignment.
factor.cols <- c("gender", "law_school", "office", "practice", "status")
net.list$X[factor.cols] <- lapply(net.list$X[factor.cols], as.factor)
```

# Create design matrix to expand factors, update net.list
```{r}
# Expand every covariate (factors become indicator columns) into a design
# matrix, then drop its first column, the intercept.
design <- model.matrix(~ ., data = net.list$X)
net.list$X <- design[, -1]
# Preview the first rows of the expanded covariate matrix.
head(net.list$X)
```

# Visualize data

```{r}
# Plot the network, passing the "office2" design-matrix column as the covariate.
visualize.net2(
  net.list,
  covar = "office2"
)
```

# Load parameters

```{r}
# Start from the package defaults; later chunks override individual entries.
parameters <- generate_default_parameters()
```

# Update parameters

```{r}
# Overrides applied on top of the default parameters. Integer-valued settings
# use the `L` suffix so they are passed as integers rather than doubles.
new.parameters <- list(
  encoder_base = "GATConv",
  K = 3L,
  epoch_cluster = 150L,
  n_layers = 1L,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  use_mincut = TRUE,
  ae_type = "ARGA",
  custom_dataset = "none",
  learning_rate = 1e-2,
  lambda_kl = 0.,
  lambda_adv = 1e-3,
  lambda_cluster = 12.,
  lambda_recon = 3.,
  model_save_loc = "cluster.model.pkl"
)
parameters <- update.parameters(parameters, new.parameters)
```

# Fit cluster, embedding, classification, generation, or link prediction model
```{r}
# Fit the clustering model with the updated parameters. `verbose` is spelled
# out as FALSE because F is a reassignable variable, not a constant.
cluster.model <- cluster.model.fit(parameters, net.list, verbose = FALSE)
```

# Plot Objective Convergence
```{r}
# Draw the diagnostics plot for the fitted clustering model.
plot.diagnostics(
  cluster.model
)
```

# Summarize Results
```{r}
# Pull the results, graphs and cluster assignments out of the fitted model.
results <- extract.results(cluster.model)
graphs <- extract.graphs(cluster.model)
cl <- extract.clusters(cluster.model)
# FALSE rather than F: F is a reassignable variable, not a constant.
summary(cluster.model, additional.info = FALSE)
```

# Visualize Results
```{r}
# Default plot of the fitted clustering model.
plot(cluster.model)

# Select the second plot and save it as a TIFF image in the working directory.
tiff(file = "test.tiff", width = 2000, height = 2000, units = "px", res = 600)
plot(cluster.model, plots = c(2))
dev.off()

# Same plot selection, drawn with latent positions.
# TRUE rather than T: T is a reassignable variable, not a constant.
plot(cluster.model, latent = TRUE, plots = c(2))
```


# Interpret
# Attention between Two Individuals 
```{r}
# Visualize attention for the clustering model and keep the returned matrices.
attention.matrices <- visualize.attention(cluster.model, weight.scaling.factor = 4)

# Repeat with latent embeddings; this overwrites `attention.matrices`.
# TRUE rather than T: T is a reassignable variable, not a constant.
attention.matrices <- visualize.attention(
  cluster.model,
  weight.scaling.factor = 4,
  latent = TRUE
)
```

# Important Samples and Features per Cluster Assignment 
```{r}
# Compute attributions for the clustering model's predictors.
attributions <- interpret.predictors(
  cluster.model
)
```

# Extract Motifs in Data
```{r}
# Second clustering model: same settings as before, except for a GCNConv
# encoder and a separate save location.
gcn.overrides <- list(
  encoder_base = "GCNConv",
  model_save_loc = "cluster.model.2.pkl"
)
parameters.2 <- update.parameters(parameters, gcn.overrides)
cluster.model.2 <- cluster.model.fit(parameters.2, net.list)

# Diagnostics and summary for the second model.
plot.diagnostics(cluster.model.2)
summary(cluster.model.2)
```
Motif Visualization:
```{r}
# Motifs extracted with the default settings.
motif.graphs <- extract.motifs(cluster.model.2)

# Extract motifs for specific nodes; the second call adds "erdos" edge
# perturbation and overwrites the result of the first.
motif.nodes <- c(4L, 7L, 23L, 60L, 70L)
motif.graphs.2 <- extract.motifs(cluster.model.2, node_idx = motif.nodes)
motif.graphs.2 <- extract.motifs(
  cluster.model.2,
  node_idx = motif.nodes,
  perturb = "erdos",
  erdos_flip_p = 0.4,
  random_seed = 43L
)

# Cluster assignments and features of the second model feed the importance matrix.
cl <- extract.clusters(cluster.model.2)
predictor.importance <- build.importance.matrix(
  motif.graphs,
  cl,
  extract.features(cluster.model.2)
)

# Node 60: perturbed motifs first, then unperturbed motifs at rising thresholds.
vis.motif(motif.graphs.2, 60L, cl, threshold = 0.1, weight.scaling.factor = 2., important.node.size = 10)
vis.motif(motif.graphs, 60L, cl, threshold = 0.2)
vis.motif(motif.graphs, 60L, cl, threshold = 0.3, weight.scaling.factor = 2., important.node.size = 10)
vis.motif(motif.graphs, 60L, cl, threshold = 0.4, weight.scaling.factor = 2., important.node.size = 10)
vis.motif(motif.graphs, 60L, cl, threshold = 0.5)

# Nodes 7 and 70 at a fixed threshold.
vis.motif(motif.graphs, 7L, cl, threshold = 0.3)
vis.motif(motif.graphs, 70L, cl, threshold = 0.3)

# More advanced examples: extra nodes via other.idx, and different centrality
# measures with bounded node sizes.
vis.motif(motif.graphs, 4L, cl, threshold = 0.3, weight.scaling.factor = 1.5)
vis.motif(
  motif.graphs, 70L, cl,
  threshold = 0.45,
  other.idx = c(1L, 12L),
  weight.scaling.factor = 1.5,
  important.node.size = 10
)
vis.motif(
  motif.graphs, 70L, cl,
  threshold = 0.25,
  weight.scaling.factor = 1.5,
  other.idx = c(60L, 7L, 4L),
  centrality.measure = "none"
)
motif <- vis.motif(
  motif.graphs, 23L, cl,
  threshold = 0.35,
  centrality.measure = "strength",
  floor.size = 3,
  ceil.size = 15
)
vis.motif(
  motif.graphs, 23L, cl,
  threshold = 0.35,
  centrality.measure = "clusterrank",
  floor.size = 3,
  ceil.size = 15
)


```

# Node importance measurements
```{r}
# All logical flags below use TRUE rather than T, because T is an ordinary
# variable that can be reassigned.

# Performance-based importance:
# change in cluster membership or decrease in model performance.
cl <- extract.clusters(cluster.model)
importance.performance <- plot.node.importance(
  cluster.model,
  importance.type = "performance",
  relate.cluster.meas = TRUE,
  weight.scaling.factor = 2,
  cl = cl,
  floor.size = 2.,
  ceil.size = 10
)

# Gradient-based importance:
# sum of absolute values over node features.
importance.gradient <- plot.node.importance(
  cluster.model,
  importance.type = "gradient",
  relate.cluster.meas = TRUE,
  weight.scaling.factor = 2,
  cl = cl
)

# Attention-based importance:
# strength of one layer, or aggregated across layers.
importance.attention <- plot.node.importance(
  cluster.model,
  importance.type = "attention",
  relate.cluster.meas = TRUE,
  weight.scaling.factor = 2,
  cl = cl,
  layers.idx = c(1)
)

# Motif-based importance (for now just a centrality measure):
# what part of the total graph the motif occupies * betweenness centrality or
# average path length, or how much removing the motif affects accuracy.
# Uses the second (GCNConv) model, so the clusters are re-extracted from it.
cl <- extract.clusters(cluster.model.2)
importance.motif <- plot.node.importance(
  cluster.model.2,
  importance.type = "motif",
  relate.cluster.meas = TRUE,
  weight.scaling.factor = 2,
  cl = cl,
  motif.threshold = 0.3
)
```

# Importance comparison
```{r}
# Collect the four importance vectors as columns of one data frame.
importances <- data.frame(
  attention = importance.attention,
  performance = importance.performance,
  motif = importance.motif,
  gradient = importance.gradient
)

# Spearman correlations shown in the plot titles, rounded to two decimals.
rho.performance <- round(
  cor(importance.attention, importance.performance, method = "spearman"),
  2
)
rho.motif <- round(
  cor(importance.attention, importance.motif, method = "spearman"),
  2
)

# Attention vs. performance importance.
ggplot(importances) +
  geom_jitter(aes(attention, performance)) +
  labs(x = "Attention Node Importance", y = "Performance Node Importance") +
  ggtitle(paste(
    "Comparison of Attention to Performance Node Importances;",
    "SpearmanR =",
    toString(rho.performance)
  )) +
  theme_minimal()

# Attention vs. motif importance.
ggplot(importances) +
  geom_jitter(aes(attention, motif)) +
  labs(x = "Attention Node Importance", y = "Motif Node Importance") +
  ggtitle(paste(
    "Comparison of Attention to Motif Node Importances;",
    "SpearmanR =",
    toString(rho.motif)
  )) +
  theme_minimal()


```

# Matching graph embeddings to multivariate normal distribution
```{r}
# Overrides for the variational adversarial auto-encoder ("ARGVA") run,
# including a non-zero KL weight with warm-up.
argva.overrides <- list(
  ae_type = "ARGVA",
  K = 2L,
  lambda_kl = 1e-2,
  kl_warmup = 10L,
  lambda_adv = 1e-4,
  lambda_recon = 1.,
  lambda_cluster = 3.,
  epoch_cluster = 120L,
  n_layers = 2L,
  model_save_loc = "generative.model.pkl"
)
parameters.3 <- update.parameters(parameters, argva.overrides)
embedding.model <- cluster.model.fit(parameters.3, net.list)

# Lay the true graph out by the learned embeddings, grouped by cluster.
z <- extract.embeddings(embedding.model)
cl <- extract.clusters(embedding.model)
lo <- make.layout(z)
G.true <- extract.graphs(embedding.model)$A.true
plot.net(G.true, cl, layout = lo)
```

# Plot diagnostics with added variational KL divergence loss
```{r}
# Print the summary of the embedding model, then draw its diagnostics plot.
summary(
  embedding.model
)
plot.diagnostics(
  embedding.model
)
```

# Simulate Networks from Variational Graph Auto-Encoder model
```{r}
# Simulate 30 networks from the embedding model.
sim.graphs <- simulate.networks(embedding.model, nsim = 30)
cl <- extract.clusters(embedding.model)

# Plot a handful of the simulations, each laid out by its own embedding.
for (i in c(1, 2, 20, 30)) {
  net.sim <- sim.graphs$networks[[i]]
  embedding <- sim.graphs$embeddings[[i]]
  lo <- make.layout(embedding)
  plot.net(
    net.sim,
    cl,
    layout = lo
  )
}
}
```

# Run classification model
```{r}
# Classification run: reconstruction, clustering and KL weights are zeroed,
# leaving the prediction loss on the "office2" column.
classifier.overrides <- list(
  ae_type = "GAE",
  K = 2L,
  lambda_kl = 0,
  lambda_recon = 0.,
  lambda_cluster = 0.,
  epoch_cluster = 120L,
  n_layers = 2L,
  lambda_pred = 5.,
  prediction_column = "office2",
  model_save_loc = "prediction.model.pkl"
)
parameters.4 <- update.parameters(parameters, classifier.overrides)

prediction.model <- classify.model.fit(parameters.4, net.list)
plot.diagnostics(prediction.model)
summary(prediction.model)

z <- extract.embeddings(prediction.model)
# Predicted class per row = index of the largest entry in that row.
y <- apply(extract.prediction(prediction.model), 1, which.max)
lo <- make.layout(z)
G.true <- extract.graphs(prediction.model)$A.true
plot.net(G.true, y, layout = lo)
```

# Comparisons to other R Libraries
# Social Influence 
```{r}
# Fit an lnam model for comparison.
# NOTE(review): vertex_attr(), distances(), V() and lnam() are called
# unqualified; confirm the packages providing them are attached.
X <- data.frame(vertex_attr(net))
geo.dist <- distances(net)
# Element-wise inverse of the distance matrix; the diagonal is then zeroed.
inv.geo.dist <- geo.dist^(-1)
diag(inv.geo.dist) <- 0
# The weight matrix is passed as W2, with W1 left NULL.
fit <- lnam(
  y = V(net)$practice,
  x = as.matrix(X[, c("seniority", "age", "gender")]),
  W1 = NULL,
  W2 = inv.geo.dist
)

# Inspect autoregressive dependence.
summary(fit)

# Get social influence from the fitted model.
n.nodes <- fit$df.total

# Scale each slice of W along its first dimension by rho, sum the slices,
# and solve (I - sum) for its inverse.
influence.term <- function(W, rho) {
  weighted.sum <- apply(sweep(W, 1, rho, "*"), c(2, 3), sum)
  qr.solve(diag(n.nodes) - weighted.sum)
}

inf <- matrix(0, ncol = n.nodes, nrow = n.nodes)
if (!is.null(fit$W1)) {
  inf <- inf + influence.term(fit$W1, fit$rho1)
}
if (!is.null(fit$W2)) {
  inf <- inf + influence.term(fit$W2, fit$rho2)
}

# Symmetrized absolute influence, embedded in two dimensions.
syminf <- abs(inf) + abs(t(inf))
diag(syminf) <- 0
infco <- cmdscale(as.dist(max(syminf) - syminf), k = 2)

# Standardize the off-diagonal influence values; the diagonal is set to NA
# for the mean/sd computation and ends up as 0.
diag(inf) <- NA
stdinf <- inf - mean(inf, na.rm = TRUE)
infsd <- sd(as.vector(stdinf), na.rm = TRUE)
stdinf <- stdinf / infsd
stdinf[is.na(stdinf)] <- 0

# Plot the social influence matrix: absolute standardized influence,
# thresholded at 1.96 and grouped by practice.
vis.weighted.graph(
  abs(stdinf),
  weight.scaling.factor = 0.7,
  cl = X$practice,
  threshold = 1.96
)

# Attention matrices of the clustering model, grouped by its cluster
# assignments: the product of the first two, then each one on its own.
vis.weighted.graph(
  (attention.matrices[[1]]) %*% (attention.matrices[[2]]),
  cl = extract.clusters(cluster.model)
)
vis.weighted.graph(
  attention.matrices[[1]] %*% attention.matrices[[2]],
  cl = extract.clusters(cluster.model),
  threshold = 0.05
)
vis.weighted.graph(
  attention.matrices[[1]],
  cl = extract.clusters(cluster.model),
  threshold = 0.1
)
vis.weighted.graph(
  attention.matrices[[2]],
  cl = extract.clusters(cluster.model),
  threshold = 0.002
)


# Copy of the network list restricted to four covariates, with a symmetrized
# adjacency matrix and a zeroed diagonal.
net.list.2 <- net.list
net.list.2$X <- net.list.2$X[, c("seniority", "age", "practice2", "gender2")]
net.list.2$A <- (net.list.2$A + t(net.list.2$A)) / 2
diag(net.list.2$A) <- 0

# Classifier on the "practice2" column, using three layers.
parameters.6 <- update.parameters(parameters, list(
  ae_type = "GAE",
  K = 2L,
  lambda_kl = 0,
  lambda_recon = 0.,
  lambda_cluster = 0.,
  epoch_cluster = 120L,
  n_layers = 3L,
  lambda_pred = 5.,
  prediction_column = "practice2",
  model_save_loc = "prediction.model.2.pkl"
))

# Note: this overwrites the `prediction.model` fitted in the earlier chunk.
prediction.model <- classify.model.fit(parameters.6, net.list.2)
plot.diagnostics(prediction.model)
summary(prediction.model)

# Attention matrices without plotting.
# FALSE rather than F: F is a reassignable variable, not a constant.
attn.matrices <- visualize.attention(prediction.model, plot = FALSE)
# torch is only referenced by the commented-out line at the end of this block.
torch <- import("torch")

# Each of the first four attention matrices on its own.
for (i in 1:4) {
  vis.weighted.graph(attn.matrices[[i]], cl = X$practice, threshold = 0.05, weight.scaling.factor = 2.5)
}

# Product of the second and third attention matrices.
vis.weighted.graph(attn.matrices[[2]] %*% attn.matrices[[3]], cl = X$practice, threshold = 0.06, weight.scaling.factor = 2.5)

# Product of all four attention matrices.
vis.weighted.graph(attn.matrices[[1]] %*% attn.matrices[[2]] %*% attn.matrices[[3]] %*% attn.matrices[[4]], cl = X$practice, threshold = 0.02, weight.scaling.factor = 0.75)

# Social influence matrix again, for side-by-side comparison.
vis.weighted.graph(abs(stdinf), weight.scaling.factor = 0.7, cl = X$practice, threshold = 1.96)
#torch$argmax(prediction.model$results$y,dim=1L)$numpy()
```

# Louvain Modularity / LatentNet Clustering
```{r}
# sklearn supplies the normalized mutual information score used below.
sklearn <- import("sklearn")

# Louvain
cl.louvain <- run.louvain(net)
cl.gnn <- extract.clusters(cluster.model)

# Agreement between the GNN clusters and the Louvain clusters.
sklearn$metrics$normalized_mutual_info_score(cl.gnn, cl.louvain)
plot(cluster.model)

# LatentNet
# Logical flags are spelled out as TRUE/FALSE: T and F are ordinary variables
# that can be reassigned.
latent.cluster.fit <- ergm.network(
  net ~ rreceiver + rsender + bilinear(d = 2, G = 3),
  cluster.model,
  distance.metric = "correlation",
  add.dist = FALSE,
  simulate = FALSE,
  pseudo = FALSE,
  use_ergmm = TRUE
)
summary(latent.cluster.fit)
plot(latent.cluster.fit)
cl.latent.net <- latent.cluster.fit$mcmc.mle$Z.K

# Agreement of the latent-network clusters with the other two clusterings.
sklearn$metrics$normalized_mutual_info_score(cl.gnn, cl.latent.net)
sklearn$metrics$normalized_mutual_info_score(cl.louvain, cl.latent.net)

# Plot the network laid out by the fitted latent positions.
lo <- make.layout(latent.cluster.fit$mcmc.mle$Z)
plot.net(net, cl.latent.net, layout = lo)

# Reset the seed before each plot so all three start from the same RNG state.
set.seed(42)
plot.net(net, group = cl.latent.net)
set.seed(42)
plot.net(net, group = cl.gnn)
set.seed(42)
plot.net(net, group = cl.louvain)
```