Skip to content

Commit

Permalink
FinalRuns Added
Browse files Browse the repository at this point in the history
  • Loading branch information
guntas-13 committed Apr 23, 2024
1 parent 36e07a7 commit 498a6d3
Show file tree
Hide file tree
Showing 11 changed files with 2,137 additions and 177 deletions.
286 changes: 286 additions & 0 deletions FinalRuns/AmazonGraph.ipynb

Large diffs are not rendered by default.

286 changes: 286 additions & 0 deletions FinalRuns/DBLPGraph.ipynb

Large diffs are not rendered by default.

286 changes: 286 additions & 0 deletions FinalRuns/EmailGraph.ipynb

Large diffs are not rendered by default.

286 changes: 286 additions & 0 deletions FinalRuns/FacebookGraph.ipynb

Large diffs are not rendered by default.

413 changes: 413 additions & 0 deletions FinalRuns/Networks.py

Large diffs are not rendered by default.

Binary file added FinalRuns/Networks.zip
Binary file not shown.
286 changes: 286 additions & 0 deletions FinalRuns/YouTubeGraph.ipynb

Large diffs are not rendered by default.

71 changes: 71 additions & 0 deletions FinalRuns/latex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib

from math import sqrt
SPINE_COLOR = 'gray'

def latexify(fig_width=None, fig_height=None, columns=1):
    """Set up matplotlib's RC params for LaTeX-quality plotting.

    Call this once, before creating any figure.

    Parameters
    ----------
    fig_width : float, optional
        Figure width in inches. Defaults to 2.0 for one column,
        4.0 for two columns.
    fig_height : float, optional
        Figure height in inches. Defaults to fig_width / 1.62
        (approximate golden ratio), capped at 8 inches.
    columns : {1, 2}
        Number of journal columns the figure spans.
    """
    # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples
    # Width and max height in inches for IEEE journals taken from
    # computer.org/cms/Computer.org/Journal%20templates/transactions_art_guide.pdf
    assert columns in (1, 2)

    if fig_width is None:
        fig_width = 2.0 if columns == 1 else 4.0  # width in inches

    if fig_height is None:
        golden_mean = 1 / 1.62  # aesthetic ratio
        fig_height = fig_width * golden_mean  # height in inches

    MAX_HEIGHT_INCHES = 8.0
    if fig_height > MAX_HEIGHT_INCHES:
        # BUG FIX: the original concatenated the message without spaces,
        # printing e.g. "fig_height too large:9.0so will reduce to8.0inches."
        print(f"WARNING: fig_height too large: {fig_height} "
              f"so will reduce to {MAX_HEIGHT_INCHES} inches.")
        fig_height = MAX_HEIGHT_INCHES

    params = {
        'backend': 'ps',
        'text.latex.preamble': '\\usepackage{gensymb}',
        'axes.labelsize': 10,  # fontsize for x and y labels
        'axes.titlesize': 10,
        'font.size': 10,
        'legend.fontsize': 10,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'text.usetex': True,  # render all text through LaTeX
        'figure.figsize': [fig_width, fig_height],
        'font.family': 'serif',
    }

    matplotlib.rcParams.update(params)


def format_axes(ax):
    """Apply the publication style to *ax*.

    Hides the top and right spines, colors and thins the remaining
    two, and forces the ticks onto the bottom/left sides pointing
    outward. Returns the same Axes for chaining.
    """
    # Remove the spines that only add visual clutter.
    for hidden in ('top', 'right'):
        ax.spines[hidden].set_visible(False)

    # Tone down the spines that stay visible.
    for visible in ('left', 'bottom'):
        spine = ax.spines[visible]
        spine.set_color(SPINE_COLOR)
        spine.set_linewidth(0.5)

    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # Match the tick marks to the spine color.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_tick_params(direction='out', color=SPINE_COLOR)

    return ax
284 changes: 110 additions & 174 deletions Iter2_LPA_All4.ipynb

Large diffs are not rendered by default.

116 changes: 113 additions & 3 deletions Networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import pandas as pd
import random
from cdlib import algorithms
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from latex import latexify
latexify(columns = 2)
Expand Down Expand Up @@ -129,6 +130,72 @@ def edge_L_Spar_sparsification(G, r):
H.add_edge(edge[0], edge[1])
return H

def clustering_coeffs_edge_sampling(graph, sampling_ratio):
    """Sparsify *graph* by keeping the highest-scoring edges.

    Each edge (u, v) is scored by the product of the clustering
    coefficients of u and v; the top ``sampling_ratio`` fraction of
    edges (by score, descending) is retained. All original nodes are
    kept, so isolated nodes survive sparsification.

    Returns a new nx.Graph.
    """
    cc = nx.clustering(graph)
    edge_list = list(graph.edges())
    scores = [cc[u] * cc[v] for u, v in edge_list]

    # Descending order of endpoint-clustering product.
    order = np.argsort(scores)[::-1]
    keep = int(graph.number_of_edges() * sampling_ratio)
    chosen = np.array(edge_list)[order][:keep]

    H = nx.Graph()
    H.add_edges_from(chosen)
    H.add_nodes_from(graph.nodes)
    return H

def metropolis_hastings_algorithm(graph:nx.Graph, sampling_ratio):
    """Edge sampling via a Metropolis-Hastings-style random walk.

    Walks the graph from a random seed, accepting each proposed edge
    (current, new) with probability driven by the degree ratio
    deg(current)/deg(new), until a ``sampling_ratio`` fraction of the
    original edge count has been collected. Every 10 iterations the
    walk teleports to a fresh random node and ``alpha`` is lowered
    (alpha**1.5), which widens the uniform(alpha, 1) draw downward and
    so makes acceptance easier over time.

    Returns a new nx.Graph containing the sampled edges plus every
    original node. NOTE(review): result is stochastic (np.random);
    seed externally for reproducibility. Presumably assumes the walk
    never reaches a node with no neighbors — np.random.choice on an
    empty list would raise; verify for graphs with isolated nodes.
    """
    seed_node = np.random.choice(np.array(graph.nodes()))
    current_node = seed_node
    num_edges = 0
    patience = 0  # iterations since last teleport
    alpha = 0.3   # lower bound of the acceptance draw; decays on teleport

    H = nx.Graph()
    # Stop once the target edge budget is reached.
    while num_edges < int(graph.number_of_edges()*sampling_ratio):
        if patience == 10:
            # Teleport: restart the walk at a uniformly random node.
            current_node = np.random.choice(np.array(graph.nodes()))
            patience = 0
            alpha = alpha**1.5  # alpha < 1, so this shrinks alpha -> easier acceptance
        neighbors = graph.neighbors(current_node)
        new_node = np.random.choice(list(neighbors))
        if not H.has_edge(current_node, new_node):
            # Metropolis-style acceptance: favors moving toward
            # lower-degree neighbors (ratio > 1 accepts always).
            r = np.random.uniform(alpha, 1)
            if r < (graph.degree(current_node)/graph.degree(new_node)):
                H.add_edges_from([(current_node, new_node)])
                current_node = new_node
                num_edges += 1
            else:
                current_node = current_node  # rejected: stay put (no-op)
        else:
            current_node = current_node  # edge already sampled: stay put (no-op)
        patience += 1  # counts every iteration; only reset by teleport

    H.add_nodes_from(graph.nodes)
    return H

def effective_resistance_sampling_2(graph:nx.Graph, sampling_ratio):
    """Sparsify *graph* by keeping high-effective-resistance edges.

    For each connected component, computes the resistance distance
    between the endpoints of every edge, then keeps the top
    ``sampling_ratio`` fraction of that component's edges (highest
    resistance first). All original nodes are retained.

    Returns a new nx.Graph.

    BUG FIX: the original funneled the selected edges through a float
    ``np.zeros((l, 2))`` buffer, which coerced node labels to floats
    (and raised ValueError for non-numeric labels). Edges are now
    accumulated in a plain list, preserving node label types.
    """
    sampled_edges = []
    for component in nx.connected_components(graph):
        subgraph = graph.subgraph(component)
        edges = list(subgraph.edges())
        # Resistance distance is only defined within a connected
        # component, hence the per-component loop.
        resistances = [nx.resistance_distance(subgraph, u, v) for u, v in edges]
        order = np.argsort(resistances)[::-1]  # descending resistance
        keep = int(subgraph.number_of_edges() * sampling_ratio)
        sampled_edges.extend(edges[i] for i in order[:keep])

    H = nx.Graph()
    H.add_edges_from(sampled_edges)
    H.add_nodes_from(graph.nodes)
    return H

def run_louvain(G):
    """Run Louvain community detection on G.

    Returns the partition produced by
    ``community.community_louvain.best_partition`` (a node -> community
    mapping, as consumed by ``metrics``/``get_communities`` elsewhere).
    """
    partition = community.community_louvain.best_partition(G)
    return partition
Expand All @@ -137,6 +204,12 @@ def run_lpa(G):
communities = nx.community.label_propagation.label_propagation_communities(G)
return communities

def run_walktrap(G):
    """Run Walktrap community detection on G via cdlib.

    Returns the cdlib result object (its ``.communities`` attribute is
    used by the flag == 2 path in ``plot_metrics_sparse``).
    """
    result = algorithms.walktrap(G)
    return result

def run_infomap(G):
    """Run Infomap community detection on G via cdlib.

    Returns the cdlib result object (its ``.communities`` attribute is
    used by the flag == 2 path in ``plot_metrics_sparse``).
    """
    result = algorithms.infomap(G)
    return result

def metrics(ground_truth, predicted):
ari = adjusted_rand_score(list(ground_truth.values()), list(predicted.values()))
nmi = normalized_mutual_info_score(list(ground_truth.values()), list(predicted.values()))
Expand All @@ -146,6 +219,9 @@ def modularity(G, communities):
m = nx.community.modularity(G, communities)
return m

def clustering_coefficient(G):
    """Return the average clustering coefficient of G."""
    avg_cc = nx.average_clustering(G)
    return avg_cc



# MAJOR PLOT FUNCTIONS
Expand All @@ -159,7 +235,7 @@ def plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction
sparseFunctions (list): list of tuples containing the name and the function to generate the sparse graph
k_values (list): list of values for the percentage of edges to retain
AlgoFunction (function): Community Detection Algorithm
flag (int): 0 if the output of the algorithm is a dictionary, 1 if the output is a list of communities
flag (int): 0 if the output of the algorithm is a dictionary, 1 if the output is a list of communities, 2 if the output is a community object
networkName (str, optional): Name of Network to go in the Title of the Plots. Defaults to None.
AlgoName (str, optional): Algorithm Name to go in the Title of the Plots. Defaults to None.
Expand All @@ -170,6 +246,7 @@ def plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction
ari_values = [[0] * len(k_values) for _ in sparseFunctions]
modularity_values = [[0] * len(k_values) for _ in sparseFunctions]
nmi_values = [[0] * len(k_values) for _ in sparseFunctions]
clust_coeff_values = [[0] * len(k_values) for _ in sparseFunctions]
names = [name for name, _ in sparseFunctions]
SparseGraphs = [[0] * len(k_values) for _ in sparseFunctions]

Expand All @@ -179,8 +256,11 @@ def plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction
predicted = AlgoFunction(H)
if (flag == 1):
predicted = get_community_dict(predicted)
elif (flag == 2):
predicted = get_community_dict(predicted.communities)
ari, nmi = metrics(ground_truth, predicted)
modularity_values[idx][i] = modularity(H, get_communities(predicted))
clust_coeff_values[idx][i] = clustering_coefficient(H)
ari_values[idx][i] = ari
nmi_values[idx][i] = nmi
SparseGraphs[idx][i] = H
Expand All @@ -196,6 +276,7 @@ def plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction
plt.title(f"ARI for {AlgoName} vs k for {networkName}")
plt.legend()
plt.grid()
plt.savefig(f"{AlgoName}_{networkName}_ARI.png")
plt.show()

plt.figure(figsize = (12, 8))
Expand All @@ -209,6 +290,7 @@ def plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction
plt.title(f"NMI for {AlgoName} vs k for {networkName}")
plt.legend()
plt.grid()
plt.savefig(f"{AlgoName}_{networkName}_NMI.png")
plt.show()

plt.figure(figsize = (12, 8))
Expand All @@ -224,12 +306,40 @@ def plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction
plt.title(f"Modularity for {AlgoName} vs k for {networkName}")
plt.legend()
plt.grid()
plt.savefig(f"{AlgoName}_{networkName}_Modularity.png")
plt.show()

plt.figure(figsize = (12, 8))
plt.xticks(range(len(k_values)), [f"{100 * k}%" for k in k_values])
for idx in range(len(sparseFunctions)):
plt.plot(clust_coeff_values[idx], label = names[idx], marker = "o")
for i, txt in enumerate(clust_coeff_values[idx]):
plt.annotate(f"{txt:.4f}", (i, clust_coeff_values[idx][i]))

plt.axhline(y = clustering_coefficient(G), color = "black", linestyle = "--", label = "Original Graph")
plt.xlabel("Percentage Retention of Edges")
plt.ylabel("Clustering Coefficient")
plt.title(f"Clustering Coefficient for {AlgoName} vs k for {networkName}")
plt.legend()
plt.grid()
plt.savefig(f"{AlgoName}_{networkName}_Clustering_Coefficients.png")
plt.show()

return SparseGraphs
return SparseGraphs, ari_values, nmi_values, modularity_values, clust_coeff_values

def createDataFrames(ari_values, nmi_values, modularity_values, clust_coeff_values, k_values, sparseFunctionNames, algorithm, dataset):
    """Dump the four metric tables to CSV files in the working directory.

    Each table is written as a DataFrame with one row per sparsifier
    (indexed by ``sparseFunctionNames``) and one column per retention
    value in ``k_values``, to a file named
    ``<algorithm>_<dataset>_<metric>.csv``.
    """
    # Metric name (used in the filename) -> 2-D list of values.
    tables = {
        "ARI": ari_values,
        "NMI": nmi_values,
        "Modularity": modularity_values,
        "Clustering_Coefficients": clust_coeff_values,
    }
    for metric, values in tables.items():
        frame = pd.DataFrame(values, columns=k_values, index=sparseFunctionNames)
        frame.to_csv(f"{algorithm}_{dataset}_{metric}.csv")

    print("DataFrames Created Successfully")

print("Function Description:\n1. plotRandomCommunity(G, community, title = None)\n2. get_community_dict(communities)\n3. get_communities(community_dict)\n4. run_louvain(G)\n5. metrics(ground_truth, predicted)\n6. plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction, flag, networkName = None, AlgoName = None)\n\nSampling Methods:\n1. edge_betweenness_sparsification(G, k)\n2. edge_random_sparsification(G, k)\n3. edge_jaccard_sparsification(G, k)\n4. edge_L_Spar_sparsification(G, r)\n\n")
print("Function Description:\n1. plotRandomCommunity(G, community, title = None)\n2. get_community_dict(communities)\n3. get_communities(community_dict)\n4. metrics(ground_truth, predicted)\n5. plot_metrics_sparse(G, ground_truth, sparseFunctions, k_values, AlgoFunction, flag, networkName = None, AlgoName = None)\n6. createDataFrames(ari_values, nmi_values, modularity_values, clust_coeff_values, k_values, sparseFunctionNames, algorithm, dataset)\n\nSampling Methods:\n1. edge_betweenness_sparsification(G, k)\n2. edge_random_sparsification(G, k)\n3. edge_jaccard_sparsification(G, k)\n4. edge_L_Spar_sparsification(G, r)\n5. clustering_coeffs_edge_sampling(G, k)\n6. metropolis_hastings_algorithm(G, r)\n7. effective_resistance_sampling_2(G, r)\n\nCommunity Detection Algorithms:\n1. run_louvain(G)\n2. run_lpa(G)\n3. run_walktrap(G)\n4. run_infomap(G)\n\n")


# DBLP GRAPH
Expand Down
Binary file modified __pycache__/Networks.cpython-39.pyc
Binary file not shown.

0 comments on commit 498a6d3

Please sign in to comment.