-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathglc_3d_rotate.py
190 lines (152 loc) · 7.83 KB
/
glc_3d_rotate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import argparse
from concurrent.futures import ThreadPoolExecutor
import cProfile
from collections import Counter
# Decorator that profiles a single call with cProfile
def profile_func(func):
    """Wrap ``func`` so that every call is profiled and its stats are printed.

    Args:
        func: Any callable.

    Returns:
        A wrapper with the same signature as ``func``. Each call runs ``func``
        under a fresh ``cProfile.Profile``, prints the collected statistics to
        stdout once the call succeeds, and returns ``func``'s result unchanged.
    """
    # Imported locally so this block does not depend on the file-level imports.
    import functools

    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        profile = cProfile.Profile()
        profile.enable()
        try:
            result = func(*args, **kwargs)
        finally:
            # Always switch the profiler off, even when func raises, so a
            # failed call does not leave profiling active.
            profile.disable()
        profile.print_stats()
        return result

    return wrapper
# Parse command line arguments.
# NOTE: this runs at module import time, so the script cannot be imported
# without a --file_path argument on the command line.
parser = argparse.ArgumentParser(description="3D GLC-L Visualization with SVM Boundary Curve")
parser.add_argument("--file_path", required=True, help="Path to the CSV file containing the dataset")
args = parser.parse_args()
# Load dataset from CSV; the file must contain a column named 'class'.
df = pd.read_csv(args.file_path)
data = df.drop(columns=['class']).values # Feature matrix: every column except 'class'
labels = df['class'].values # Target vector: the 'class' column
# Normalize the data column-wise (axis=0) using scikit-learn's 'max' norm.
# assumes all feature columns are numeric - TODO confirm against the input CSV
data_normalized = normalize(data, axis=0, norm='max')
# Map a projection coefficient to the direction angle of its GLC segment
def calculateAngle(coefficient):
    """Return the arctangent of ``coefficient`` in radians.

    Works element-wise on NumPy arrays as well as on scalars, because the
    computation is delegated to ``np.arctan``.
    """
    angle = np.arctan(coefficient)
    return angle
# Score a coefficient vector by how well LDA separates the projected data
def evaluateCoefficientsLDA(X, y, coefficients):
    """Return the LDA accuracy on ``X`` projected onto ``coefficients``.

    Args:
        X: Feature matrix; ``X.dot(coefficients)`` must be defined.
        y: Class labels, one per row of ``X``.
        coefficients: Weight vector, one entry per feature.

    Returns:
        The value of ``LinearDiscriminantAnalysis.score`` evaluated on the
        same projected samples the model was fitted on (training accuracy).
    """
    # The classifier expects a 2-D input, so the 1-D projection is turned
    # into a single-column matrix once and reused for fit and score.
    projected_column = X.dot(coefficients).reshape(-1, 1)
    classifier = LinearDiscriminantAnalysis()
    classifier.fit(projected_column, y)
    return classifier.score(projected_column, y)
# Draw one random coefficient vector and score it
def one_epoch(data, labels):
    """Sample a random coefficient vector and evaluate it with LDA.

    Args:
        data: 2-D feature matrix; its second dimension sets the vector length.
        labels: Class labels, one per row of ``data``.

    Returns:
        A ``(coefficients, accuracy)`` tuple, where ``coefficients`` is drawn
        uniformly from [-1, 1) and ``accuracy`` comes from
        ``evaluateCoefficientsLDA``.
    """
    feature_count = data.shape[1]
    candidate = np.random.uniform(-1, 1, feature_count)
    score = evaluateCoefficientsLDA(data, labels, candidate)
    return candidate, score
# Random search for the best projection coefficients, run on a thread pool.
# Despite the "svm" in the name, candidates are scored by one_epoch.
@profile_func
def coefficients_search_svm_parallel(data, labels, epochs=100):
    """Run ``epochs`` independent random trials and keep the best one.

    Args:
        data: 2-D feature matrix passed unchanged to every trial.
        labels: Class labels passed unchanged to every trial.
        epochs: Number of random coefficient vectors to try.

    Returns:
        ``(best_coefficients, best_accuracy)``. If no trial scores above 0,
        the result is ``(None, 0)``.
    """
    with ThreadPoolExecutor() as pool:
        # Each task ignores its index; it only exists to trigger one trial.
        trials = list(pool.map(lambda _: one_epoch(data, labels), range(epochs)))

    winner, top_score = None, 0
    for candidate, score in trials:
        # Strict comparison: on ties the earliest trial is kept.
        if score > top_score:
            winner, top_score = candidate, score
    return winner, top_score
# Skip the expensive coefficient search when `best_coefficients` already
# exists in the module namespace (e.g. the script is re-run in the same
# interactive session).
# NOTE(review): the source's indentation was lost; the guard is assumed to
# cover only the split and the search - confirm against the original file.
if 'best_coefficients' not in globals():
    # Hold out 20% of the rows; only the training part is used by the search.
    X_train, X_test, y_train, y_test = train_test_split(
        data_normalized,
        labels,
        test_size=0.2,
        random_state=42,
    )
    # Random search for the projection that LDA separates best.
    best_coefficients, best_accuracy = coefficients_search_svm_parallel(X_train, y_train)

# Project the full dataset onto the chosen coefficients (as a single column)
# and fit the final LDA model on it.
X_projected = data_normalized.dot(best_coefficients).reshape(-1, 1)
lda = LinearDiscriminantAnalysis().fit(X_projected, labels)
# LDA predictions for every row of the dataset.
predictions = lda.predict(X_projected)
# Distinct class values found in the dataset (sorted by np.unique)
unique_labels = np.unique(labels)
# Assign each class an evenly spaced colour from the 'jet' colormap.
# The divisor is clamped to 1 so a dataset with a single class maps to the
# first colour instead of raising ZeroDivisionError.
color_map = {
    label: plt.cm.jet(i / float(max(len(unique_labels) - 1, 1)))
    for i, label in enumerate(unique_labels)
}
# Helper: vertex coordinates of every sample's GLC polyline
def _glc_polylines(values, coefficients, z_offsets):
    """Return the x, y and z vertices of each sample's chained segments.

    Args:
        values: 2-D array (one row per sample, one column per feature); each
            entry is the length factor of one segment.
        coefficients: 1-D array with one coefficient per feature; its
            arctangent is the segment's direction angle.
        z_offsets: 1-D array with one value per sample, added to z at every
            segment of that sample.

    Returns:
        Three arrays of shape ``(n_samples, n_features + 1)``. Column 0 is the
        origin and column ``k`` is the end of the k-th segment.
    """
    angles = calculateAngle(coefficients)
    steps_x = values * np.cos(angles)
    steps_y = values * np.sin(angles)
    steps_z = values * np.tan(angles) + z_offsets[:, np.newaxis]
    origin = np.zeros((values.shape[0], 1))
    # Each vertex is the running sum of the segment vectors before it.
    path_x = np.hstack([origin, np.cumsum(steps_x, axis=1)])
    path_y = np.hstack([origin, np.cumsum(steps_y, axis=1)])
    path_z = np.hstack([origin, np.cumsum(steps_z, axis=1)])
    return path_x, path_y, path_z


# Animation callback: redraw the whole scene for one rotation angle
def update(num):
    """Redraw the 3D GLC-L plot for animation frame ``num``.

    Reads the module-level ``ax``, ``data_normalized``, ``labels``,
    ``unique_labels``, ``best_coefficients``, ``predictions`` and
    ``color_map``. The axes are cleared on every frame, so all artists
    (segments, endpoints, boundary plane, annotation, legend) are recreated.

    Args:
        num: Frame number; used directly as the azimuth in degrees.
    """
    ax.cla()  # Clear the previous frame
    ax.view_init(azim=1 * num, elev=30)

    z_offset_factor = 0.0  # Per-segment z lift (+) for the first class, drop (-) for the others
    line_alpha = 0.1  # Opacity of the segments (between 0 and 1)

    n_features = data_normalized.shape[1]
    z_offsets = np.where(labels == unique_labels[0], z_offset_factor, -z_offset_factor)
    path_x, path_y, path_z = _glc_polylines(data_normalized, best_coefficients, z_offsets)

    # Draw every sample as a chain of segments, with a square marker at its endpoint.
    for label, xs, ys, zs in zip(labels, path_x, path_y, path_z):
        segment_color = color_map[label]
        for j in range(n_features):
            ax.plot(xs[j:j + 2], ys[j:j + 2], zs[j:j + 2], color=segment_color, alpha=line_alpha)
        if n_features:
            ax.scatter(xs[-1], ys[-1], zs[-1], color='black', s=10, marker='s')

    end_x, end_y, end_z = path_x[:, -1], path_y[:, -1], path_z[:, -1]

    # Endpoints whose LDA prediction disagrees with the true label.
    misclassified_x = end_x[predictions != labels]
    if misclassified_x.size > 0:
        # The boundary sits halfway between the leftmost and rightmost
        # misclassified endpoints along the x axis.
        boundary_x = (misclassified_x.min() + misclassified_x.max()) / 2.0

        # Plane parallel to the yz-plane, spanning the range of the endpoints.
        yy, zz = np.meshgrid(np.linspace(end_y.min(), end_y.max(), 50),
                             np.linspace(end_z.min(), end_z.max(), 50))
        xx = np.full_like(yy, boundary_x)
        ax.plot_surface(xx, yy, zz, color='c', alpha=0.4)

        # Count the endpoints of each class on either side of the plane.
        on_left = end_x < boundary_x
        left_counts = Counter(labels[on_left])
        right_counts = Counter(labels[~on_left])

        annotation_parts = ["Boundary plane:\n"]
        for label in unique_labels:
            total_count = left_counts[label] + right_counts[label]
            left_percentage = (left_counts[label] / total_count) * 100 if total_count else 0
            right_percentage = (right_counts[label] / total_count) * 100 if total_count else 0
            annotation_parts.append(f"{left_percentage:.2f}% of {label} class on the left side\n")
            annotation_parts.append(f"{right_percentage:.2f}% of {label} class on the right side\n")

        # Place the text left of the axes; attached to `ax` explicitly rather
        # than to whichever axes pyplot considers current.
        ax.annotate("".join(annotation_parts), xy=(-0.25, 0.95), xycoords='axes fraction', fontsize=10)

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(f'3D GLC-L (Rotation: {1*num} degrees)')

    # Legend outside the plot area, one square marker per class.
    legend_handles = [
        plt.Line2D([0], [0], marker='s', color='w', label=str(label),
                   markerfacecolor=color_map[label], markersize=5)
        for label in unique_labels
    ]
    ax.legend(handles=legend_handles, title="Classes", loc='upper left', bbox_to_anchor=(1, 1))
# Initialize the figure and the 3D axis that update() draws on
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')
# Create an animation of 360 frames; update() uses the frame number as the
# azimuth, so the view rotates by 1 degree per frame. The result is kept in
# `ani` so the animation object stays referenced while the figure is shown.
ani = FuncAnimation(fig, update, frames=range(360), repeat=False)
# Optional: save the animation as a GIF (disabled)
#ani.save('glcl_3d_rotation_1_degree_svm.gif', writer='pillow', fps=10)
# Show the animation in an interactive window
plt.show()