-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy path4. step2.py
53 lines (34 loc) · 1.1 KB
/
4. step2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import codecademylib3_seaborn
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
# Load the iris dataset and keep its first two feature columns
# (referred to in this script as sepal length and sepal width).
iris = datasets.load_iris()
samples = iris.data

x = samples[:, 0]
y = samples[:, 1]

# One row per sample: [x, y]
sepal_length_width = np.column_stack((x, y))

# Step 1: Place K random centroids
k = 3

# Each centroid coordinate is drawn uniformly from the observed range of
# the matching feature, so every centroid starts inside the data's
# bounding box.  x-coordinates are drawn before y-coordinates.
centroids_x = np.random.uniform(x.min(), x.max(), size=k)
centroids_y = np.random.uniform(y.min(), y.max(), size=k)

# One row per centroid: [x, y]
centroids = np.column_stack((centroids_x, centroids_y))
# Step 2: Assign samples to nearest centroid
# Distance formula
def distance(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Works for points of any dimensionality, not only 2-D; for 2-D input
    the result is identical to ``((a[0]-b[0])**2 + (a[1]-b[1])**2) ** 0.5``.

    Args:
        a: Sequence of numeric coordinates (list, tuple, 1-D array, ...).
        b: Sequence of numeric coordinates with the same length as ``a``.

    Returns:
        The square root of the sum of squared coordinate differences.

    Raises:
        ValueError: If ``a`` and ``b`` do not have the same length.
    """
    if len(a) != len(b):
        # Refuse to silently compare only a common prefix of the two points.
        raise ValueError(
            "points must have the same number of dimensions: "
            f"{len(a)} != {len(b)}"
        )
    squared_total = sum((p - q) ** 2 for p, q in zip(a, b))
    return squared_total ** 0.5
# Cluster label for each sample: the index (0 .. k-1) of its nearest
# centroid.  np.zeros yields floats, so labels are stored and printed as
# floats (0., 1., 2., ...).
labels = np.zeros(len(samples))

# Scratch buffer reused for every sample: distances[j] holds the distance
# from the current sample to centroids[j].
distances = np.zeros(k)

# Assign each sample to its closest centroid.  Every centroid is visited
# (instead of hard-coding indices 0, 1 and 2) so the assignment stays
# correct when k is changed.
for i, point in enumerate(sepal_length_width):
    for j, centroid in enumerate(centroids):
        distances[j] = distance(point, centroid)
    # On a tie, np.argmin picks the lowest-numbered centroid.
    cluster = np.argmin(distances)
    labels[i] = cluster

# Print labels
print(labels)