-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSMOTE.py
58 lines (48 loc) · 1.89 KB
/
SMOTE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# coding=utf8
"""
This script implements the following steps:
1. Over-sample the minority samples with the SMOTE algorithm
2. (Optional) Down-sample the majority samples
3. Build the classifier model
"""
import random
import numpy as np
from sklearn.neighbors import NearestNeighbors
def SMOTE(minority_samples, N, k):
    """
    Generate synthetic minority-class samples with the SMOTE algorithm.

    Reference: [JAIR'02] SMOTE - Synthetic Minority Over-sampling Technique.

    minority_samples    2-D array-like of minority samples, one row per
                        sample and one column per attribute
    N                   Amount of over-sampling in percent. For N >= 100,
                        N // 100 synthetic samples are produced for every
                        minority sample (a remainder is ignored). For
                        N < 100, a random N% of the minority samples
                        each produce one synthetic sample.
    k                   Number of nearest neighbours to interpolate towards;
                        silently capped at len(minority_samples) - 1
    @return             float array with one synthetic sample per row
    @raise ValueError   if fewer than two minority samples are given, or if
                        N or k is not positive
    """
    samples = np.asarray(minority_samples, dtype=float)
    total = len(samples)
    if total < 2:
        raise ValueError(
            "SMOTE needs at least 2 minority samples, got %d" % total)
    if N <= 0:
        raise ValueError("N must be a positive percentage, got %r" % (N,))
    n_neighbors = int(k)
    if n_neighbors < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    if N < 100:
        # Under 100%: only a random N% of the samples are over-sampled, once
        # each.  Multiply before dividing so integer inputs stay exact.
        chosen = random.sample(range(total), int(N * total // 100))
        per_sample = 1
    else:
        chosen = range(total)
        per_sample = int(N // 100)

    # A sample is never its own neighbour, so at most total - 1 exist.
    n_neighbors = min(n_neighbors, total - 1)
    neigh = NearestNeighbors(n_neighbors=n_neighbors).fit(samples)
    # Without a query argument, kneighbors() returns the neighbours of every
    # fitted point and leaves the point itself out.  Querying a fitted point
    # explicitly would return the point as its own nearest neighbour and
    # yield synthetic samples that merely duplicate the original.
    neighbours = neigh.kneighbors(return_distance=False)

    synthetic_samples = []
    for i in chosen:
        populate(samples, per_sample, n_neighbors, i, neighbours[i],
                 synthetic_samples)
    return np.array(synthetic_samples, float)
def populate(minority_samples, N, k, i, nnarray, synthetic_samples):
    """
    Append N synthetic samples derived from minority_samples[i] to
    synthetic_samples (the list is modified in place; nothing is returned).

    minority_samples    indexable collection of samples
    N                   number of synthetic samples to create
    k                   number of leading entries of nnarray to choose from
    i                   index of the sample being expanded
    nnarray             indices (into minority_samples) of the neighbours of
                        sample i
    synthetic_samples   output list; each new sample is appended as a list
    """
    base = minority_samples[i]
    attr_count = len(base)
    remaining = N
    while remaining > 0:
        # 0-based position inside nnarray: 0 <= pick < k.
        pick = random.choice(range(k))
        # Every attribute gets its own random gap in [0, 1), moving it
        # independently somewhere between the base value and the neighbour's.
        synthetic_samples.append([
            base[attr]
            + random.random() * (minority_samples[nnarray[pick]][attr] - base[attr])
            for attr in range(attr_count)
        ])
        remaining -= 1