# sfs.py — sequential feature selection for sleep-stage classification
# (forked from DeepSleepUCDenver/sleep_models)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.preprocessing import scale, normalize
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.semi_supervised import label_propagation
from sklearn.semi_supervised import LabelSpreading
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix
n_features = 15
# Read am partition the matrix
def load_data():
    """Load the feature/stage matrix and split it by label availability.

    Reads ./feature_stage_data_all.ftr, takes every column after the
    first three as the feature matrix, L2-normalizes each row, and
    partitions the rows by whether their 'stage' label is present.

    Returns:
        (x_obs, y_obs, x_nuls): features and stages for the labeled rows,
        plus the features of the rows whose stage is NaN (unlabeled).
    """
    frame = pd.read_feather('./feature_stage_data_all.ftr')
    features = normalize(frame[frame.columns[3:]].values)
    stages = frame['stage'].values
    # NaN in 'stage' marks an unlabeled epoch.
    unlabeled = np.isnan(stages)
    labeled = ~unlabeled
    return features[labeled], stages[labeled], features[unlabeled]
x_obs, y_obs, x_nuls = load_data()
def do_sfs(x_tr, y_tr):
    """Run sequential floating forward selection with an RBF SVM.

    Selects `n_features` features by 5-fold cross-validated accuracy.

    Args:
        x_tr: training feature matrix.
        y_tr: training stage labels.

    Returns:
        The fitted SequentialFeatureSelector (mlxtend `fit` returns self).
    """
    selector = sfs(
        svm.SVC(kernel='rbf'),
        k_features=n_features,
        forward=True,
        floating=True,
        verbose=2,
        scoring='accuracy',
        cv=5,
    )
    return selector.fit(x_tr, y_tr)
# Do some initial splitting: balance the five stages by downsampling
# each to the size of the rarest one, then hold out a 20% test set.
x, y = shuffle(x_obs, y_obs, random_state=42)
smpnum = min(int(np.sum(y == i)) for i in range(1, 6))
x_btr = np.concatenate([x[y == i][:smpnum] for i in range(1, 6)])
y_btr = np.concatenate([y[y == i][:smpnum] for i in range(1, 6)])
x_tr, x_te, y_tr, y_te = train_test_split(x_btr, y_btr, test_size=0.20)
best = do_sfs(x_tr, y_tr)
# Examine the results: accuracy curve over subset sizes.
plot = plot_sfs(best.get_metric_dict())
plot[1].figure.savefig("SFS-" + str(n_features) + ".png")
# Report the cross-validated accuracy at every subset size.
# BUG fix: the original iterated only range(1, 11) although n_features=15,
# and discarded each avg_score instead of reporting it.
for i in range(1, n_features + 1):
    print(i, best.get_metric_dict()[i]['avg_score'])
# BUG fix: the original called test_svm(x_all, y_all) here, but test_svm is
# never defined and x_all/y_all are only created later in the script, so the
# call raised NameError unconditionally; removed.
# Make a more select dataset: re-load everything and keep only the
# SFS-selected feature columns, then label the unlabeled rows.
x_obs, y_obs, x_nuls = load_data()
keep = list(best.k_feature_idx_)
np.save('sfs_features', keep)
# keep = np.load('sfs_features.npy')
x_obs = x_obs[:, keep]
x_nuls = x_nuls[:, keep]
# Apply LabelSpreading to propagate stage labels onto the NaN-stage rows.
label_spread = LabelSpreading(kernel='knn', alpha=0.8)
label_spread.fit(x_obs, y_obs)
x_all = np.concatenate([x_obs, x_nuls], axis=0)
y_all = np.concatenate([y_obs, label_spread.predict(x_nuls)], axis=0)
# Balance the five stages by downsampling to the rarest one, then split.
x, y = shuffle(x_all, y_all, random_state=42)
smpnum = min(int(np.sum(y == i)) for i in range(1, 6))
x_btr = np.concatenate([x[y == i][:smpnum] for i in range(1, 6)])
y_btr = np.concatenate([y[y == i][:smpnum] for i in range(1, 6)])
x_tr, x_te, y_tr, y_te = train_test_split(x_btr, y_btr, test_size=0.20)
# Train and evaluate an RBF SVM on the reduced feature set.
mod = svm.SVC(kernel='rbf')
mod.fit(x_tr, y_tr)
# BUG fix: the original computed the test accuracy and discarded it.
print("test accuracy:", mod.score(x_te, y_te))
disp = plot_confusion_matrix(mod, x_te, y_te,
                             cmap=plt.cm.Blues,
                             normalize='true')
disp.ax_.set_title("RBF Kernel with " + str(n_features) + " best features")
# BUG fix: the original saved the figure returned by a second disp.plot()
# call, which re-draws into a fresh figure without the title and with the
# default colormap; save the styled figure directly instead.
disp.figure_.savefig("CM-SVM-RBF-" + str(n_features) + ".png")