-
Notifications
You must be signed in to change notification settings - Fork 53
Expand file tree
/
Copy pathlog_reg.py
More file actions
128 lines (83 loc) · 2.57 KB
/
log_reg.py
File metadata and controls
128 lines (83 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from numpy import loadtxt, where, zeros, e, array, log, ones, mean, where
from pylab import scatter, show, legend, xlabel, ylabel, plot
from scipy.optimize import fmin_bfgs
def sigmoid(X):
'''Compute the sigmoid function '''
#d = zeros(shape=(X.shape))
den = 1.0 + e ** (-1.0 * X)
d = 1.0 / den
return d
def compute_cost(theta, X, y):
'''
Comput cost for logistic regression
'''
#Number of training samples
theta.shape = (1, 3)
m = y.size
h = sigmoid(X.dot(theta.T))
J = (1.0 / m) * ((-y.T.dot(log(h))) - ((1.0 - y.T).dot(log(1.0 - h))))
return - 1 * J.sum()
def compute_grad(theta, X, y):
#print theta.shape
theta.shape = (1, 3)
grad = zeros(3)
h = sigmoid(X.dot(theta.T))
delta = h - y
l = grad.size
for i in range(l):
sumdelta = delta.T.dot(X[:, i])
grad[i] = (1.0 / m) * sumdelta * - 1
theta.shape = (3,)
return grad
#load the dataset
data = loadtxt('ex2data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Exam 1 score')
ylabel('Exam 2 score')
legend(['Not Admitted', 'Admitted'])
#show()
m, n = X.shape
y.shape = (m, 1)
#Add intercept term to x and X_test
it = ones(shape=(m, 3))
it[:, 1:3] = X
def decorated_cost(it, y):
def f(theta):
return compute_cost(theta, it, y)
def fprime(theta):
return compute_grad(theta, it, y)
#Initialize theta parameters
theta = zeros(3)
return fmin_bfgs(f, theta, fprime, disp=True, maxiter=400)
decorated_cost(it, y)
theta = [-25.161272, 0.206233, 0.201470]
#Plotting the decision boundary
plot_x = array([min(it[:, 1]) - 2, max(it[:, 2]) + 2])
plot_y = (- 1.0 / theta[2]) * (theta[1] * plot_x + theta[0])
plot(plot_x, plot_y)
legend(['Decision Boundary', 'Not admitted', 'Admitted'])
#show()
prob = sigmoid(array([1.0, 45.0, 85.0]).dot(array(theta).T))
print 'For a student with scores 45 and 85, we predict and admission ' + \
'probability of %f' % prob
def predict(theta, X):
'''Predict whether the label
is 0 or 1 using learned logistic
regression parameters '''
m, n = X.shape
p = zeros(shape=(m, 1))
h = sigmoid(X.dot(theta.T))
for it in range(0, h.shape[0]):
if h[it] > 0.5:
p[it, 0] = 1
else:
p[it, 0] = 0
return p
#Compute accuracy on our training set
p = predict(array(theta), it)
print 'Train Accuracy: %f' % ((y[where(p == y)].size / float(y.size)) * 100.0)