Commit 21e15c5

Author: wkd-lidashuang
Message: update
Parent: e5aac80

3 files changed: +132, -90 lines

ex1/machine-learning-ex1.py (-3 lines)

@@ -5,9 +5,6 @@


 def plot_data(data):
-    # print(data.head())
-    # print(data.describe())
-    # print(data)
     data.plot(kind='scatter', x='Population', y='Profit', figsize=(8, 5))
     plt.xlabel('Population of City in 10,000s')
     plt.ylabel('Profit in $10,000s')
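
For context, plot_data leans entirely on pandas plotting. A minimal usage sketch (assumptions: the course's ex1data1.txt with two comma-separated columns, and imports this diff does not show):

    import pandas as pd
    import matplotlib.pyplot as plt

    # Assumed input: ex1data1.txt, comma-separated population and profit columns.
    data = pd.read_csv('ex1data1.txt', names=['Population', 'Profit'])
    plot_data(data)  # plot_data as defined above
    plt.show()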

ex2/machine-learning-ex2.py (+105, -58 lines)

@@ -4,8 +4,7 @@
 import scipy.optimize as opt


-def visualizing_data(data):
-    print(data.head())
+def plotData(data):
     pos = data[data.admitted.isin(['1'])]
     neg = data[data.admitted.isin(['0'])]
     plt.scatter(pos['exam1'], pos['exam2'], marker='+', c='k', label='Admitted')
@@ -17,12 +16,24 @@ def visualizing_data(data):
     plt.show()


+def plotDecisionBoundary(theta, data):
+    x1 = np.arange(100, step=0.1)
+    x2 = -(theta[0] + x1 * theta[1]) / theta[2]
+    pos = data[data.admitted.isin(['1'])]
+    neg = data[data.admitted.isin(['0'])]
+    plt.scatter(pos['exam1'], pos['exam2'], marker='+', c='k', label='Admitted')
+    plt.scatter(neg['exam1'], neg['exam2'], marker='o', c='y', label='Not Admitted')
+    plt.plot(x1, x2)
+    plt.axis([30, 100, 30, 100])
+    plt.xlabel('Exam 1 Score')
+    plt.ylabel('Exam 2 Score')
+    plt.legend(loc=1, prop={'size': 9})
+    plt.title('Decision Boundary')
+
+
 def visualizing_data2(data):
     print(data.head())
-    pos = data[data.Accepted.isin(['1'])]
-    neg = data[data.Accepted.isin(['0'])]
-    plt.scatter(pos['Test 1'], pos['Test 2'], marker='+', c='k', label='y=1')
-    plt.scatter(neg['Test 1'], neg['Test 2'], marker='o', c='y', label='y=0')
+
     # set the x- and y-axis labels
     plt.xlabel('Microchip Test 1')
     plt.ylabel('Microchip Test 2')
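
A note on plotDecisionBoundary: the plotted line x2 = -(theta[0] + theta[1]*x1) / theta[2] is the set of points where theta'x = 0, i.e. where sigmoid(theta'x) = 0.5, the decision threshold. The script also calls sigmoid, cost, and gradient, none of which appear in this diff; a minimal sketch consistent with how they are used below (assumed implementations, standard logistic-regression formulas):

    import numpy as np

    def sigmoid(z):
        # Logistic function: squashes any real z into (0, 1).
        return 1 / (1 + np.exp(-z))

    def cost(theta, X, y):
        # Cross-entropy cost: -mean(y*log(h) + (1 - y)*log(1 - h)).
        h = sigmoid(X @ theta)
        return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

    def gradient(theta, X, y):
        # Gradient of the cost: X.T @ (h - y) / m, same shape as theta.
        return X.T @ (sigmoid(X @ theta) - y) / len(y)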
@@ -49,26 +60,6 @@ def predict(theta, x):
     return [1 if x >= 0.5 else 0 for x in probability]


-def evaluating_logistic(final_theta, x, y, data):
-    predictions = predict(final_theta, x)
-    correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
-    accuracy = sum(correct) / len(x)
-    print(accuracy)
-    x1 = np.arange(100, step=0.1)
-    x2 = -(final_theta[0] + x1 * final_theta[1]) / final_theta[2]
-    pos = data[data.admitted.isin(['1'])]
-    neg = data[data.admitted.isin(['0'])]
-    plt.scatter(pos['exam1'], pos['exam2'], marker='+', c='k', label='Admitted')
-    plt.scatter(neg['exam1'], neg['exam2'], marker='o', c='y', label='Not Admitted')
-    plt.plot(x1, x2)
-    plt.axis([30, 100, 30, 100])
-    plt.xlabel('Exam 1 Score')
-    plt.ylabel('Exam 2 Score')
-    plt.legend(loc=1, prop={'size': 9})
-    plt.title('Decision Boundary')
-    plt.show()
-
-
 def feature_mapping(x1, x2, power):
     data = {}
     for i in np.arange(power + 1):
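
The hunk cuts feature_mapping off after its outer loop. A sketch of the likely completion (an assumption: the standard ex2 polynomial mapping generating every term x1^(i-j) * x2^j for 0 <= j <= i <= power, 28 columns for power=6; the 'F{}{}' column names are illustrative):

    import numpy as np
    import pandas as pd

    def feature_mapping(x1, x2, power):
        # Expand two raw features into all polynomial terms up to the given degree.
        data = {}
        for i in np.arange(power + 1):
            for j in np.arange(i + 1):
                data['F{}{}'.format(i - j, j)] = np.power(x1, i - j) * np.power(x2, j)
        return pd.DataFrame(data)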
@@ -90,35 +81,91 @@ def gradientReg(theta, X, y, l=1):
     return gradient(theta, X, y) + reg


-def main():
-    # path = 'ex2data1.txt'
-    # data = pd.read_csv(path, names=['exam1', 'exam2', 'admitted'])
-    # # visualizing_data(data)
-    # data.insert(0, 'Ones', 1)
-    # x = data.iloc[:, :-1].values
-    # y = data.iloc[:, -1].values
-    # theta = np.zeros(x.shape[1])
-    # print(cost(theta, x, y))
-    # print(gradient(theta, x, y))
-    # result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
-    # final_theta = result[0]
-    # evaluating_logistic(final_theta, x, y, data)
-    data2 = pd.read_csv('ex2data2.txt', names=['Test 1', 'Test 2', 'Accepted'])
-    # visualizing_data2(data2)
-    x1 = data2['Test 1'].values
-    x2 = data2['Test 2'].values
-    _data2 = feature_mapping(x1, x2, power=6)
-    x = _data2.values
-    y = data2['Accepted'].values
-    theta = np.zeros(x.shape[1])
-    print(costReg(theta, x, y, lam=1))
-    result2 = opt.fmin_tnc(func=costReg, x0=theta, fprime=gradientReg, args=(x, y, 2))
-    final_theta = result2[0]
-    predictions = predict(final_theta, x)
-    correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
-    accuracy = sum(correct) / len(correct)
-    print(accuracy)
-
-
-if __name__ == '__main__':
-    main()
+# ==================== Part 1: Plotting ====================
+print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')
+path = 'ex2data1.txt'
+data = pd.read_csv(path, names=['exam1', 'exam2', 'admitted'])
+plotData(data)
+
+data.insert(0, 'Ones', 1)
+X = data.iloc[:, :-1].values
+y = data.iloc[:, -1].values
+input("Program paused. Press Enter to continue...")
+
+# ============ Part 2: Compute Cost and Gradient ============
+initial_theta = np.zeros(X.shape[1])
+J = cost(initial_theta, X, y)
+print('Cost at initial theta (zeros): %f' % J)
+grad = gradient(initial_theta, X, y)
+print('Gradient at initial theta (zeros): ' + str(grad))
+
+input("Program paused. Press Enter to continue...")
+
+# ============= Part 3: Optimizing using scipy =============
+result = opt.fmin_tnc(func=cost, x0=initial_theta, fprime=gradient, args=(X, y))
+theta = result[0]
+# Plot Boundary
+plotDecisionBoundary(theta, data)
+plt.show()
+input("Program paused. Press Enter to continue...")
+
+# ============== Part 4: Predict and Accuracies ==============
+prob = sigmoid(np.array([1, 45, 85]).dot(theta))
+print('For a student with scores 45 and 85, we predict an admission probability of %f' % prob)
+
+# Compute accuracy on our training set
+p = predict(theta, X)
+correct = [1 if a == b else 0 for (a, b) in zip(p, y)]
+accuracy = sum(correct) / len(correct)
+print('Train Accuracy: %f' % accuracy)
+
+input("Program paused. Press Enter to continue...")
+
+# =========== Part 5: Regularized Logistic Regression ============
+
+data2 = pd.read_csv('ex2data2.txt', names=['Test 1', 'Test 2', 'Accepted'])
+
+x1 = data2['Test 1'].values
+x2 = data2['Test 2'].values
+_data2 = feature_mapping(x1, x2, power=6)
+X = _data2.values
+y = data2['Accepted'].values
+initial_theta = np.zeros(X.shape[1])
+
+J = costReg(initial_theta, X, y, lam=1)
+print('Cost at initial theta (zeros): %f' % J)
+result2 = opt.fmin_tnc(func=costReg, x0=initial_theta, fprime=gradientReg, args=(X, y, 2))
+theta = result2[0]
+predictions = predict(theta, X)
+correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
+accuracy = sum(correct) / len(correct)
+print(accuracy)
+
+input("Program paused. Press Enter to continue...")
+
+
+# ============= Part 6: Optional Exercises =============
+
+# Plot Boundary
+def plotBoundary(theta, data2, Lambda):
+    plt.title(r'$\lambda$ = ' + str(Lambda))
+    x = np.linspace(-1, 1.5, 250)
+    xx, yy = np.meshgrid(x, x)
+    z = feature_mapping(xx.ravel(), yy.ravel(), power=6).values
+    z = z @ theta
+    z = z.reshape(xx.shape)
+    pos = data2[data2.Accepted.isin(['1'])]
+    neg = data2[data2.Accepted.isin(['0'])]
+    plt.scatter(pos['Test 1'], pos['Test 2'], marker='+', c='k', label='y=1')
+    plt.scatter(neg['Test 1'], neg['Test 2'], marker='o', c='y', label='y=0')
+    plt.contour(xx, yy, z, 0)
+    plt.ylim(-.8, 1.2)
+    plt.show()
+
+
+for Lambda in np.arange(0.0, 10.1, 1.0):
+    result2 = opt.fmin_tnc(func=costReg, x0=initial_theta, fprime=gradientReg, args=(X, y, Lambda))
+    theta = result2[0]
+    print('lambda = ' + str(Lambda))
+    print('theta:', ["%0.4f" % i for i in theta])
+    plotBoundary(theta, data2, Lambda)
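
Only the last line of gradientReg appears in this diff, and costReg not at all. A sketch consistent with the calls above (assumed implementations; the intercept theta[0] is conventionally left out of the penalty, and cost/gradient are the unregularized functions sketched earlier):

    import numpy as np

    def costReg(theta, X, y, lam=1):
        # Unregularized cost plus (lam / 2m) * ||theta[1:]||^2.
        m = len(y)
        return cost(theta, X, y) + (lam / (2 * m)) * np.sum(np.power(theta[1:], 2))

    def gradientReg(theta, X, y, l=1):
        # Matches the diff's closing line `return gradient(theta, X, y) + reg`:
        # reg = (l / m) * theta with the intercept component zeroed.
        reg = (l / len(y)) * theta
        reg[0] = 0
        return gradient(theta, X, y) + reg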

ex3/machine-learning-ex3.py (+27, -29 lines)

@@ -12,15 +12,7 @@ def load_data(path):
     return x, y


-def plot_an_image(X, y):
-    pick_one = np.random.randint(0, 5000)
-    image = X[pick_one, :]
-    plt.matshow(image.reshape((20, 20)))
-    plt.show()
-    print('this should be {}'.format(y[pick_one]))
-
-
-def plot_100_image(X):
+def display_data(X):
     """
     Randomly plot 100 of the digits
     """
@@ -31,7 +23,7 @@ def plot_100_image(X):

     for row in range(10):
         for column in range(10):
-            ax_array[row, column].matshow(sample_images[10 * row + column].reshape((20, 20)))
+            ax_array[row, column].matshow(sample_images[10 * row + column].reshape((20, 20)), cmap='gray')
     plt.xticks([])
     plt.yticks([])
     plt.show()
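
load_data is truncated by the hunk header above; a sketch of the usual implementation (assumed, reading the course's MATLAB-format file with scipy):

    from scipy.io import loadmat

    def load_data(path):
        # ex3data1.mat holds 5000 unrolled 20x20 grayscale digits plus labels.
        data = loadmat(path)
        x = data['X']  # (5000, 400)
        y = data['y']  # (5000, 1); labels run 1..10, with 10 encoding digit 0
        return x, y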
@@ -70,12 +62,12 @@ def regularized_gradient(theta, X, y, l):
     return first + reg


-def one_vs_all(X, y, l, K):
+def one_vs_all(X, y, Lambda, K):
     """generalized logistic regression
     args:
         X: feature matrix, (m, n+1) # with intercept x0=1
         y: target vector, (m, )
-        l: lambda constant for regularization
+        Lambda: lambda constant for regularization
         K: number of labels
     return: trained parameters
     """
@@ -85,10 +77,9 @@ def one_vs_all(X, y, l, K):
         theta = np.zeros(X.shape[1])  # (401,)
         y_i = np.array([1 if label == i else 0 for label in y])

-        ret = minimize(fun=regularized_cost, x0=theta, args=(X, y_i, l), method='TNC',
+        ret = minimize(fun=regularized_cost, x0=theta, args=(X, y_i, Lambda), method='TNC',
                        jac=regularized_gradient, options={'disp': True})
         all_theta[i - 1, :] = ret.x
-        print(all_theta.shape)
     return all_theta

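
one_vs_all hands regularized_cost and regularized_gradient to scipy's minimize, but the diff shows only the gradient's final `return first + reg`. A sketch consistent with those calls (assumed implementations; sigmoid as sketched for ex2, intercept unpenalized as before). Each of the K rows of all_theta then stores one trained binary classifier, digit i versus the rest:

    import numpy as np

    def regularized_cost(theta, X, y, l):
        # Cross-entropy cost plus an L2 penalty on theta[1:].
        h = sigmoid(X @ theta)
        penalty = (l / (2 * len(X))) * np.sum(np.power(theta[1:], 2))
        return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h)) + penalty

    def regularized_gradient(theta, X, y, l):
        # first: plain gradient; reg: (l / m) * theta with the intercept zeroed.
        first = X.T @ (sigmoid(X @ theta) - y) / len(X)
        reg = (l / len(X)) * theta
        reg[0] = 0
        return first + reg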

@@ -112,11 +103,9 @@ def load_weight(path):
 def neural_networks():
     # Neural Networks
     theta1, theta2 = load_weight('ex3weights.mat')
-    print(theta1.shape, theta2.shape)
     X, y = load_data('ex3data1.mat')
     y = y.flatten()
     X = np.insert(X, 0, 1, axis=1)  # intercept
-    print(X.shape)
     a1 = X
     z2 = a1 @ theta1.T
     a2 = np.insert(sigmoid(z2), 0, 1, axis=1)
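
The gap before the next hunk elides the rest of the forward pass. Presumably it finishes in the standard way (a sketch of the elided lines, assuming theta2 maps the bias-augmented 26-unit hidden layer to 10 outputs and labels run 1..10):

    # Sketch of the elided forward-pass tail (assumed):
    z3 = a2 @ theta2.T                  # (5000, 10) output pre-activations
    a3 = sigmoid(z3)                    # per-class probabilities
    y_pred = np.argmax(a3, axis=1) + 1  # +1 because labels are 1-indexed
    accuracy = np.mean(y_pred == y)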
@@ -127,18 +116,27 @@ def neural_networks():
     print('accuracy = {0}%'.format(accuracy * 100))  # accuracy = 97.52%


-def main():
-    x, y = load_data('ex3data1.mat')
-    # plot_an_image(x, y)
-    # plot_100_image(x)
-    x = np.insert(x, 0, 1, axis=1)  # (5000, 401)
-    y = y.flatten()  # drop the extra dimension to ease later computations, or .reshape(-1)  (5000,)
-    all_theta = one_vs_all(x, y, 1, 10)  # each row is one classifier's parameter vector
-    y_pred = predict_all(x, all_theta)
-    accuracy = np.mean(y_pred == y)
-    print('accuracy = {0}%'.format(accuracy * 100))
+## =========== Part 1: Loading and Visualizing Data =============
+print('Loading and Visualizing Data ...')
+X, y = load_data('ex3data1.mat')
+
+# Randomly select 100 data points to display
+display_data(X)
+
+input("Program paused. Press Enter to continue...")
+
+## ============ Part 2: Vectorize Logistic Regression ============
+print('Training One-vs-All Logistic Regression...')
+Lambda = 1
+X = np.insert(X, 0, 1, axis=1)  # (5000, 401)
+y = y.flatten()  # drop the extra dimension to ease later computations, or .reshape(-1)  (5000,)
+all_theta = one_vs_all(X, y, Lambda, 10)
+input("Program paused. Press Enter to continue...")

+## ================ Part 3: Predict for One-Vs-All ================
+y_pred = predict_all(X, all_theta)
+accuracy = np.mean(y_pred == y)
+print('accuracy = {0}%'.format(accuracy * 100))

-if __name__ == '__main__':
-    # main()
-    neural_networks()
+## ================ Neural Networks ================
+neural_networks()
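
predict_all never appears in this diff; a minimal sketch consistent with its use in Part 3 (an assumption: score each example under all K one-vs-all classifiers and take the best):

    import numpy as np

    def predict_all(X, all_theta):
        # h[m, k]: confidence that example m belongs to class k + 1.
        h = sigmoid(X @ all_theta.T)     # (5000, 10), sigmoid as sketched above
        return np.argmax(h, axis=1) + 1  # labels are 1..10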
