 import scipy.optimize as opt


-def visualizing_data(data):
-    print(data.head())
+def plotData(data):
     pos = data[data.admitted.isin(['1'])]
     neg = data[data.admitted.isin(['0'])]
     plt.scatter(pos['exam1'], pos['exam2'], marker='+', c='k', label='Admitted')
@@ -17,12 +16,24 @@ def visualizing_data(data):
     plt.show()


+def plotDecisionBoundary(theta, data):
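+    # The linear boundary is theta0 + theta1*x1 + theta2*x2 = 0; solving for x2
+    # gives the line x2 = -(theta0 + theta1*x1) / theta2 plotted below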
+    x1 = np.arange(100, step=0.1)
+    x2 = -(theta[0] + x1 * theta[1]) / theta[2]
+    pos = data[data.admitted.isin(['1'])]
+    neg = data[data.admitted.isin(['0'])]
+    plt.scatter(pos['exam1'], pos['exam2'], marker='+', c='k', label='Admitted')
+    plt.scatter(neg['exam1'], neg['exam2'], marker='o', c='y', label='Not Admitted')
+    plt.plot(x1, x2)
+    plt.axis([30, 100, 30, 100])
+    plt.xlabel('Exam 1 Score')
+    plt.ylabel('Exam 2 Score')
+    plt.legend(loc=1, prop={'size': 9})
+    plt.title('Decision Boundary')
+
+
 def visualizing_data2(data):
     print(data.head())
-    pos = data[data.Accepted.isin(['1'])]
-    neg = data[data.Accepted.isin(['0'])]
-    plt.scatter(pos['Test 1'], pos['Test 2'], marker='+', c='k', label='y=1')
-    plt.scatter(neg['Test 1'], neg['Test 2'], marker='o', c='y', label='y=0')
+
     # Set the x- and y-axis labels
     plt.xlabel('Microchip Test 1')
     plt.ylabel('Microchip Test 2')
@@ -49,26 +60,6 @@ def predict(theta, x):
     return [1 if x >= 0.5 else 0 for x in probability]


-def evaluating_logistic(final_theta, x, y, data):
-    predictions = predict(final_theta, x)
-    correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
-    accuracy = sum(correct) / len(x)
-    print(accuracy)
-    x1 = np.arange(100, step=0.1)
-    x2 = -(final_theta[0] + x1 * final_theta[1]) / final_theta[2]
-    pos = data[data.admitted.isin(['1'])]
-    neg = data[data.admitted.isin(['0'])]
-    plt.scatter(pos['exam1'], pos['exam2'], marker='+', c='k', label='Admitted')
-    plt.scatter(neg['exam1'], neg['exam2'], marker='o', c='y', label='Not Admitted')
-    plt.plot(x1, x2)
-    plt.axis([30, 100, 30, 100])
-    plt.xlabel('Exam 1 Score')
-    plt.ylabel('Exam 2 Score')
-    plt.legend(loc=1, prop={'size': 9})
-    plt.title('Decision Boundary')
-    plt.show()
-
-
 def feature_mapping(x1, x2, power):
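     # Build all polynomial combinations of x1 and x2 up to the given total degree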
     data = {}
     for i in np.arange(power + 1):
@@ -90,35 +81,91 @@ def gradientReg(theta, X, y, l=1):
     return gradient(theta, X, y) + reg


-def main():
-    # path = 'ex2data1.txt'
-    # data = pd.read_csv(path, names=['exam1', 'exam2', 'admitted'])
-    # # visualizing_data(data)
-    # data.insert(0, 'Ones', 1)
-    # x = data.iloc[:, :-1].values
-    # y = data.iloc[:, -1].values
-    # theta = np.zeros(x.shape[1])
-    # print(cost(theta, x, y))
-    # print(gradient(theta, x, y))
-    # result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
-    # final_theta = result[0]
-    # evaluating_logistic(final_theta, x, y, data)
-    data2 = pd.read_csv('ex2data2.txt', names=['Test 1', 'Test 2', 'Accepted'])
-    # visualizing_data2(data2)
-    x1 = data2['Test 1'].values
-    x2 = data2['Test 2'].values
-    _data2 = feature_mapping(x1, x2, power=6)
-    x = _data2.values
-    y = data2['Accepted'].values
-    theta = np.zeros(x.shape[1])
-    print(costReg(theta, x, y, lam=1))
-    result2 = opt.fmin_tnc(func=costReg, x0=theta, fprime=gradientReg, args=(x, y, 2))
-    final_theta = result2[0]
-    predictions = predict(final_theta, x)
-    correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
-    accuracy = sum(correct) / len(correct)
-    print(accuracy)
-
-
-if __name__ == '__main__':
-    main()
+# ==================== Part 1: Plotting ====================
+print('Plotting data with + indicating (y = 1) examples and o indicating (y = 0) examples.')
+path = 'ex2data1.txt'
+data = pd.read_csv(path, names=['exam1', 'exam2', 'admitted'])
+plotData(data)
+
+data.insert(0, 'Ones', 1)
+X = data.iloc[:, :-1].values
+y = data.iloc[:, -1].values
+input("Program paused. Press Enter to continue...")
+
+# ============ Part 2: Compute Cost and Gradient ============
+initial_theta = np.zeros(X.shape[1])
+J = cost(initial_theta, X, y)
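+# With theta = 0 the hypothesis is 0.5 for every example, so J = -ln(0.5) ≈ 0.693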
+print('Cost at initial theta (zeros): %f' % J)
+grad = gradient(initial_theta, X, y)
+print('Gradient at initial theta (zeros): ' + str(grad))
+
+input("Program paused. Press Enter to continue...")
+
+# ============= Part 3: Optimizing using scipy =============
+result = opt.fmin_tnc(func=cost, x0=initial_theta, fprime=gradient, args=(X, y))
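+# fmin_tnc returns a tuple (x, nfeval, rc); the optimized theta is element 0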
+theta = result[0]
+# Plot Boundary
+plotDecisionBoundary(theta, data)
+plt.show()
+input("Program paused. Press Enter to continue...")
+
+# ============== Part 4: Predict and Accuracies ==============
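+# Feature vector [1, 45, 85]: bias term plus exam scores of 45 and 85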
+prob = sigmoid(np.array([1, 45, 85]).dot(theta))
+print('For a student with scores 45 and 85, we predict an admission probability of %f' % prob)
+
+# Compute accuracy on our training set
+p = predict(theta, X)
+correct = [1 if a == b else 0 for (a, b) in zip(p, y)]
+accuracy = sum(correct) / len(correct)
+print('Train Accuracy: %f' % accuracy)
+
+input("Program paused. Press Enter to continue...")
+
+# =========== Part 5: Regularized Logistic Regression ============
+
+data2 = pd.read_csv('ex2data2.txt', names=['Test 1', 'Test 2', 'Accepted'])
+
+x1 = data2['Test 1'].values
+x2 = data2['Test 2'].values
+_data2 = feature_mapping(x1, x2, power=6)
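+# The degree-6 mapping produces (6+1)(6+2)/2 = 28 features, including the bias column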
+X = _data2.values
+y = data2['Accepted'].values
+initial_theta = np.zeros(X.shape[1])
+
+J = costReg(initial_theta, X, y, lam=1)
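+# The regularization term vanishes at theta = 0, so the expected cost is again -ln(0.5) ≈ 0.693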
+print('Cost at initial theta (zeros): %f' % J)
+result2 = opt.fmin_tnc(func=costReg, x0=initial_theta, fprime=gradientReg, args=(X, y, 2))
+theta = result2[0]
+predictions = predict(theta, X)
+correct = [1 if a == b else 0 for (a, b) in zip(predictions, y)]
+accuracy = sum(correct) / len(correct)
+print('Train Accuracy: %f' % accuracy)
+
+input("Program paused. Press Enter to continue...")
+
+
+# ============= Part 6: Optional Exercises =============
+
+# Plot Boundary
+def plotBoundary(theta, data2, Lambda):
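+    # Evaluate theta . mapFeature(x1, x2) on a dense grid and draw its zero level set,
+    # which is the model's nonlinear decision boundary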
+    plt.title(r'$\lambda$ = ' + str(Lambda))
+    x = np.linspace(-1, 1.5, 250)
+    xx, yy = np.meshgrid(x, x)
+    z = feature_mapping(xx.ravel(), yy.ravel(), power=6).values
+    z = z @ theta
+    z = z.reshape(xx.shape)
+    pos = data2[data2.Accepted.isin(['1'])]
+    neg = data2[data2.Accepted.isin(['0'])]
+    plt.scatter(pos['Test 1'], pos['Test 2'], marker='+', c='k', label='y=1')
+    plt.scatter(neg['Test 1'], neg['Test 2'], marker='o', c='y', label='y=0')
+    plt.contour(xx, yy, z, [0])
+    plt.ylim(-.8, 1.2)
+    plt.show()
+
+
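+# Sweep lambda to see the effect of regularization: lambda = 0 overfits,
+# while a large lambda underfits the training data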
+for Lambda in np.arange(0.0, 10.1, 1.0):
+    result2 = opt.fmin_tnc(func=costReg, x0=initial_theta, fprime=gradientReg, args=(X, y, Lambda))
+    theta = result2[0]
+    print('lambda = ' + str(Lambda))
+    print('theta:', ["%0.4f" % i for i in theta])
+    plotBoundary(theta, data2, Lambda)