Skip to content

Commit 353c780

Browse files
committed
add svm
1 parent 70a2dfd commit 353c780

File tree

3 files changed

+154
-0
lines changed

3 files changed

+154
-0
lines changed

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,4 +482,21 @@ print '快慢指针方式,单链表中间节点为:%s,索引为:%s,只遍历
482482
```
483483

484484

485+
### 支持向量机 svm.py
486+
```
487+
迟早会忘记的svm
488+
分类算法,寻找一个最优超平面
489+
demo 使用线性可分的数据
490+
491+
参考1:https://zh.wikipedia.org/zh-hans/支持向量机
492+
参考2:http://blog.csdn.net/viewcode/article/details/12840405
493+
参考3:http://blog.csdn.net/lisi1129/article/details/70209945?locationNum=8&fps=1
494+
495+
依赖:
496+
pip install numpy
497+
pip install matplotlib
498+
499+
执行:python svm.py
500+
```
501+
![](https://github.com/LockGit/Py/blob/master/img/svm.png)
485502

img/svm.png

108 KB
Loading

svm.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# -*- coding: utf-8 -*-
2+
# @Author: lock
3+
# @Date: 2017-12-21 09:58:01
4+
# @Last Modified by: lock
5+
# @Last Modified time: 2017-12-21 17:41:07
6+
# 分类算法之SVM,比KNN算法更加复杂
7+
# demo最简单的线性可分离数据
8+
# 参考:http://blog.csdn.net/lisi1129/article/details/70209945?locationNum=8&fps=1
9+
10+
import numpy as np
11+
from matplotlib import pyplot
12+
import math
13+
import sys
14+
15+
class SVM(object):
    """Grid-search linear SVM for two-feature, linearly separable data.

    ``train`` looks for the smallest-norm weight vector ``w`` and a bias
    ``b`` such that ``y * (dot(w, x) + b) >= 1`` holds for every training
    sample ``x`` with label ``y``.  Candidate weights have the form
    ``(+/-a, +/-a)``; ``a`` and ``b`` are swept over progressively finer
    grids derived from the largest feature value.
    """

    # Sign patterns applied to the candidate weight vector, in sweep order.
    _SIGN_PATTERNS = ((1, 1), (-1, 1), (-1, -1), (1, -1))

    def __init__(self, visual=True):
        """Create a classifier.

        Args:
            visual: when true, a matplotlib figure is created so that
                ``predict`` and ``visualize`` can draw on it.
        """
        self.visual = visual
        # Scatter colours keyed by class label.
        self.colors = {1: 'r', -1: 'b'}
        if self.visual:
            self.fig = pyplot.figure()
            self.ax = self.fig.add_subplot(1, 1, 1)

    def train(self, data):
        """Fit ``self.w`` and ``self.b`` to ``data``.

        Args:
            data: dict mapping a class label (1 or -1) to a sequence of
                two-feature samples.

        Raises:
            ValueError: if ``data`` holds no feature values, or if no
                separating hyperplane exists on the search grid.
        """
        self.data = data

        # Smallest and largest feature value over every sample; these set
        # the scale of the search grid and of the plot.
        feature_values = [feature
                          for points in data.values()
                          for features in points
                          for feature in features]
        if not feature_values:
            raise ValueError('training data contains no samples')
        self.max_feature_value = max(feature_values)
        self.min_feature_value = min(feature_values)
        print(self.max_feature_value, self.min_feature_value)

        samples = [np.asarray(x, dtype=float) for y in data for x in data[y]]
        labels = np.array([y for y in data for _ in data[y]], dtype=float)

        # Like a learning-rate schedule: coarse steps first, then finer
        # (and more expensive) ones.
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      self.max_feature_value * 0.001]
        b_range_multiple = 5
        b_multiple = 5
        b_limit = self.max_feature_value * b_range_multiple
        sign_patterns = np.array(self._SIGN_PATTERNS)

        opt_dict = {}  # norm of w -> [w, b] for every feasible candidate
        latest_optimum = self.max_feature_value * 10

        for step in step_sizes:
            b_values = np.arange(-b_limit, b_limit, step * b_multiple)
            w = np.array([latest_optimum, latest_optimum])
            optimized = False
            while not optimized:
                candidates = w * sign_patterns  # shape (4, 2)
                # One np.dot per (sample, candidate) pair keeps the
                # arithmetic identical to a scalar evaluation; only the
                # bias sweep below is vectorised.
                projections = np.array(
                    [[np.dot(candidate, x) for candidate in candidates]
                     for x in samples])
                # margins[k, t, j] = y_k * (dot(w_t, x_k) + b_j)
                margins = labels[:, None, None] * (
                    projections[:, :, None] + b_values)
                feasible = np.all(margins >= 1, axis=0)  # shape (4, len(b))

                if feasible.any():
                    # All sign patterns share one norm, so a single entry is
                    # kept per magnitude: the largest feasible bias, then the
                    # latest sign pattern feasible for that bias.
                    b_index = np.flatnonzero(feasible.any(axis=0))[-1]
                    t_index = np.flatnonzero(feasible[:, b_index])[-1]
                    w_t = candidates[t_index].copy()
                    opt_dict[np.linalg.norm(w_t)] = [w_t, b_values[b_index]]

                if w[0] < 0:
                    optimized = True
                else:
                    w = w - step

            if not opt_dict:
                raise ValueError('no separating hyperplane found on the '
                                 'search grid; is the data linearly '
                                 'separable?')

            self.w, self.b = opt_dict[min(opt_dict)]
            print(self.w, self.b)
            # Restart the finer pass just above the best magnitude so far.
            # abs() matters when the best w has a negative first component;
            # without it the next pass would start at or below zero and skip
            # the refinement entirely.
            latest_optimum = abs(self.w[0]) + step * 2

    def predict(self, features):
        """Classify one sample.

        Args:
            features: the two feature values of the sample.

        Returns:
            ``numpy.sign(dot(features, w) + b)``; 0 means the sample lies
            on the decision boundary.  When plotting is enabled and the
            result is non-zero, the sample is drawn as a star in its class
            colour.
        """
        classification = np.sign(np.dot(features, self.w) + self.b)

        if classification != 0 and self.visual:
            self.ax.scatter(features[0], features[1], s=300, marker='*',
                            c=self.colors[classification])

        return classification

    def visualize(self):
        """Plot the training samples, both margins and the decision boundary.

        Does nothing when the instance was created with ``visual=False``,
        because no axes exist to draw on.
        """
        if not self.visual:
            return

        for i in self.data:
            for x in self.data[i]:
                self.ax.scatter(x[0], x[1], s=50, c=self.colors[i])

        def hyperplane(x, w, b, v):
            # Second coordinate of the point on the line dot(w, p) + b = v
            # whose first coordinate is x.
            return (-w[0] * x - b + v) / w[1]

        data_range = (self.min_feature_value * 0.9,
                      self.max_feature_value * 1.1)
        hyp_x_min = data_range[0]
        hyp_x_max = data_range[1]

        # Positive-class margin: dot(w, x) + b = 1
        psv1 = hyperplane(hyp_x_min, self.w, self.b, 1)
        psv2 = hyperplane(hyp_x_max, self.w, self.b, 1)
        self.ax.plot([hyp_x_min, hyp_x_max], [psv1, psv2], c=self.colors[1])

        # Negative-class margin: dot(w, x) + b = -1
        nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
        nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
        self.ax.plot([hyp_x_min, hyp_x_max], [nsv1, nsv2], c=self.colors[-1])

        # Decision boundary: dot(w, x) + b = 0
        db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
        db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
        self.ax.plot([hyp_x_min, hyp_x_max], [db1, db2], 'y--')

        pyplot.show()
119+
120+
if __name__ == '__main__':
    # Two linearly separable clusters, keyed by class label.
    negative_samples = np.array([[1, 7], [2, 8], [3, 8]])
    positive_samples = np.array([[5, 1], [6, -1], [7, 3]])
    training_data = {-1: negative_samples, 1: positive_samples}
    print(training_data)

    classifier = SVM()
    classifier.train(training_data)

    # Classify a handful of unseen points and print each predicted label.
    queries = [[0, 10], [2, 6], [1, 3], [4, 3], [5.5, 7.5], [8, 3]]
    for query in queries:
        print(classifier.predict(query))

    classifier.visualize()
137+

0 commit comments

Comments
 (0)