-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFeatureLoader.py
46 lines (37 loc) · 1.15 KB
/
FeatureLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import json
dict= json.load(open("DataSets/dataset50transpose.json"))
def get_training_data():
infile = open("DataSets/trainingdata.txt", "r")
y = np.asmatrix(np.zeros(8)).reshape(1, 8)
x = np.asmatrix(np.zeros(len(dict))).reshape(1, len(dict))
X = x
Y = y
i = 0
for line in infile:
if i != 0:
words = line.split(" ")
classified = int(words[0])
X = np.vstack((X, x))
for word in words[1:]:
if word in dict:
X[i, int(dict[word]["position"])] = 1
Y = np.vstack((Y, y))
Y[i, classified-1] = 1
i += 1
infile.close()
X = X[1:, :]
Y = Y[1:, :]
X = np.hstack((np.ones(X.shape[0]).reshape(X.shape[0], 1), X))
return X, Y
def get_testing_data(data):
y = np.asmatrix(np.zeros(8)).reshape(1, 8)
x = np.asmatrix(np.zeros(len(dict))).reshape(1, len(dict))
X = x
Y = y
words = data.split(" ")
for word in words:
if word in dict:
X[0, int(dict[word]["position"])] = 1
X = np.hstack((np.ones(X.shape[0]).reshape(X.shape[0], 1), X))
return X