modify runner and evaluation code

supercoderhawk · supercoderhawk · commit 71d9f38dd0b3 · 2017-12-02T14:52:00.000+08:00
diff --git a/python/dnlp/utils/evaluation.py b/python/dnlp/utils/evaluation.py
@@ -1,5 +1,6 @@
 # -*- coding: UTF-8 -*-
 import pickle
+from sklearn.metrics import f1_score,precision_score,recall_score
 from dnlp.utils.constant import TAG_BEGIN, TAG_INSIDE, TAG_END, TAG_SINGLE
 
 
@@ -75,13 +76,26 @@ def evaluate_cws(model, data_path: str):
     characters = data['characters']
     labels_true = data['labels']
     c_count = 0
+
     p_count = 0
+
     r_count = 0
+
+    all_labels_true = []
+    all_labels_predict = []
     for sentence, label in zip(characters, labels_true):
-      words, labels_predict = model.predict(sentence, return_labels=True)
+      words, labels_predict = model.predict_ll(sentence, return_labels=True)
+      #print("============")
+      #print(words)
+      all_labels_predict.extend(labels_predict)
+      all_labels_true.extend(label)
       c, p, r = get_cws_statistics(label, labels_predict)
       c_count += c
       p_count += p
       r_count += r
     print(c_count / p_count)
     print(c_count / r_count)
+    average = 'macro'
+    print(precision_score(all_labels_true,all_labels_predict,average=average))
+    print(recall_score(all_labels_true,all_labels_predict,average=average))
+
diff --git a/python/scripts/cws_ner.py b/python/scripts/cws_ner.py
@@ -7,20 +7,21 @@
 
 
 def train_cws():
-  data_path = '../dnlp/data/cws/pku_training.pickle'
+  data_path = '../dnlp/data/cws/msr_training.pickle'
   config = DnnCrfConfig()
-  dnncrf = DnnCrf(config=config, data_path=data_path, nn='lstm')
-  dnncrf.fit_ll()
+  dnncrf = DnnCrf(config=config, data_path=data_path, nn='bilstm')
+  dnncrf.fit()
 
 
 def test_cws():
   sentence = '小明来自南京师范大学'
-  model_path = '../dnlp/models/cws1.ckpt'
+  sentence = '中国人民决心继承邓小平同志的遗志，继续把建设有中国特色社会主义事业推向前进。'
+  model_path = '../dnlp/models/cws4.ckpt'
   config = DnnCrfConfig()
-  dnncrf = DnnCrf(config=config, mode='predict', model_path=model_path, nn='lstm')
-  res, labels = dnncrf.predict(sentence, return_labels=True)
+  dnncrf = DnnCrf(config=config, mode='predict', model_path=model_path, nn='bilstm')
+  res, labels = dnncrf.predict_ll(sentence, return_labels=True)
   print(res)
-  evaluate_cws(dnncrf, '../dnlp/data/cws/pku_test.pickle')
+  evaluate_cws(dnncrf, '../dnlp/data/cws/msr_test.pickle')
 
 
 if __name__ == '__main__':