3
3
from shutil import copyfile
4
4
from dnlp .data_process .process_cws import ProcessCWS
5
5
6
def init():
    """Create the model output directory if it does not already exist.

    The directory is ``../dnlp/models/``, resolved relative to the current
    working directory. Calling this function repeatedly is harmless.
    """
    model_path = '../dnlp/models/'
    # exist_ok=True replaces the separate os.path.exists() check: it is a
    # single call and cannot fail if the directory appears between the check
    # and the creation.
    os.makedirs(model_path, exist_ok=True)
6
10
7
11
def copy():
    """Copy the pku/msr training and test files into the CWS data folder.

    Files are read from ``../../datasets/`` and written to
    ``../dnlp/data/cws/``; both paths are resolved relative to the current
    working directory. The destination folder is created when missing.
    """
    src_folder = '../../datasets/'
    dst_base_folder = '../dnlp/data/cws/'
    # exist_ok=True replaces the separate os.path.exists() check and avoids
    # failing if the folder is created between the check and the makedirs.
    os.makedirs(dst_base_folder, exist_ok=True)
    files = [
        'pku_training.utf8',
        'pku_test.utf8',
        'msr_training.utf8',
        'msr_test.utf8',
    ]
    for f in files:
        copyfile(os.path.join(src_folder, f), os.path.join(dst_base_folder, f))
15
19
@@ -19,9 +23,13 @@ def build_cws_datasets():
19
23
if not os .path .exists (base_folder ):
20
24
os .makedirs (base_folder )
21
25
ProcessCWS (files = ('pku_training.utf8' ,), base_folder = base_folder , name = 'pku_training' )
22
- dict_path = base_folder + 'pku_training_dict.utf8'
23
- ProcessCWS (files = ('pku_test.utf8' ,), dict_path = dict_path ,base_folder = base_folder , name = 'pku_test' ,mode = 'test' )
26
+ ProcessCWS (files = ('msr_training.utf8' ,), base_folder = base_folder , name = 'msr_training' )
27
+ pku_dict_path = base_folder + 'pku_training_dict.utf8'
28
+ ProcessCWS (files = ('pku_test.utf8' ,), dict_path = pku_dict_path ,base_folder = base_folder , name = 'pku_test' ,mode = 'test' )
29
+ msr_dict_path = base_folder + 'msr_training_dict.utf8'
30
+ ProcessCWS (files = ('msr_test.utf8' ,), dict_path = msr_dict_path , base_folder = base_folder , name = 'msr_test' , mode = 'test' )
24
31
25
32
if __name__ == '__main__':
    # Run the preparation steps in order: create the model directory, copy
    # the raw dataset files, then build the CWS datasets from them.
    for step in (init, copy, build_cws_datasets):
        step()
0 commit comments