Skip to content

Commit 1d2491f

Browse files
add msra init data
1 parent 52344e4 commit 1d2491f

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

python/scripts/init_datasets.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@
33
from shutil import copyfile
44
from dnlp.data_process.process_cws import ProcessCWS
55

6+
def init():
    """Ensure the model directory ``../dnlp/models/`` exists.

    The path is relative to the current working directory. Missing parent
    directories are created as well, and calling this function again when
    the directory already exists is a no-op.

    Raises:
        FileExistsError: if the path exists but is not a directory.
    """
    model_path = '../dnlp/models/'
    # exist_ok=True replaces the separate exists() check, so there is no
    # window between "check" and "create" in which the call can fail.
    os.makedirs(model_path, exist_ok=True)
610

711
def copy():
    """Copy the pku/msr training and test files into the package data folder.

    Files are read from ``../../datasets/`` and written under the same names
    to ``../dnlp/data/cws/`` (both relative to the current working
    directory). The destination folder is created if it is missing.

    Raises:
        FileNotFoundError: if one of the listed files is absent from the
            source folder (propagated from ``shutil.copyfile``).
    """
    src_folder = '../../datasets/'
    dst_base_folder = '../dnlp/data/cws/'
    # exist_ok=True makes directory creation idempotent without a separate
    # exists() check.
    os.makedirs(dst_base_folder, exist_ok=True)
    files = (
        'pku_training.utf8',
        'pku_test.utf8',
        'msr_training.utf8',
        'msr_test.utf8',
    )
    for name in files:
        copyfile(os.path.join(src_folder, name),
                 os.path.join(dst_base_folder, name))
1519

@@ -19,9 +23,13 @@ def build_cws_datasets():
1923
if not os.path.exists(base_folder):
2024
os.makedirs(base_folder)
2125
ProcessCWS(files=('pku_training.utf8',), base_folder=base_folder, name='pku_training')
22-
dict_path = base_folder + 'pku_training_dict.utf8'
23-
ProcessCWS(files=('pku_test.utf8',), dict_path=dict_path,base_folder=base_folder, name='pku_test',mode='test')
26+
ProcessCWS(files=('msr_training.utf8',), base_folder=base_folder, name='msr_training')
27+
pku_dict_path = base_folder + 'pku_training_dict.utf8'
28+
ProcessCWS(files=('pku_test.utf8',), dict_path=pku_dict_path,base_folder=base_folder, name='pku_test',mode='test')
29+
msr_dict_path = base_folder + 'msr_training_dict.utf8'
30+
ProcessCWS(files=('msr_test.utf8',), dict_path=msr_dict_path, base_folder=base_folder, name='msr_test', mode='test')
2431

2532
if __name__ == '__main__':
    # Run the setup steps in order: create the model directory, copy the raw
    # dataset files into place, then build the CWS datasets.
    for step in (init, copy, build_cws_datasets):
        step()

0 commit comments

Comments
 (0)