Skip to content

Commit 7b7b828

Browse files
committed
Add multichannel implementation
1 parent 471cb69 commit 7b7b828

File tree

3 files changed

+24
-6
lines changed

3 files changed

+24
-6
lines changed

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,19 @@ python3 main.py
4343
Batch[1900] - loss: 0.011894 acc: 100.0000%(128/128)
4444
Evaluation - loss: 0.000018 acc: 95.0000%(6679/7000)
4545
early stop by 1000 steps, acc: 95.0000%
46-
- [x] CNN-static 微调预训练的静态词向量
46+
- [x] CNN-static 微调预训练的词向量
4747
```bash
48-
python main.py -static=true -no-static=true
48+
python main.py -static=true -non-static=true
4949
```
5050
>
5151
Batch[1500] - loss: 0.008823 acc: 99.0000%(127/128))
5252
Evaluation - loss: 0.000016 acc: 96.0000%(6729/7000)
5353
early stop by 1000 steps, acc: 96.0000%
54-
- [ ] CNN-multichannel
54+
- [x] CNN-multichannel 微调加静态
55+
```bash
56+
python main.py -static=true -non-static=true -multichannel=true
57+
```
58+
>
59+
Batch[1500] - loss: 0.023020 acc: 98.0000%(126/128))
60+
Evaluation - loss: 0.000016 acc: 96.0000%(6744/7000)
61+
early stop by 1000 steps, acc: 96.0000%

main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
parser.add_argument('-static', type=bool, default=False, help='whether to use static pre-trained word vectors')
3232
parser.add_argument('-non-static', type=bool, default=False, help='whether to fine-tune static pre-trained word vectors')
33+
parser.add_argument('-multichannel', type=bool, default=False, help='whether to use 2 channel of word vectors')
3334
parser.add_argument('-pretrained-name', type=str, default='sgns.zhihu.word',
3435
help='filename of pre-trained word vectors')
3536
parser.add_argument('-pretrained-path', type=str, default='pretrained', help='path of pre-trained word vectors')
@@ -72,6 +73,9 @@ def load_dataset(text_field, label_field, args, **kwargs):
7273
if args.static:
7374
args.embedding_dim = text_field.vocab.vectors.size()[-1]
7475
args.vectors = text_field.vocab.vectors
76+
if args.multichannel:
77+
args.static = True
78+
args.non_static = True
7579
args.class_num = len(label_field.vocab)
7680
args.cuda = args.device != -1 and torch.cuda.is_available()
7781
args.filter_sizes = [int(size) for size in args.filter_sizes.split(',')]

model.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,22 @@ def __init__(self, args):
1818
self.embedding = nn.Embedding(vocabulary_size, embedding_dimension)
1919
if args.static:
2020
self.embedding = self.embedding.from_pretrained(args.vectors, freeze=not args.non_static)
21-
21+
if args.multichannel:
22+
self.embedding2 = nn.Embedding(vocabulary_size, embedding_dimension).from_pretrained(args.vectors)
23+
chanel_num += 1
24+
else:
25+
self.embedding2 = None
2226
self.convs = nn.ModuleList(
2327
[nn.Conv2d(chanel_num, filter_num, (size, embedding_dimension)) for size in filter_sizes])
2428
self.dropout = nn.Dropout(args.dropout)
2529
self.fc = nn.Linear(len(filter_sizes) * filter_num, class_num)
2630

2731
def forward(self, x):
28-
x = self.embedding(x)
29-
x = x.unsqueeze(1)
32+
if self.embedding2:
33+
x = torch.stack([self.embedding(x), self.embedding2(x)], dim=1)
34+
else:
35+
x = self.embedding(x)
36+
x = x.unsqueeze(1)
3037
x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
3138
x = [F.max_pool1d(item, item.size(2)).squeeze(2) for item in x]
3239
x = torch.cat(x, 1)

0 commit comments

Comments
 (0)