Skip to content

Commit 4e73dc3

Browse files
author
Marko Pranjic
committed
Add comment explaining the encoding of the position information.
1 parent 561d409 commit 4e73dc3

File tree

2 files changed

+2
-0
lines changed

2 files changed

+2
-0
lines changed

model_pytorch.py

+1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ def __init__(self, cfg, vocab=40990, n_ctx=512):
165165
def forward(self, x):
166166
x = x.view(-1, x.size(-2), x.size(-1))
167167
e = self.embed(x)
168+
# Add the position information to the input embeddings
168169
h = e.sum(dim=2)
169170
for block in self.h:
170171
h = block(h)

train.py

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def transform_roc(X1, X2, X3):
3232
xmb[i, 1, :l13, 0] = x13
3333
mmb[i, 0, :l12] = 1
3434
mmb[i, 1, :l13] = 1
35+
# Position information that is added to the input embeddings in the TransformerModel
3536
xmb[:, :, :, 1] = np.arange(n_vocab + n_special, n_vocab + n_special + n_ctx)
3637
return xmb, mmb
3738

0 commit comments

Comments (0)