import torch
import torch.nn as nn
import numpy as np

#
#        -> 2.0              -> 0.65
# Linear -> 1.0  -> Softmax  -> 0.25  -> CrossEntropy(y, y_hat)
#        -> 0.1              -> 0.1
#
#     scores (logits)      probabilities
#                            sum = 1.0
#

# Softmax applies the exponential function to each element and normalizes
# by dividing by the sum of all these exponentials
# -> squashes the output to be between 0 and 1 = probability
# sum of all probabilities is 1
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)  # computed along dim 0 (the first axis)
print('softmax torch:', outputs)
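
# Numerical-stability side note: np.exp can overflow for large scores.
# Softmax is shift-invariant, so a common trick is to subtract the maximum
# score before exponentiating. A minimal sketch of that stable variant
# (softmax_stable is a helper name introduced here for illustration):
def softmax_stable(x):
    shifted = x - np.max(x)        # largest entry becomes exp(0) = 1
    exps = np.exp(shifted)
    return exps / np.sum(exps)

print('softmax stable:', softmax_stable(np.array([2.0, 1.0, 0.1])))  # same values as above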

# Cross entropy
# Cross-entropy loss, or log loss, measures the performance of a classification
# model whose output is a probability value between 0 and 1.
# -> loss increases as the predicted probability diverges from the actual label
def cross_entropy(actual, predicted):
    EPS = 1e-15
    predicted = np.clip(predicted, EPS, 1 - EPS)
    loss = -np.sum(actual * np.log(predicted))
    return loss  # / float(predicted.shape[0])

# y must be one-hot encoded
# if class 0: [1 0 0]
# if class 1: [0 1 0]
# if class 2: [0 0 1]
Y = np.array([1, 0, 0])
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

# CrossEntropyLoss in PyTorch (applies Softmax internally)
# equivalent to nn.LogSoftmax + nn.NLLLoss (see the equivalence check below)
# NLLLoss = negative log likelihood loss
loss = nn.CrossEntropyLoss()
# loss(input, target)

# target is of size nBatch = 1
# each element has a class label: 0, 1, or 2
# Y (=target) contains class labels, not one-hot vectors
Y = torch.tensor([0])

# input is of size nBatch x nClasses = 1 x 3
# y_pred (=input) must be raw, unnormalized scores (logits) for each class, not softmax outputs
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')
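
# As noted above, nn.CrossEntropyLoss combines LogSoftmax and NLLLoss in a
# single call; a minimal sketch verifying that on the same tensors
# (log_softmax and nll_loss are local names introduced here):
log_softmax = nn.LogSoftmax(dim=1)
nll_loss = nn.NLLLoss()
l1_check = nll_loss(log_softmax(Y_pred_good), Y)
print(f'LogSoftmax + NLLLoss: {l1_check.item():.4f}')  # should match PyTorch Loss1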

# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')

# CrossEntropyLoss also allows computing the loss over a batch of multiple samples

# target is of size nBatch = 3
# each element has a class label: 0, 1, or 2
Y = torch.tensor([2, 0, 1])

# input is of size nBatch x nClasses = 3 x 3
# Y_pred are logits (not softmax outputs)
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9],   # predict class 2
     [1.2, 0.1, 0.3],   # predict class 0
     [0.3, 2.2, 0.2]])  # predict class 1

Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1],
     [0.1, 0.3, 1.5],
     [1.2, 0.2, 0.5]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'Batch Loss1: {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')
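
# By default nn.CrossEntropyLoss averages the per-sample losses over the batch
# (reduction='mean'); reduction='sum' adds them up instead. A quick sketch for
# comparison (loss_sum is a name introduced here):
loss_sum = nn.CrossEntropyLoss(reduction='sum')
print(f'Batch Loss1 (sum over samples): {loss_sum(Y_pred_good, Y).item():.4f}')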

# Binary classification
class NeuralNet1(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # sigmoid at the end
        y_pred = torch.sigmoid(out)
        return y_pred

model = NeuralNet1(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()
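
# A minimal usage sketch with made-up data: BCELoss expects the sigmoid
# probabilities as input and float targets of 0.0 or 1.0 with the same shape
# (x_dummy and y_dummy are invented here for illustration):
x_dummy = torch.rand(4, 28*28)                         # batch of 4 flattened images
y_dummy = torch.tensor([[1.0], [0.0], [1.0], [0.0]])   # one float label per sample
l_bce = criterion(model(x_dummy), y_dummy)
print(f'BCE loss on dummy batch: {l_bce.item():.4f}')
# Note: nn.BCEWithLogitsLoss combines the sigmoid and BCELoss in a more
# numerically stable way; with it, the model would return raw logits instead.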

# Multiclass problem
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out

model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # (applies Softmax)
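
# A minimal usage sketch with made-up data: CrossEntropyLoss takes the raw
# logits returned by the model and integer class labels (not one-hot), exactly
# as in the examples above (x_dummy and y_dummy are invented for illustration):
x_dummy = torch.rand(4, 28*28)          # batch of 4 flattened images
y_dummy = torch.tensor([0, 2, 1, 2])    # one class index per sample
l_ce = criterion(model(x_dummy), y_dummy)
print(f'Cross-entropy loss on dummy batch: {l_ce.item():.4f}')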