import torch
import torch.nn as nn
import numpy as np

#
#        -> 2.0              -> 0.65
# Linear -> 1.0  -> Softmax  -> 0.25  -> CrossEntropy(y, y_hat)
#        -> 0.1              -> 0.1
#
#     scores (logits)      probabilities
#                            sum = 1.0
#

# Softmax applies the exponential function to each element and normalizes
# by dividing by the sum of all these exponentials
# -> squashes the output to be between 0 and 1 = probability
# sum of all probabilities is 1
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print('softmax numpy:', outputs)

x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)  # computed along dim 0 (the first axis)
print('softmax torch:', outputs)
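
# Numerical-stability side note: np.exp can overflow for large scores.
# Softmax is shift-invariant, so a common trick is to subtract the maximum
# score before exponentiating. A minimal sketch of that stable variant
# (softmax_stable is a helper name introduced here for illustration):
def softmax_stable(x):
    shifted = x - np.max(x)        # largest entry becomes exp(0) = 1
    exps = np.exp(shifted)
    return exps / np.sum(exps)

print('softmax stable:', softmax_stable(np.array([2.0, 1.0, 0.1])))  # same values as above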

# Cross entropy
# Cross-entropy loss, or log loss, measures the performance of a classification
# model whose output is a probability value between 0 and 1.
# -> loss increases as the predicted probability diverges from the actual label
def cross_entropy(actual, predicted):
    EPS = 1e-15
    predicted = np.clip(predicted, EPS, 1 - EPS)
    loss = -np.sum(actual * np.log(predicted))
    return loss  # / float(predicted.shape[0])

# y must be one-hot encoded
# if class 0: [1 0 0]
# if class 1: [0 1 0]
# if class 2: [0 0 1]
Y = np.array([1, 0, 0])
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

# CrossEntropyLoss in PyTorch (applies Softmax internally)
# equivalent to nn.LogSoftmax + nn.NLLLoss (see the equivalence check below)
# NLLLoss = negative log likelihood loss
loss = nn.CrossEntropyLoss()
# loss(input, target)

# target is of size nBatch = 1
# each element has a class label: 0, 1, or 2
# Y (=target) contains class labels, not one-hot vectors
Y = torch.tensor([0])

# input is of size nBatch x nClasses = 1 x 3
# y_pred (=input) must be raw, unnormalized scores (logits) for each class, not softmax outputs
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(f'PyTorch Loss1: {l1.item():.4f}')
print(f'PyTorch Loss2: {l2.item():.4f}')
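
# As noted above, nn.CrossEntropyLoss combines LogSoftmax and NLLLoss in a
# single call; a minimal sketch verifying that on the same tensors
# (log_softmax and nll_loss are local names introduced here):
log_softmax = nn.LogSoftmax(dim=1)
nll_loss = nn.NLLLoss()
l1_check = nll_loss(log_softmax(Y_pred_good), Y)
print(f'LogSoftmax + NLLLoss: {l1_check.item():.4f}')  # should match PyTorch Loss1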

# get predictions
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Actual class: {Y.item()}, Y_pred1: {predictions1.item()}, Y_pred2: {predictions2.item()}')

# CrossEntropyLoss also allows computing the loss over a batch of multiple samples

# target is of size nBatch = 3
# each element has a class label: 0, 1, or 2
Y = torch.tensor([2, 0, 1])

# input is of size nBatch x nClasses = 3 x 3
# Y_pred are logits (not softmax outputs)
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9],   # predict class 2
     [1.2, 0.1, 0.3],   # predict class 0
     [0.3, 2.2, 0.2]])  # predict class 1

Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1],
     [0.1, 0.3, 1.5],
     [1.2, 0.2, 0.5]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)
print(f'Batch Loss1: {l1.item():.4f}')
print(f'Batch Loss2: {l2.item():.4f}')
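
# By default nn.CrossEntropyLoss averages the per-sample losses over the batch
# (reduction='mean'); reduction='sum' adds them up instead. A quick sketch for
# comparison (loss_sum is a name introduced here):
loss_sum = nn.CrossEntropyLoss(reduction='sum')
print(f'Batch Loss1 (sum over samples): {loss_sum(Y_pred_good, Y).item():.4f}')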

# Binary classification
class NeuralNet1(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet1, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # sigmoid at the end
        y_pred = torch.sigmoid(out)
        return y_pred

model = NeuralNet1(input_size=28*28, hidden_size=5)
criterion = nn.BCELoss()
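
# A minimal usage sketch with made-up data: BCELoss expects the sigmoid
# probabilities as input and float targets of 0.0 or 1.0 with the same shape
# (x_dummy and y_dummy are invented here for illustration):
x_dummy = torch.rand(4, 28*28)                         # batch of 4 flattened images
y_dummy = torch.tensor([[1.0], [0.0], [1.0], [0.0]])   # one float label per sample
l_bce = criterion(model(x_dummy), y_dummy)
print(f'BCE loss on dummy batch: {l_bce.item():.4f}')
# Note: nn.BCEWithLogitsLoss combines the sigmoid and BCELoss in a more
# numerically stable way; with it, the model would return raw logits instead.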

# Multiclass problem
class NeuralNet2(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet2, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        # no softmax at the end
        return out

model = NeuralNet2(input_size=28*28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # (applies Softmax)
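
# A minimal usage sketch with made-up data: CrossEntropyLoss takes the raw
# logits returned by the model and integer class labels (not one-hot), exactly
# as in the examples above (x_dummy and y_dummy are invented for illustration):
x_dummy = torch.rand(4, 28*28)          # batch of 4 flattened images
y_dummy = torch.tensor([0, 2, 1, 2])    # one class index per sample
l_ce = criterion(model(x_dummy), y_dummy)
print(f'Cross-entropy loss on dummy batch: {l_ce.item():.4f}')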