Spaces:
Sleeping
Sleeping
Jensen Holm
committed on
Commit
·
d7ea050
1
Parent(s):
ff1254a
loss functions now just have staticmethods
Browse files
- numpyneuron/__init__.py +6 -0
- numpyneuron/activation.py +2 -2
- numpyneuron/loss.py +17 -20
- test/{test_activation.py → test_activation_fns.py} +0 -0
- test/test_loss_fns.py +0 -0
numpyneuron/__init__.py
CHANGED
@@ -8,3 +8,9 @@ ACTIVATIONS: dict[str, Activation] = {
|
|
8 |
"TanH": TanH(),
|
9 |
"SoftMax": SoftMax(),
|
10 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"TanH": TanH(),
|
9 |
"SoftMax": SoftMax(),
|
10 |
}
|
11 |
+
|
12 |
+
LOSSES: dict[str, Loss] = {
|
13 |
+
"MSE": MSE(),
|
14 |
+
"CrossEntropy": CrossEntropy(),
|
15 |
+
"CrossEntropyWithLogitsLoss": CrossEntropyWithLogits(),
|
16 |
+
}
|
numpyneuron/activation.py
CHANGED
@@ -4,11 +4,11 @@ from abc import abstractmethod, ABC
|
|
4 |
|
5 |
class Activation(ABC):
|
6 |
@abstractmethod
|
7 |
-
def forward(
|
8 |
pass
|
9 |
|
10 |
@abstractmethod
|
11 |
-
def backward(
|
12 |
pass
|
13 |
|
14 |
|
|
|
4 |
|
5 |
class Activation(ABC):
|
6 |
@abstractmethod
|
7 |
+
def forward(X: np.ndarray) -> np.ndarray:
|
8 |
pass
|
9 |
|
10 |
@abstractmethod
|
11 |
+
def backward(X: np.ndarray) -> np.ndarray:
|
12 |
pass
|
13 |
|
14 |
|
numpyneuron/loss.py
CHANGED
@@ -4,12 +4,14 @@ import numpy as np
|
|
4 |
|
5 |
|
6 |
class Loss(ABC):
|
|
|
7 |
@abstractmethod
|
8 |
-
def forward(
|
9 |
pass
|
10 |
|
|
|
11 |
@abstractmethod
|
12 |
-
def backward(
|
13 |
pass
|
14 |
|
15 |
|
@@ -18,19 +20,22 @@ class LogitsLoss(Loss):
|
|
18 |
|
19 |
|
20 |
class MSE(Loss):
|
21 |
-
|
|
|
22 |
return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]
|
23 |
|
24 |
-
|
|
|
25 |
return (y_hat - y_true) * (2 / y_true.shape[0])
|
26 |
|
27 |
|
28 |
class CrossEntropy(Loss):
|
29 |
-
|
|
|
30 |
y_hat = np.asarray(y_hat)
|
31 |
y_true = np.asarray(y_true)
|
32 |
m = y_true.shape[0]
|
33 |
-
p =
|
34 |
eps = 1e-15 # to prevent log(0)
|
35 |
log_likelihood = -np.log(
|
36 |
np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
|
@@ -38,19 +43,17 @@ class CrossEntropy(Loss):
|
|
38 |
loss = np.sum(log_likelihood) / m
|
39 |
return loss
|
40 |
|
41 |
-
|
|
|
42 |
y_hat = np.asarray(y_hat)
|
43 |
y_true = np.asarray(y_true)
|
44 |
grad = y_hat - y_true
|
45 |
return grad / y_true.shape[0]
|
46 |
|
47 |
-
@staticmethod
|
48 |
-
def _softmax(X: np.ndarray) -> np.ndarray:
|
49 |
-
return SoftMax().forward(X)
|
50 |
-
|
51 |
|
52 |
class CrossEntropyWithLogits(LogitsLoss):
|
53 |
-
|
|
|
54 |
# Apply the log-sum-exp trick for numerical stability
|
55 |
max_logits = np.max(y_hat, axis=1, keepdims=True)
|
56 |
log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
|
@@ -59,17 +62,11 @@ class CrossEntropyWithLogits(LogitsLoss):
|
|
59 |
loss = -np.sum(log_probs * y_true) / y_true.shape[0]
|
60 |
return loss
|
61 |
|
62 |
-
|
|
|
63 |
# Compute softmax probabilities
|
64 |
exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
|
65 |
probs = exps / np.sum(exps, axis=1, keepdims=True)
|
66 |
# Subtract the one-hot encoded labels from the probabilities
|
67 |
grad = (probs - y_true) / y_true.shape[0]
|
68 |
return grad
|
69 |
-
|
70 |
-
|
71 |
-
LOSSES: dict[str, Loss] = {
|
72 |
-
"MSE": MSE(),
|
73 |
-
"CrossEntropy": CrossEntropy(),
|
74 |
-
"CrossEntropyWithLogitsLoss": CrossEntropyWithLogits(),
|
75 |
-
}
|
|
|
4 |
|
5 |
|
6 |
class Loss(ABC):
|
7 |
+
@staticmethod
|
8 |
@abstractmethod
|
9 |
+
def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
10 |
pass
|
11 |
|
12 |
+
@staticmethod
|
13 |
@abstractmethod
|
14 |
+
def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
15 |
pass
|
16 |
|
17 |
|
|
|
20 |
|
21 |
|
22 |
class MSE(Loss):
|
23 |
+
@staticmethod
|
24 |
+
def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
25 |
return np.sum(np.square(y_hat - y_true)) / y_true.shape[0]
|
26 |
|
27 |
+
@staticmethod
|
28 |
+
def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
29 |
return (y_hat - y_true) * (2 / y_true.shape[0])
|
30 |
|
31 |
|
32 |
class CrossEntropy(Loss):
|
33 |
+
@staticmethod
|
34 |
+
def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
35 |
y_hat = np.asarray(y_hat)
|
36 |
y_true = np.asarray(y_true)
|
37 |
m = y_true.shape[0]
|
38 |
+
p = SoftMax().forward(y_hat)
|
39 |
eps = 1e-15 # to prevent log(0)
|
40 |
log_likelihood = -np.log(
|
41 |
np.clip(p[range(m), y_true.argmax(axis=1)], a_min=eps, a_max=None)
|
|
|
43 |
loss = np.sum(log_likelihood) / m
|
44 |
return loss
|
45 |
|
46 |
+
@staticmethod
|
47 |
+
def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
48 |
y_hat = np.asarray(y_hat)
|
49 |
y_true = np.asarray(y_true)
|
50 |
grad = y_hat - y_true
|
51 |
return grad / y_true.shape[0]
|
52 |
|
|
|
|
|
|
|
|
|
53 |
|
54 |
class CrossEntropyWithLogits(LogitsLoss):
|
55 |
+
@staticmethod
|
56 |
+
def forward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
57 |
# Apply the log-sum-exp trick for numerical stability
|
58 |
max_logits = np.max(y_hat, axis=1, keepdims=True)
|
59 |
log_sum_exp = np.log(np.sum(np.exp(y_hat - max_logits), axis=1, keepdims=True))
|
|
|
62 |
loss = -np.sum(log_probs * y_true) / y_true.shape[0]
|
63 |
return loss
|
64 |
|
65 |
+
@staticmethod
|
66 |
+
def backward(y_hat: np.ndarray, y_true: np.ndarray) -> np.ndarray:
|
67 |
# Compute softmax probabilities
|
68 |
exps = np.exp(y_hat - np.max(y_hat, axis=1, keepdims=True))
|
69 |
probs = exps / np.sum(exps, axis=1, keepdims=True)
|
70 |
# Subtract the one-hot encoded labels from the probabilities
|
71 |
grad = (probs - y_true) / y_true.shape[0]
|
72 |
return grad
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test/{test_activation.py → test_activation_fns.py}
RENAMED
File without changes
|
test/test_loss_fns.py
ADDED
File without changes
|