diff --git a/gradipy/tensor.py b/gradipy/tensor.py
index 3e091a5..0c2f5e8 100644
--- a/gradipy/tensor.py
+++ b/gradipy/tensor.py
@@ -97,6 +97,8 @@ def _backward() -> None:
     def matmul(self, other: "Tensor") -> "Tensor":
        return self @ other

+    # there is a more stable way to compute softmax :D
+    # https://ogunlao.github.io/2020/04/26/you_dont_really_know_softmax.html
    def softmax(self) -> "Tensor":
        exps = np.exp(self.data - np.max(self.data, axis=1, keepdims=True))
        probs = exps / np.sum(exps, axis=1, keepdims=True)
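
For context (not part of the diff), here is a minimal standalone sketch of why the max subtraction in `softmax` matters: without it, `np.exp` overflows for large logits, while shifting by the row maximum leaves the result mathematically unchanged. The function names below (`softmax_naive`, `softmax_stable`) are illustrative, not part of gradipy.

```python
import numpy as np

def softmax_naive(x: np.ndarray) -> np.ndarray:
    # overflows for large inputs: np.exp(1000.0) == inf, and inf / inf == nan
    exps = np.exp(x)
    return exps / np.sum(exps, axis=1, keepdims=True)

def softmax_stable(x: np.ndarray) -> np.ndarray:
    # subtracting the row-wise max leaves the result unchanged
    # (exp(x - c) / sum(exp(x - c)) == exp(x) / sum(exp(x)))
    # but keeps every exponent <= 0, so np.exp never overflows
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)

logits = np.array([[1000.0, 1001.0, 1002.0]])
print(softmax_naive(logits))   # [[nan nan nan]] with overflow warnings
print(softmax_stable(logits))  # [[0.09003057 0.24472847 0.66524096]]
```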