Commit a9075bf0 authored by Caina Rose Paul

Update exercise2.tex

parent 5a84cc0d
Solution
\begin{equation*}
\begin{split}
\hat{y} &= S(z) \\
S(z_i) &= \frac{e^{z_i}}{\sum^N_{k=1}e^{z_k}}
\end{split}
\end{equation*}
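As a side remark (not part of the original exercise, but useful for a sanity check later on), the softmax outputs always sum to one:
\begin{equation*}
\sum_{i=1}^{N} S(z_i) = \frac{\sum_{i=1}^{N} e^{z_i}}{\sum_{k=1}^{N} e^{z_k}} = 1
\end{equation*}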
For \(N = 3\), the softmax function is given by:
\[
S(z_1) = \frac{e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}}
\]
Now, let's find the partial derivatives with respect to \(z_j\). The detailed computation for \(j = 1\) is as follows:
\[
\begin{aligned}
\frac{\partial S(z_1)}{\partial z_1} &= \frac{\partial}{\partial z_1}\left(\frac{e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}}\right) \\
&= \frac{e^{z_1} \cdot (e^{z_1} + e^{z_2} + e^{z_3}) - e^{z_1} \cdot e^{z_1}}{(e^{z_1} + e^{z_2} + e^{z_3})^2} \\
&= \frac{e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}} \cdot \frac{e^{z_1} + e^{z_2} + e^{z_3} - e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}} \\
&= S(z_1) \cdot \left(1 - S(z_1)\right)
\end{aligned}
\]
\[
\begin{aligned}
\frac{\partial S(z_2)}{\partial z_1} &= \frac{\partial}{\partial z_1}\left(\frac{e^{z_2}}{\sum_{k=1}^N e^{z_k}}\right) \\
&= \frac{0 - e^{z_2} \cdot e^{z_1}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}e^{z_2}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}}{\sum_{k=1}^N e^{z_k}} \cdot \frac{e^{z_2}}{\sum_{k=1}^N e^{z_k}} \\
&= -S(z_1)S(z_2)
\end{aligned}
\]
\[
\begin{aligned}
\frac{\partial S(z_3)}{\partial z_1} &= \frac{\partial}{\partial z_1}\left(\frac{e^{z_3}}{\sum_{k=1}^N e^{z_k}}\right) \\
&= \frac{0 - e^{z_3} \cdot e^{z_1}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}e^{z_3}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}}{\sum_{k=1}^N e^{z_k}} \cdot \frac{e^{z_3}}{\sum_{k=1}^N e^{z_k}} \\
&= -S(z_1)S(z_3)
\end{aligned}
\]
From these computations, we can see the general pattern:
\begin{equation*}
\begin{aligned}
\frac{\partial S(z_i)}{\partial z_j} &= \begin{cases}
S(z_i) \cdot (1 - S(z_i)), & \text{if } i = j \\
-S(z_i)S(z_j), & \text{if } i \neq j
\end{cases}
\end{aligned}
\end{equation*}
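The two cases can be written more compactly with the Kronecker delta \(\delta_{ij}\). As an extra sanity check beyond the exercise, differentiating the identity \(\sum_i S(z_i) = 1\) with respect to \(z_j\) implies that each column of the Jacobian must sum to zero, which the pattern above indeed satisfies:
\begin{equation*}
\begin{aligned}
\frac{\partial S(z_i)}{\partial z_j} &= S(z_i)\left(\delta_{ij} - S(z_j)\right) \\
\sum_{i=1}^{N} \frac{\partial S(z_i)}{\partial z_j} &= S(z_j)\bigl(1 - S(z_j)\bigr) - \sum_{i \neq j} S(z_i)S(z_j) = S(z_j)\Bigl(1 - \sum_{i=1}^{N} S(z_i)\Bigr) = 0
\end{aligned}
\end{equation*}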
\subsection{Given $\hat{y}=\text{softmax}(z)$, a target vector $y \in \mathbb{R}^N$, and the cross-entropy loss function defined as
\begin{equation*}
......