Commit a9075bf0 authored by Caina Rose Paul

Update exercise2.tex

parent 5a84cc0d
Solution
\begin{equation*}
\begin{split}
\hat{y} &= S(z) \\
S(z_i) &= \frac{e^{z_i}}{\sum^N_{k=1}e^{z_k}}
\end{split}
\end{equation*}
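As a side remark (not part of the original exercise, but useful for a sanity check later on), the softmax outputs always sum to one:
\begin{equation*}
\sum_{i=1}^{N} S(z_i) = \frac{\sum_{i=1}^{N} e^{z_i}}{\sum_{k=1}^{N} e^{z_k}} = 1
\end{equation*}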
For \(N = 3\), the softmax function is given by:
\[
S(z_1) = \frac{e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}}
\]
Now, let's find the partial derivatives with respect to \(z_j\). The detailed computation for \(j = 1\) is as follows:
\[
\begin{aligned}
\frac{\partial S(z_1)}{\partial z_1} &= \frac{\partial}{\partial z_1}\left(\frac{e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}}\right) \\
&= \frac{e^{z_1} \cdot (e^{z_1} + e^{z_2} + e^{z_3}) - e^{z_1} \cdot e^{z_1}}{(e^{z_1} + e^{z_2} + e^{z_3})^2} \\
&= \frac{e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}} \cdot \frac{e^{z_1} + e^{z_2} + e^{z_3} - e^{z_1}}{e^{z_1} + e^{z_2} + e^{z_3}} \\
&= S(z_1) \cdot \left(1 - S(z_1)\right)
\end{aligned}
\]
\[
\begin{aligned}
\frac{\partial S(z_2)}{\partial z_1} &= \frac{\partial}{\partial z_1}\left(\frac{e^{z_2}}{\sum_{k=1}^N e^{z_k}}\right) \\
&= \frac{0 - e^{z_2} \cdot e^{z_1}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}e^{z_2}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}}{\sum_{k=1}^N e^{z_k}} \cdot \frac{e^{z_2}}{\sum_{k=1}^N e^{z_k}} \\
&= -S(z_1)S(z_2)
\end{aligned}
\]
\[
\begin{aligned}
\frac{\partial S(z_3)}{\partial z_1} &= \frac{\partial}{\partial z_1}\left(\frac{e^{z_3}}{\sum_{k=1}^N e^{z_k}}\right) \\
&= \frac{0 - e^{z_3} \cdot e^{z_1}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}e^{z_3}}{\left(\sum_{k=1}^N e^{z_k}\right)^2} \\
&= -\frac{e^{z_1}}{\sum_{k=1}^N e^{z_k}} \cdot \frac{e^{z_3}}{\sum_{k=1}^N e^{z_k}} \\
&= -S(z_1)S(z_3)
\end{aligned}
\]
From these computations, we can see the general pattern:
\begin{equation*}
\begin{aligned}
\frac{\partial S(z_i)}{\partial z_j} &= \begin{cases}
S(z_i) \cdot (1 - S(z_i)), & \text{if } i = j \\
-S(z_i)S(z_j), & \text{if } i \neq j
\end{cases}
\end{aligned}
\end{equation*}
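The two cases can be written more compactly with the Kronecker delta \(\delta_{ij}\). As an extra sanity check beyond the exercise, differentiating the identity \(\sum_i S(z_i) = 1\) with respect to \(z_j\) implies that each column of the Jacobian must sum to zero, which the pattern above indeed satisfies:
\begin{equation*}
\begin{aligned}
\frac{\partial S(z_i)}{\partial z_j} &= S(z_i)\left(\delta_{ij} - S(z_j)\right) \\
\sum_{i=1}^{N} \frac{\partial S(z_i)}{\partial z_j} &= S(z_j)\bigl(1 - S(z_j)\bigr) - \sum_{i \neq j} S(z_i)S(z_j) = S(z_j)\Bigl(1 - \sum_{i=1}^{N} S(z_i)\Bigr) = 0
\end{aligned}
\end{equation*}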
\subsection{Given $\hat{y}=\text{softmax}(z)$, a target vector $y \in \mathbb{R}^N$, and the cross-entropy loss function defined as
\begin{equation*}
......