실습: 실습 소개
Equations
\[\begin{gathered}
\mathcal{D}=\{(x_i,y_i)\}_{i=1}^N \\
\\
\hat{\phi}=\operatorname*{arg\,max}_{\phi\in\Phi}\sum_{i=1}^N{
\log{P(x_i;\phi)}
} \\
\hat{\psi}=\operatorname*{arg\,max}_{\psi\in\Psi}\sum_{i=1}^N{
\log{P(y_i;\psi)}
}
\end{gathered}\]
\[\begin{gathered}
\theta_{x\rightarrow{y}}\leftarrow\theta_{x\rightarrow{y}}-\eta\nabla_{\theta_{x\rightarrow{y}}}\mathcal{L}(\theta_{x\rightarrow{y}}) \\
\mathcal{L}(\theta_{x\rightarrow{y}})=\sum_{i=1}^N{
\biggl(
-\log{P(y_i|x_i;\theta_{x\rightarrow{y}})}
+\lambda\times\Bigl(
\bigl(
\log{P(x_i;\phi)}+\log{P(y_i|x_i;\theta_{x\rightarrow{y}})}
\bigr)
-\bigl(
\log{P(y_i;\psi)}+\log{P(x_i|y_i;\theta_{y\rightarrow{x}})}
\bigr)
\Bigr)^2
\biggr)
} \\
\\
\theta_{y\rightarrow{x}}\leftarrow\theta_{y\rightarrow{x}}-\eta\nabla_{\theta_{y\rightarrow{x}}}\mathcal{L}(\theta_{y\rightarrow{x}}) \\
\mathcal{L}(\theta_{y\rightarrow{x}})=\sum_{i=1}^N{
\biggl(
-\log{P(x_i|y_i;\theta_{y\rightarrow{x}})}
+\lambda\times\Bigl(
\bigl(
\log{P(x_i;\phi)}+\log{P(y_i|x_i;\theta_{x\rightarrow{y}})}
\bigr)
-\bigl(
\log{P(y_i;\psi)}+\log{P(x_i|y_i;\theta_{y\rightarrow{x}})}
\bigr)
\Bigr)^2
\biggr)
}
\end{gathered}\]
\[\begin{gathered}
\begin{aligned}
\log{P(y_i|x_i;\theta_{x\rightarrow{y}})}&=\sum_{t=1}^m{
\log{P(y_{i,t}|x_i,y_{i,<t};\theta_{x\rightarrow{y}})}
} \\
&=\sum_{t=1}^m{
y_{i,t}^{\intercal}\cdot\log{\hat{y}_{i,t}}
}
\end{aligned} \\
\\
\begin{aligned}
\log{P(x_i|y_i;\theta_{y\rightarrow{x}})}&=\sum_{t=1}^n{
\log{P(x_{i,t}|y_i,x_{i,<t};\theta_{y\rightarrow{x}})}
} \\
&=\sum_{t=1}^n{
x_{i,t}^\intercal\cdot\log{\hat{x}_{i,t}}
}
\end{aligned} \\
\text{where }x_{i,0}=y_{i,0}=\texttt{<BOS>}\text{ and }x_{i,n}=y_{i,m}=\texttt{<EOS>}\text{.}
\end{gathered}\]