From 6513fd229757c072e07c81507396c2667aba206d Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Sat, 11 Mar 2023 15:23:10 +0100
Subject: [PATCH] Added tilde to x; P -> p; Minor wording changes

---
 latex/thesis/chapters/decoding_techniques.tex | 100 ++++++++++--------
 .../chapters/theoretical_background.tex       |   2 +-
 2 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex
index b72b2b3..9b6ad37 100644
--- a/latex/thesis/chapters/decoding_techniques.tex
+++ b/latex/thesis/chapters/decoding_techniques.tex
@@ -27,7 +27,7 @@ the \ac{ML} decoding problem:%
 %
 \begin{align*}
     \hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
-    P \left(\boldsymbol{c} \mid \boldsymbol{Y} = \boldsymbol{y}
+    p_{\boldsymbol{C} \mid \boldsymbol{Y}} \left(\boldsymbol{c} \mid \boldsymbol{y}
     \right)\\
     \hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
     f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c}
@@ -182,7 +182,7 @@ They begin by looking at the \ac{ML} decoding problem%
 making the \ac{ML} and \ac{MAP} decoding problems equivalent.}%
 %
 \begin{align}
-    \hat{\boldsymbol{c}} = \argmax_{\boldsymbol{c} \in \mathcal{C}}
+    \hat{\boldsymbol{c}}_{\text{\ac{ML}}} = \argmax_{\boldsymbol{c} \in \mathcal{C}}
     f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c}
     \right)%
     \label{eq:lp:ml}
@@ -192,7 +192,7 @@ Assuming a memoryless channel, equation (\ref{eq:lp:ml}) can be rewritten in ter
 of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
 %
 \begin{align*}
-    \hat{\boldsymbol{c}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
+    \hat{\boldsymbol{c}}_{\text{\ac{ML}}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
     \sum_{i=1}^{n} \gamma_i c_i,%
     \hspace{5mm}
     \gamma_i = \ln\left( \frac{f_{Y_i | C_i} \left( y_i \mid C_i = 0 \right) }
@@ -706,46 +706,48 @@ In contrast to \ac{LP} decoding, the objective function is based on a
 non-convex optimization formulation of the \ac{MAP} decoding problem.
 
 In order to derive the objective function, the authors begin with the
-\ac{MAP} decoding rule, expressed as a continuous minimization problem over
-$\boldsymbol{x}$:%
+\ac{MAP} decoding rule, expressed as a continuous maximization problem%
+\footnote{The tilde marks the continuous optimization variable $\tilde{\boldsymbol{x}}$, as opposed to the transmitted bipolar vector $\boldsymbol{x}$; since the prior \ac{PDF} in equation (\ref{eq:prox:prior_pdf}) places probability mass only on the bipolar images of codewords, maximizing over $\mathbb{R}^{n}$ recovers the \ac{MAP} codeword.}%
+:%
 %
 \begin{align}
-    \hat{\boldsymbol{x}} = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
-    f_{\boldsymbol{X} \mid \boldsymbol{Y}}
-    \left( \boldsymbol{x} \mid \boldsymbol{y} \right)
-    = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{Y} \mid \boldsymbol{X}}
-    \left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-    f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)%
+    \hat{\boldsymbol{x}} = \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}}
+    f_{\tilde{\boldsymbol{X}} \mid \boldsymbol{Y}}
+    \left( \tilde{\boldsymbol{x}} \mid \boldsymbol{y} \right)
+    = \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}} f_{\boldsymbol{Y}
+        \mid \tilde{\boldsymbol{X}}}
+    \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+    f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)%
     \label{eq:prox:vanilla_MAP}
 .\end{align}%
 %
-The likelihood $f_{\boldsymbol{Y} \mid \boldsymbol{X}}
-\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ is a known function
+The likelihood $f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}
+\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) $ is a known function
 determined by the channel model.
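For concreteness, the \acs{LLR} form of the ML rule above lends itself to a brute-force numerical check on a toy code. The sketch below is illustrative only: the length-3 single-parity-check codebook, the BPSK mapping $x_i = (-1)^{c_i}$, and the resulting AWGN LLRs $\gamma_i = 2y_i/\sigma^2$ are assumptions made for the example, not taken from the thesis or \cite{feldman_thesis}.

```python
import numpy as np

# Toy single-parity-check code of length 3 (all even-weight words) -- an
# illustrative stand-in for a general code C(H).
CODEBOOK = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]])

def ml_decode(y, sigma2):
    """Brute-force ML decoding in LLR form: argmin over c of sum_i gamma_i * c_i."""
    gamma = 2.0 * y / sigma2          # AWGN LLRs for the mapping 0 -> +1, 1 -> -1
    return CODEBOOK[np.argmin(CODEBOOK @ gamma)]

rng = np.random.default_rng(0)
c = CODEBOOK[2]                                             # transmitted codeword
y = (-1.0) ** c + rng.normal(scale=np.sqrt(0.5), size=3)    # AWGN channel output
print(ml_decode(y, 0.5))                                    # [1 0 1], i.e. c is recovered
```

Enumerating $\mathcal{C}$ is of course only feasible for tiny codes; the point is that the objective $\sum_i \gamma_i c_i$ is linear in $\boldsymbol{c}$, which is exactly the structure that \acs{LP} decoding exploits.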
-The prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$ is also
-known as the equal probability assumption is made on
+The prior \ac{PDF} $f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)$ is also
+known, as the equal probability assumption is made on
 $\mathcal{C}\left( \boldsymbol{H} \right)$.
 However, since the considered domain is continuous, the prior \ac{PDF} cannot
 be ignored as a constant during the minimization as is often done, and has a
 rather unwieldy representation:%
 %
 \begin{align}
-    f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) =
+    f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right) =
     \frac{1}{\left| \mathcal{C} \right| } \sum_{\boldsymbol{c} \in \mathcal{C} }
-    \delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right)
+    \delta\left( \tilde{\boldsymbol{x}} - \left( -1 \right) ^{\boldsymbol{c}}\right)
     \label{eq:prox:prior_pdf}
 .\end{align}%
 %
 In order to rewrite the prior \ac{PDF}
-$f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$,
+$f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)$,
 the so-called \textit{code-constraint polynomial} is introduced as:%
 %
 \begin{align*}
-    h\left( \boldsymbol{x} \right) =
-    \underbrace{\sum_{i=1}^{n} \left( x_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
+    h\left( \tilde{\boldsymbol{x}} \right) =
+    \underbrace{\sum_{i=1}^{n} \left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
     + \underbrace{\sum_{j=1}^{m} \left[
-    \left( \prod_{i\in N \left( j \right) } x_i \right)
+    \left( \prod_{i\in N \left( j \right) } \tilde{x}_i \right)
     -1 \right] ^2}_{\text{Parity constraint}}%
 .\end{align*}%
 %
@@ -758,8 +760,8 @@ constraints, accommodating the role of the parity-check
 matrix $\boldsymbol{H}$.
 The prior \ac{PDF} is then approximated using the code-constraint polynomial as:%
 %
 \begin{align}
-    f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)
-    \approx \frac{1}{Z}\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) }%
+    f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)
+    \approx \frac{1}{Z}\mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) }%
     \label{eq:prox:prior_pdf_approx}
 .\end{align}%
 %
@@ -769,35 +771,36 @@ $\gamma \rightarrow \infty$, the approximation
 in equation (\ref{eq:prox:prior_pdf}).
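The code-constraint polynomial is easy to evaluate directly, which makes its defining property visible: it vanishes exactly at the bipolar images $(-1)^{\boldsymbol{c}}$ of valid codewords and is positive everywhere else. A minimal sketch, assuming the neighborhoods $N(j)$ are given as the supports of the rows of a binary parity-check matrix `H` (the matrix and the test vectors are hypothetical):

```python
import numpy as np

def h(x_t, H):
    """Code-constraint polynomial: bipolar term plus one parity term per check row."""
    bipolar = np.sum((x_t ** 2 - 1.0) ** 2)
    parity = sum((np.prod(x_t[np.flatnonzero(row)]) - 1.0) ** 2 for row in H)
    return bipolar + parity

H = np.array([[1, 1, 1]])                   # single parity check on 3 bits
print(h(np.array([-1.0, -1.0, 1.0]), H))    # bipolar image of c = (1,1,0): 0.0
print(h(np.array([-1.0, 1.0, 1.0]), H))     # parity violated: 4.0
print(h(np.array([0.5, -1.0, -1.0]), H))    # non-bipolar first entry: 0.8125
```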
 This approximation can then be plugged into equation (\ref{eq:prox:vanilla_MAP})
 and the likelihood can be rewritten using the negative log-likelihood
-$L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
-    f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left(
-    \boldsymbol{y} \mid \boldsymbol{x} \right) \right) $:%
+$L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
+    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left(
+    \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right) $:%
 %
 \begin{align*}
-    \hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
-    \mathrm{e}^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
-    \mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) } \\
-    &= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left(
-    L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-    + \gamma h\left( \boldsymbol{x} \right)
+    \hat{\boldsymbol{x}} &= \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}}
+    \mathrm{e}^{- L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) }
+    \mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) } \\
+    &= \argmin_{\tilde{\boldsymbol{x}} \in \mathbb{R}^n} \left(
+    L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+    + \gamma h\left( \tilde{\boldsymbol{x}} \right)
     \right)%
 .\end{align*}%
 %
 Thus, with proximal decoding, the objective function
-$g\left( \boldsymbol{x} \right)$ considered is%
+$g\left( \tilde{\boldsymbol{x}} \right)$ considered is%
 %
 \begin{align}
-    g\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-    + \gamma h\left( \boldsymbol{x} \right)%
+    g\left( \tilde{\boldsymbol{x}} \right) = L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}}
+        \right)
+    + \gamma h\left( \tilde{\boldsymbol{x}} \right)%
     \label{eq:prox:objective_function}
 \end{align}%
 %
 and the decoding problem is reformulated to%
 %
 \begin{align*}
-    \text{minimize}\hspace{2mm} &L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-    + \gamma h\left( \boldsymbol{x} \right)\\
-    \text{subject to}\hspace{2mm} &\boldsymbol{x} \in \mathbb{R}^n
+    \text{minimize}\hspace{2mm} &L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+    + \gamma h\left( \tilde{\boldsymbol{x}} \right)\\
+    \text{subject to}\hspace{2mm} &\tilde{\boldsymbol{x}} \in \mathbb{R}^n
 .\end{align*}
 %
@@ -825,11 +828,11 @@ $\gamma h\left( \boldsymbol{x} \right) $ has to be computed.
 It is then immediately approximated with gradient-descent:%
 %
 \begin{align*}
-    \text{prox}_{\gamma h} \left( \boldsymbol{x} \right) &\equiv
+    \text{prox}_{\gamma h} \left( \tilde{\boldsymbol{x}} \right) &\equiv
     \argmin_{\boldsymbol{t} \in \mathbb{R}^n} \left( \gamma h\left( \boldsymbol{t} \right) +
-    \frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert \right)\\
-    &\approx \boldsymbol{r} - \gamma \nabla h \left( \boldsymbol{r} \right),
+    \frac{1}{2} \lVert \boldsymbol{t} - \tilde{\boldsymbol{x}} \rVert^2 \right)\\
+    &\approx \tilde{\boldsymbol{x}} - \gamma \nabla h \left( \tilde{\boldsymbol{x}} \right),
     \hspace{5mm} \gamma > 0, \text{ small}
 .\end{align*}%
 %
@@ -862,12 +865,15 @@ according to the decoding performance \cite[Sec. 3.1]{proximal_paper}.
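Since $h$ is a polynomial, $\nabla h$ follows from the product rule, and the one-step gradient approximation of $\text{prox}_{\gamma h}$ is then a single line. A sketch continuing the hypothetical `H` convention of the previous snippet:

```python
import numpy as np

def grad_h(x_t, H):
    """Gradient of the code-constraint polynomial h via the product rule."""
    g = 4.0 * x_t * (x_t ** 2 - 1.0)                       # from the bipolar constraint
    for row in H:
        idx = np.flatnonzero(row)                          # N(j)
        p = np.prod(x_t[idx])
        for k in idx:
            partial = np.prod(x_t[np.setdiff1d(idx, k)])   # product without x_k
            g[k] += 2.0 * (p - 1.0) * partial              # d/dx_k of (p - 1)^2
    return g

def prox_step(x_t, gamma, H):
    """One gradient step approximating prox_{gamma h}, as in the derivation above."""
    return x_t - gamma * grad_h(x_t, H)
```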
 %\todo{$x_k$: $k$ or some other indexing variable?}%
 %%
 In the case of \ac{AWGN}, the likelihood
-$f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)$
+$f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}
+    \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)$
 is%
 %
 \begin{align*}
-    f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-    = \frac{1}{\sqrt{2\pi\sigma^2}}\mathrm{e}^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x}
+    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}
+        \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+        = \frac{1}{\left( 2\pi\sigma^2 \right) ^{n/2}}\mathrm{e}^{
+            -\frac{\lVert \boldsymbol{y}-\tilde{\boldsymbol{x}}
     \rVert^2 }
     {2\sigma^2}}
 .\end{align*}
@@ -877,9 +883,9 @@ Thus, the gradient of the negative log-likelihood becomes%
 it suffices to consider only proportionality instead of equality.}%
 %
 \begin{align*}
-    \nabla L \left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-    &\propto -\nabla \lVert \boldsymbol{y} - \boldsymbol{x} \rVert^2\\
-    &\propto \boldsymbol{x} - \boldsymbol{y}
+    \nabla L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+    &\propto \nabla \lVert \boldsymbol{y} - \tilde{\boldsymbol{x}} \rVert^2\\
+    &\propto \tilde{\boldsymbol{x}} - \boldsymbol{y}
 ,\end{align*}%
 %
 allowing equation \ref{eq:prox:step_log_likelihood} to be rewritten as%
diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex
index 189acba..5b47ea1 100644
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@@ -21,7 +21,7 @@ Lastly, the optimization methods utilized are described.
 
 \begin{itemize}
     \item General remarks on notation (matrices, \ldots)
-    \item Probabilistic quantities (random variables, \acp{PDF}, \ldots)
+    \item Probabilistic quantities (random variables, \acp{PDF} vs.\ PMFs vs.\ CDFs, \ldots)
 \end{itemize}
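Combining $\nabla L \propto \tilde{\boldsymbol{x}} - \boldsymbol{y}$ with the approximate prox step gives a minimal end-to-end sketch of the iteration for the AWGN case. It reuses `grad_h` from the previous snippet; the plain alternation of the two steps, the step sizes, and the iteration count are illustrative assumptions, not the tuned scheme of the cited paper.

```python
import numpy as np

def proximal_decode(y, H, gamma=0.05, omega=0.1, iters=200):
    """Illustrative proximal-decoding loop for AWGN: a gradient step on the
    negative log-likelihood (grad L proportional to x - y) alternating with
    the approximate prox step on gamma * h."""
    x_t = y.astype(float)                   # initialize at the channel output
    for _ in range(iters):
        x_t = x_t - omega * (x_t - y)       # likelihood step: pull toward y
        x_t = x_t - gamma * grad_h(x_t, H)  # code-constraint (prox) step
    return (x_t < 0).astype(int)            # hard decision: x = (-1)^c, so c_i = 1 iff x_i < 0

# With the toy H from the earlier snippets:
# proximal_decode(np.array([-0.9, 0.9, -0.6]), np.array([[1, 1, 1]]))  ->  array([1, 0, 1])
```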